From 056f7c2674abc21c42131e5c71504418ef7b2242 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 6 Jan 2022 13:06:43 -0800 Subject: [PATCH] cranelift: Port `vselect` over to ISLE on x64 --- cranelift/codegen/src/isa/x64/inst.isle | 22 ++ cranelift/codegen/src/isa/x64/lower.isle | 9 + cranelift/codegen/src/isa/x64/lower.rs | 65 +---- .../x64/lower/isle/generated_code.manifest | 4 +- .../src/isa/x64/lower/isle/generated_code.rs | 239 ++++++++++++------ .../isa/x64/simd-bitwise-compile.clif | 30 ++- .../filetests/runtests/simd-iabs.clif | 21 -- 7 files changed, 224 insertions(+), 166 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 33080ae6485c..8f94248d764c 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -954,6 +954,28 @@ (rule (pandn src1 src2) (xmm_rm_r $F64X2 (SseOpcode.Pandn) src1 src2)) +(decl sse_blend_op (Type) SseOpcode) +(rule (sse_blend_op $F32X4) (SseOpcode.Blendvps)) +(rule (sse_blend_op $F64X2) (SseOpcode.Blendvpd)) +(rule (sse_blend_op (multi_lane _bits _lanes)) (SseOpcode.Pblendvb)) + +(decl sse_mov_op (Type) SseOpcode) +(rule (sse_mov_op $F32X4) (SseOpcode.Movaps)) +(rule (sse_mov_op $F64X2) (SseOpcode.Movapd)) +(rule (sse_mov_op (multi_lane _bits _lanes)) (SseOpcode.Movdqa)) + +;; Helper for creating `blendvp{d,s}` and `pblendvb` instructions. +(decl sse_blend (Type RegMem RegMem Reg) Reg) +(rule (sse_blend ty mask src1 src2) + ;; Move the mask into `xmm0`, as blend instructions implicitly operate on + ;; that register. (This kind of thing would normally happen inside of + ;; `Inst::mov_mitosis`, but has to happen here, where we still have the + ;; mask register, because the mask is implicit and doesn't appear in the + ;; `Inst` itself.) + (let ((mask2 WritableReg (xmm0)) + (_ Unit (emit (MInst.XmmUnaryRmR (sse_mov_op ty) mask mask2)))) + (xmm_rm_r ty (sse_blend_op ty) src2 src1))) + ;; Helper for creating `blendvpd` instructions. (decl blendvpd (Reg RegMem Reg) Reg) (rule (blendvpd src1 src2 mask) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 183753c2dc47..5647ef06dd6c 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -1050,6 +1050,15 @@ (b Reg (sse_and_not ty cond_reg (put_in_reg_mem if_false)))) (value_reg (sse_or ty b (RegMem.Reg a))))) +;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty @ (multi_lane _bits _lanes) + (vselect condition if_true if_false))) + (value_reg (sse_blend ty + (put_in_reg_mem condition) + (put_in_reg_mem if_true) + (put_in_reg if_false)))) + ;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (insertlane vec @ (value_type ty) val (u8_from_uimm8 idx))) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 4093fef0e8b2..5b4ed7fbbd79 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -1515,6 +1515,8 @@ fn lower_insn_to_regs>( match op { Opcode::Iconst | Opcode::Bconst + | Opcode::F32const + | Opcode::F64const | Opcode::Null | Opcode::Iadd | Opcode::IaddIfcout @@ -1535,50 +1537,8 @@ fn lower_insn_to_regs>( | Opcode::Imin | Opcode::Umin | Opcode::Bnot - | Opcode::Bitselect => implemented_in_isle(ctx), - - Opcode::Vselect => { - let ty = ty.unwrap(); - let condition = put_input_in_reg(ctx, inputs[0]); - let condition_ty = ctx.input_ty(insn, 0); - let if_true = input_to_reg_mem(ctx, inputs[1]); - let if_false = put_input_in_reg(ctx, inputs[2]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - - if ty.is_vector() { - // `vselect` relies on the bit representation of the condition: - // vector boolean types are defined in Cranelift to be all 1s or - // all 0s. This lowering relies on that fact to use x86's - // variable blend instructions, which look at the _high_bit_ of - // the condition mask. All the bits of vector booleans will - // match (all 1s or all 0s), so we can just use the high bit. - assert!(condition_ty.lane_type().is_bool()); - - // Variable blend instructions expect the condition mask to be - // in XMM0. - let xmm0 = Writable::from_reg(regs::xmm0()); - ctx.emit(Inst::gen_move(xmm0, condition, ty)); - - // Match up the source and destination registers for regalloc. - ctx.emit(Inst::gen_move(dst, if_false, ty)); - - // Technically PBLENDVB would work in all cases (since the bytes - // inside the mask will be all 1s or 0s we can blend - // byte-by-byte instead of word-by-word, e.g.) but - // type-specialized versions are included here for clarity when - // troubleshooting and due to slight improvements in - // latency/throughput on certain processor families. - let opcode = match condition_ty { - types::B64X2 => SseOpcode::Blendvpd, - types::B32X4 => SseOpcode::Blendvps, - types::B16X8 | types::B8X16 => SseOpcode::Pblendvb, - _ => unimplemented!("unable lower vselect for type: {}", condition_ty), - }; - ctx.emit(Inst::xmm_rm_r(opcode, if_true, dst)); - } else { - unimplemented!("no lowering for scalar vselect instruction") - } - } + | Opcode::Bitselect + | Opcode::Vselect => implemented_in_isle(ctx), Opcode::Ishl | Opcode::Ushr | Opcode::Sshr | Opcode::Rotl | Opcode::Rotr => { let dst_ty = ctx.output_ty(insn, 0); @@ -3254,22 +3214,6 @@ fn lower_insn_to_regs>( }; } - Opcode::F64const => { - unreachable!( - "implemented in ISLE: inst = `{}`, type = `{:?}`", - ctx.dfg().display_inst(insn), - ty - ); - } - - Opcode::F32const => { - unreachable!( - "implemented in ISLE: inst = `{}`, type = `{:?}`", - ctx.dfg().display_inst(insn), - ty - ); - } - Opcode::WideningPairwiseDotProductS => { let lhs = put_input_in_reg(ctx, inputs[0]); let rhs = input_to_reg_mem(ctx, inputs[1]); @@ -5927,6 +5871,7 @@ fn lower_insn_to_regs>( println!("Did not match fcvt input!"); } } + // Unimplemented opcodes below. These are not currently used by Wasm // lowering or other known embeddings, but should be either supported or // removed eventually. diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest index d42f252c8afe..e3cdbcd263f1 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle f176ef3bba99365 src/prelude.isle babc931e5dc5b4cf -src/isa/x64/inst.isle fb5d3ac8e68c46d2 -src/isa/x64/lower.isle 5d66b88a371d4d70 +src/isa/x64/inst.isle bc5fc626492752c8 +src/isa/x64/lower.isle 33e94300f4c08455 diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs index c8da4c56bbf3..f2f8dd0ca2ca 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs @@ -1427,6 +1427,74 @@ pub fn constructor_pandn(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O return Some(expr2_0); } +// Generated as internal constructor for term sse_blend_op. +pub fn constructor_sse_blend_op(ctx: &mut C, arg0: Type) -> Option { + let pattern0_0 = arg0; + if pattern0_0 == F32X4 { + // Rule at src/isa/x64/inst.isle line 958. + let expr0_0 = SseOpcode::Blendvps; + return Some(expr0_0); + } + if pattern0_0 == F64X2 { + // Rule at src/isa/x64/inst.isle line 959. + let expr0_0 = SseOpcode::Blendvpd; + return Some(expr0_0); + } + if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { + // Rule at src/isa/x64/inst.isle line 960. + let expr0_0 = SseOpcode::Pblendvb; + return Some(expr0_0); + } + return None; +} + +// Generated as internal constructor for term sse_mov_op. +pub fn constructor_sse_mov_op(ctx: &mut C, arg0: Type) -> Option { + let pattern0_0 = arg0; + if pattern0_0 == F32X4 { + // Rule at src/isa/x64/inst.isle line 963. + let expr0_0 = SseOpcode::Movaps; + return Some(expr0_0); + } + if pattern0_0 == F64X2 { + // Rule at src/isa/x64/inst.isle line 964. + let expr0_0 = SseOpcode::Movapd; + return Some(expr0_0); + } + if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { + // Rule at src/isa/x64/inst.isle line 965. + let expr0_0 = SseOpcode::Movdqa; + return Some(expr0_0); + } + return None; +} + +// Generated as internal constructor for term sse_blend. +pub fn constructor_sse_blend( + ctx: &mut C, + arg0: Type, + arg1: &RegMem, + arg2: &RegMem, + arg3: Reg, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + let pattern3_0 = arg3; + // Rule at src/isa/x64/inst.isle line 969. + let expr0_0 = C::xmm0(ctx); + let expr1_0 = constructor_sse_mov_op(ctx, pattern0_0)?; + let expr2_0 = MInst::XmmUnaryRmR { + op: expr1_0, + src: pattern1_0.clone(), + dst: expr0_0, + }; + let expr3_0 = C::emit(ctx, &expr2_0); + let expr4_0 = constructor_sse_blend_op(ctx, pattern0_0)?; + let expr5_0 = constructor_xmm_rm_r(ctx, pattern0_0, &expr4_0, pattern3_0, pattern2_0)?; + return Some(expr5_0); +} + // Generated as internal constructor for term blendvpd. pub fn constructor_blendvpd( ctx: &mut C, @@ -1437,7 +1505,7 @@ pub fn constructor_blendvpd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 959. + // Rule at src/isa/x64/inst.isle line 981. let expr0_0 = C::xmm0(ctx); let expr1_0 = SseOpcode::Movapd; let expr2_0 = RegMem::Reg { reg: pattern2_0 }; @@ -1457,7 +1525,7 @@ pub fn constructor_blendvpd( pub fn constructor_movsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 971. + // Rule at src/isa/x64/inst.isle line 993. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Movsd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1468,7 +1536,7 @@ pub fn constructor_movsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> O pub fn constructor_movlhps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 976. + // Rule at src/isa/x64/inst.isle line 998. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Movlhps; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1479,7 +1547,7 @@ pub fn constructor_movlhps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 981. + // Rule at src/isa/x64/inst.isle line 1003. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1490,7 +1558,7 @@ pub fn constructor_pmaxsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 986. + // Rule at src/isa/x64/inst.isle line 1008. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1501,7 +1569,7 @@ pub fn constructor_pmaxsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 991. + // Rule at src/isa/x64/inst.isle line 1013. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxsd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1512,7 +1580,7 @@ pub fn constructor_pmaxsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 996. + // Rule at src/isa/x64/inst.isle line 1018. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1523,7 +1591,7 @@ pub fn constructor_pminsb(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1001. + // Rule at src/isa/x64/inst.isle line 1023. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1534,7 +1602,7 @@ pub fn constructor_pminsw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1006. + // Rule at src/isa/x64/inst.isle line 1028. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminsd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1545,7 +1613,7 @@ pub fn constructor_pminsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxub(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1011. + // Rule at src/isa/x64/inst.isle line 1033. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxub; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1556,7 +1624,7 @@ pub fn constructor_pmaxub(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxuw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1016. + // Rule at src/isa/x64/inst.isle line 1038. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxuw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1567,7 +1635,7 @@ pub fn constructor_pmaxuw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pmaxud(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1021. + // Rule at src/isa/x64/inst.isle line 1043. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxud; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1578,7 +1646,7 @@ pub fn constructor_pmaxud(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminub(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1026. + // Rule at src/isa/x64/inst.isle line 1048. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminub; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1589,7 +1657,7 @@ pub fn constructor_pminub(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminuw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1031. + // Rule at src/isa/x64/inst.isle line 1053. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminuw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1600,7 +1668,7 @@ pub fn constructor_pminuw(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> pub fn constructor_pminud(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1036. + // Rule at src/isa/x64/inst.isle line 1058. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminud; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -1621,7 +1689,7 @@ pub fn constructor_xmm_rm_r_imm( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1041. + // Rule at src/isa/x64/inst.isle line 1063. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::XmmRmRImm { @@ -1649,7 +1717,7 @@ pub fn constructor_palignr( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1053. + // Rule at src/isa/x64/inst.isle line 1075. let expr0_0 = SseOpcode::Palignr; let expr1_0 = constructor_xmm_rm_r_imm( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -1667,7 +1735,7 @@ pub fn constructor_pshufd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1062. + // Rule at src/isa/x64/inst.isle line 1084. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = C::writable_reg_to_reg(ctx, expr1_0); @@ -1692,7 +1760,7 @@ pub fn constructor_xmm_unary_rm_r( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1075. + // Rule at src/isa/x64/inst.isle line 1097. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::XmmUnaryRmR { @@ -1708,7 +1776,7 @@ pub fn constructor_xmm_unary_rm_r( // Generated as internal constructor for term pmovsxbw. pub fn constructor_pmovsxbw(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1082. + // Rule at src/isa/x64/inst.isle line 1104. let expr0_0 = SseOpcode::Pmovsxbw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1717,7 +1785,7 @@ pub fn constructor_pmovsxbw(ctx: &mut C, arg0: &RegMem) -> Option(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1087. + // Rule at src/isa/x64/inst.isle line 1109. let expr0_0 = SseOpcode::Pmovzxbw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1726,7 +1794,7 @@ pub fn constructor_pmovzxbw(ctx: &mut C, arg0: &RegMem) -> Option(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1092. + // Rule at src/isa/x64/inst.isle line 1114. let expr0_0 = SseOpcode::Pabsb; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1735,7 +1803,7 @@ pub fn constructor_pabsb(ctx: &mut C, arg0: &RegMem) -> Option // Generated as internal constructor for term pabsw. pub fn constructor_pabsw(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1097. + // Rule at src/isa/x64/inst.isle line 1119. let expr0_0 = SseOpcode::Pabsw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1744,7 +1812,7 @@ pub fn constructor_pabsw(ctx: &mut C, arg0: &RegMem) -> Option // Generated as internal constructor for term pabsd. pub fn constructor_pabsd(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1102. + // Rule at src/isa/x64/inst.isle line 1124. let expr0_0 = SseOpcode::Pabsd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1758,7 +1826,7 @@ pub fn constructor_xmm_unary_rm_r_evex( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1107. + // Rule at src/isa/x64/inst.isle line 1129. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::XmmUnaryRmREvex { @@ -1774,7 +1842,7 @@ pub fn constructor_xmm_unary_rm_r_evex( // Generated as internal constructor for term vpabsq. pub fn constructor_vpabsq(ctx: &mut C, arg0: &RegMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1114. + // Rule at src/isa/x64/inst.isle line 1136. let expr0_0 = Avx512Opcode::Vpabsq; let expr1_0 = constructor_xmm_unary_rm_r_evex(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1790,7 +1858,7 @@ pub fn constructor_xmm_rm_r_evex( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1119. + // Rule at src/isa/x64/inst.isle line 1141. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::XmmRmREvex { @@ -1808,7 +1876,7 @@ pub fn constructor_xmm_rm_r_evex( pub fn constructor_vpmullq(ctx: &mut C, arg0: &RegMem, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1131. + // Rule at src/isa/x64/inst.isle line 1153. let expr0_0 = Avx512Opcode::Vpmullq; let expr1_0 = constructor_xmm_rm_r_evex(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -1824,7 +1892,7 @@ pub fn constructor_xmm_rmi_reg( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1138. + // Rule at src/isa/x64/inst.isle line 1160. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::XmmRmiReg { @@ -1842,7 +1910,7 @@ pub fn constructor_xmm_rmi_reg( pub fn constructor_psllq(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1148. + // Rule at src/isa/x64/inst.isle line 1170. let expr0_0 = SseOpcode::Psllq; let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -1852,7 +1920,7 @@ pub fn constructor_psllq(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) - pub fn constructor_psrld(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1153. + // Rule at src/isa/x64/inst.isle line 1175. let expr0_0 = SseOpcode::Psrld; let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -1862,7 +1930,7 @@ pub fn constructor_psrld(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) - pub fn constructor_psrlq(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1158. + // Rule at src/isa/x64/inst.isle line 1180. let expr0_0 = SseOpcode::Psrlq; let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -1880,7 +1948,7 @@ pub fn constructor_mul_hi( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1165. + // Rule at src/isa/x64/inst.isle line 1187. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::temp_writable_reg(ctx, pattern0_0); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); @@ -1909,7 +1977,7 @@ pub fn constructor_mulhi_u( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1181. + // Rule at src/isa/x64/inst.isle line 1203. let expr0_0: bool = false; let expr1_0 = constructor_mul_hi(ctx, pattern0_0, expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1925,7 +1993,7 @@ pub fn constructor_cmpps( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1186. + // Rule at src/isa/x64/inst.isle line 1208. let expr0_0 = SseOpcode::Cmpps; let expr1_0 = C::encode_fcmp_imm(ctx, pattern2_0); let expr2_0 = OperandSize::Size32; @@ -1944,7 +2012,7 @@ pub fn constructor_cmppd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1199. + // Rule at src/isa/x64/inst.isle line 1221. let expr0_0 = SseOpcode::Cmppd; let expr1_0 = C::encode_fcmp_imm(ctx, pattern2_0); let expr2_0 = OperandSize::Size32; @@ -1965,7 +2033,7 @@ pub fn constructor_gpr_to_xmm( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1208. + // Rule at src/isa/x64/inst.isle line 1230. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = MInst::GprToXmm { op: pattern1_0.clone(), @@ -1988,7 +2056,7 @@ pub fn constructor_pinsrb( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1215. + // Rule at src/isa/x64/inst.isle line 1237. let expr0_0 = SseOpcode::Pinsrb; let expr1_0 = OperandSize::Size32; let expr2_0 = @@ -2006,7 +2074,7 @@ pub fn constructor_pinsrw( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1220. + // Rule at src/isa/x64/inst.isle line 1242. let expr0_0 = SseOpcode::Pinsrw; let expr1_0 = OperandSize::Size32; let expr2_0 = @@ -2026,7 +2094,7 @@ pub fn constructor_pinsrd( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1225. + // Rule at src/isa/x64/inst.isle line 1247. let expr0_0 = SseOpcode::Pinsrd; let expr1_0 = constructor_xmm_rm_r_imm( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -2044,7 +2112,7 @@ pub fn constructor_insertps( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1230. + // Rule at src/isa/x64/inst.isle line 1252. let expr0_0 = SseOpcode::Insertps; let expr1_0 = OperandSize::Size32; let expr2_0 = @@ -2056,7 +2124,7 @@ pub fn constructor_insertps( pub fn constructor_not(ctx: &mut C, arg0: Type, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1235. + // Rule at src/isa/x64/inst.isle line 1257. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::Not { @@ -2109,7 +2177,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1123. + // Rule at src/isa/x64/lower.isle line 1132. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminsb(ctx, expr0_0, &expr1_0)?; @@ -2484,7 +2552,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1145. + // Rule at src/isa/x64/lower.isle line 1154. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminub(ctx, expr0_0, &expr1_0)?; @@ -2494,7 +2562,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1112. + // Rule at src/isa/x64/lower.isle line 1121. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxsb(ctx, expr0_0, &expr1_0)?; @@ -2504,7 +2572,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1134. + // Rule at src/isa/x64/lower.isle line 1143. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxub(ctx, expr0_0, &expr1_0)?; @@ -2540,7 +2608,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1126. + // Rule at src/isa/x64/lower.isle line 1135. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminsw(ctx, expr0_0, &expr1_0)?; @@ -2550,7 +2618,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1148. + // Rule at src/isa/x64/lower.isle line 1157. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminuw(ctx, expr0_0, &expr1_0)?; @@ -2560,7 +2628,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1115. + // Rule at src/isa/x64/lower.isle line 1124. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxsw(ctx, expr0_0, &expr1_0)?; @@ -2570,7 +2638,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1137. + // Rule at src/isa/x64/lower.isle line 1146. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxuw(ctx, expr0_0, &expr1_0)?; @@ -2606,7 +2674,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1129. + // Rule at src/isa/x64/lower.isle line 1138. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminsd(ctx, expr0_0, &expr1_0)?; @@ -2616,7 +2684,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1151. + // Rule at src/isa/x64/lower.isle line 1160. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminud(ctx, expr0_0, &expr1_0)?; @@ -2626,7 +2694,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1118. + // Rule at src/isa/x64/lower.isle line 1127. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxsd(ctx, expr0_0, &expr1_0)?; @@ -2636,7 +2704,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1140. + // Rule at src/isa/x64/lower.isle line 1149. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxud(ctx, expr0_0, &expr1_0)?; @@ -3938,20 +4006,37 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { - if let &Opcode::Bitselect = &pattern5_0 { - let (pattern7_0, pattern7_1, pattern7_2) = - C::unpack_value_array_3(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1041. - let expr0_0 = C::put_in_reg(ctx, pattern7_0); - let expr1_0 = C::put_in_reg(ctx, pattern7_1); - let expr2_0 = RegMem::Reg { reg: expr0_0 }; - let expr3_0 = constructor_sse_and(ctx, pattern2_0, expr1_0, &expr2_0)?; - let expr4_0 = C::put_in_reg_mem(ctx, pattern7_2); - let expr5_0 = constructor_sse_and_not(ctx, pattern2_0, expr0_0, &expr4_0)?; - let expr6_0 = RegMem::Reg { reg: expr3_0 }; - let expr7_0 = constructor_sse_or(ctx, pattern2_0, expr5_0, &expr6_0)?; - let expr8_0 = C::value_reg(ctx, expr7_0); - return Some(expr8_0); + match &pattern5_0 { + &Opcode::Bitselect => { + let (pattern7_0, pattern7_1, pattern7_2) = + C::unpack_value_array_3(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 1041. + let expr0_0 = C::put_in_reg(ctx, pattern7_0); + let expr1_0 = C::put_in_reg(ctx, pattern7_1); + let expr2_0 = RegMem::Reg { reg: expr0_0 }; + let expr3_0 = constructor_sse_and(ctx, pattern2_0, expr1_0, &expr2_0)?; + let expr4_0 = C::put_in_reg_mem(ctx, pattern7_2); + let expr5_0 = + constructor_sse_and_not(ctx, pattern2_0, expr0_0, &expr4_0)?; + let expr6_0 = RegMem::Reg { reg: expr3_0 }; + let expr7_0 = constructor_sse_or(ctx, pattern2_0, expr5_0, &expr6_0)?; + let expr8_0 = C::value_reg(ctx, expr7_0); + return Some(expr8_0); + } + &Opcode::Vselect => { + let (pattern7_0, pattern7_1, pattern7_2) = + C::unpack_value_array_3(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 1055. + let expr0_0 = C::put_in_reg_mem(ctx, pattern7_0); + let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); + let expr2_0 = C::put_in_reg(ctx, pattern7_2); + let expr3_0 = constructor_sse_blend( + ctx, pattern2_0, &expr0_0, &expr1_0, expr2_0, + )?; + let expr4_0 = C::value_reg(ctx, expr3_0); + return Some(expr4_0); + } + _ => {} } } &InstructionData::Unary { @@ -4725,7 +4810,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1066. + // Rule at src/isa/x64/lower.isle line 1075. let expr0_0 = constructor_pinsrb(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -4733,7 +4818,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1069. + // Rule at src/isa/x64/lower.isle line 1078. let expr0_0 = constructor_pinsrw(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -4741,7 +4826,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1072. + // Rule at src/isa/x64/lower.isle line 1081. let expr0_0 = OperandSize::Size32; let expr1_0 = constructor_pinsrd(ctx, pattern2_0, pattern3_0, pattern4_0, &expr0_0)?; return Some(expr1_0); @@ -4750,7 +4835,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1075. + // Rule at src/isa/x64/lower.isle line 1084. let expr0_0 = OperandSize::Size64; let expr1_0 = constructor_pinsrd(ctx, pattern2_0, pattern3_0, pattern4_0, &expr0_0)?; return Some(expr1_0); @@ -4759,7 +4844,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1078. + // Rule at src/isa/x64/lower.isle line 1087. let expr0_0 = C::sse_insertps_lane_imm(ctx, pattern4_0); let expr1_0 = constructor_insertps(ctx, pattern2_0, pattern3_0, expr0_0)?; return Some(expr1_0); @@ -4770,7 +4855,7 @@ pub fn constructor_vec_insert_lane( if let &RegMem::Reg { reg: pattern4_0 } = pattern3_0 { let pattern5_0 = arg3; if pattern5_0 == 0 { - // Rule at src/isa/x64/lower.isle line 1099. + // Rule at src/isa/x64/lower.isle line 1108. let expr0_0 = RegMem::Reg { reg: pattern4_0 }; let expr1_0 = constructor_movsd(ctx, pattern2_0, &expr0_0)?; return Some(expr1_0); @@ -4778,7 +4863,7 @@ pub fn constructor_vec_insert_lane( } let pattern4_0 = arg3; if pattern4_0 == 0 { - // Rule at src/isa/x64/lower.isle line 1100. + // Rule at src/isa/x64/lower.isle line 1109. let expr0_0 = SseOpcode::Movsd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern3_0)?; let expr2_0 = RegMem::Reg { reg: expr1_0 }; @@ -4786,7 +4871,7 @@ pub fn constructor_vec_insert_lane( return Some(expr3_0); } if pattern4_0 == 1 { - // Rule at src/isa/x64/lower.isle line 1108. + // Rule at src/isa/x64/lower.isle line 1117. let expr0_0 = constructor_movlhps(ctx, pattern2_0, pattern3_0)?; return Some(expr0_0); } diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index 607810b2de24..3626d55cd482 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -96,15 +96,33 @@ block0: ; nextln: por %xmm1, %xmm0 ; not: movdqa -function %vselect_i16x8() -> i16x8 { -block0: - v0 = vconst.b16x8 [false true false true false true false true] - v1 = vconst.i16x8 [0 0 0 0 0 0 0 0] - v2 = vconst.i16x8 [0 0 0 0 0 0 0 0] + +function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 { +block0(v0: b16x8, v1: i16x8, v2: i16x8): + v3 = vselect v0, v1, v2 + return v3 +} +; check: pblendvb +; not: blendvps +; not: blendvpd + +function %vselect_f32x4(b32x4, f32x4, f32x4) -> f32x4 { +block0(v0: b32x4, v1: f32x4, v2: f32x4): + v3 = vselect v0, v1, v2 + return v3 +} +; check: blendvps +; not: pblendvb +; not: blendvpd + +function %vselect_f64x2(b64x2, f64x2, f64x2) -> f64x2 { +block0(v0: b64x2, v1: f64x2, v2: f64x2): v3 = vselect v0, v1, v2 return v3 } -; check: pblendvb %xmm1, %xmm2 +; check: blendvpd +; not: pblendvb +; not: blendvps diff --git a/cranelift/filetests/filetests/runtests/simd-iabs.clif b/cranelift/filetests/filetests/runtests/simd-iabs.clif index ee1db6762c32..1dc36c38481a 100644 --- a/cranelift/filetests/filetests/runtests/simd-iabs.clif +++ b/cranelift/filetests/filetests/runtests/simd-iabs.clif @@ -4,27 +4,6 @@ target aarch64 set enable_simd target x86_64 -function %iabs_i8x16(i8x16) -> i8x16 { -block0(v0: i8x16): - v1 = iabs v0 - return v1 -} -; run: %iabs_i8x16([0 0 0 0 127 127 127 127 -127 -127 -127 -127 -128 -128 -128 -128]) == [0 0 0 0 127 127 127 127 127 127 127 127 -128 -128 -128 -128] - -function %iabs_i16x8(i16x8) -> i16x8 { -block0(v0: i16x8): - v1 = iabs v0 - return v1 -} -; run: %iabs_i16x8([0 0 32767 32767 -32767 -32767 -32768 -32768]) == [0 0 32767 32767 32767 32767 -32768 -32768] - -function %iabs_i32x4(i32x4) -> i32x4 { -block0(v0: i32x4): - v1 = iabs v0 - return v1 -} -; run: %iabs_i32x4([0 2147483647 -2147483647 -2147483648]) == [0 2147483647 2147483647 -2147483648] - function %iabs_i64x2(i64x2) -> i64x2 { block0(v0: i64x2): v1 = iabs v0