From cb48f593e6fdcc84ef0e02f9ffa7153f807b695a Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 26 May 2020 13:08:18 -0700 Subject: [PATCH 1/5] Add x86_vcvtudq2ps instruction This instruction converts i32x4 to f32x4 in several AVX512 feature sets. --- .../codegen/meta/src/isa/x86/encodings.rs | 9 ++++++ .../codegen/meta/src/isa/x86/instructions.rs | 31 +++++++++++++++++++ cranelift/codegen/meta/src/isa/x86/opcodes.rs | 6 ++++ cranelift/codegen/meta/src/isa/x86/recipes.rs | 18 +++++++++++ .../codegen/src/isa/aarch64/lower_inst.rs | 1 + .../x86/simd-avx512-conversion-binemit.clif | 9 ++++++ 6 files changed, 74 insertions(+) create mode 100644 cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 8afe4e400b2b..e2261171490d 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1655,10 +1655,12 @@ fn define_simd( let x86_ptest = x86.by_name("x86_ptest"); let x86_punpckh = x86.by_name("x86_punpckh"); let x86_punpckl = x86.by_name("x86_punpckl"); + let x86_vcvtudq2ps = x86.by_name("x86_vcvtudq2ps"); // Shorthands for recipes. let rec_blend = r.template("blend"); let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128"); + let rec_evex_reg_rm_128 = r.template("evex_reg_rm_128"); let rec_f_ib = r.template("f_ib"); let rec_fa = r.template("fa"); let rec_fa_ib = r.template("fa_ib"); @@ -1702,6 +1704,7 @@ fn define_simd( let use_sse41_simd = settings.predicate_by_name("use_sse41_simd"); let use_sse42_simd = settings.predicate_by_name("use_sse42_simd"); let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd"); + let use_avx512vl_simd = settings.predicate_by_name("use_avx512vl_simd"); // SIMD vector size: eventually multiple vector sizes may be supported but for now only // SSE-sized vectors are available. 
@@ -1885,6 +1888,12 @@ fn define_simd( .bind(vector(F32, sse_vector_size)) .bind(vector(I32, sse_vector_size)); e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS)); + + e.enc_32_64_maybe_isap( + x86_vcvtudq2ps, + rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS), + Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F + ); } // SIMD vconst for special cases (all zeroes, all ones) diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs index 53d91ca8614d..5e9c80e6adc3 100644 --- a/cranelift/codegen/meta/src/isa/x86/instructions.rs +++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs @@ -145,6 +145,37 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let f32x4 = &TypeVar::new( + "f32x4", + "A floating point number", + TypeSetBuilder::new() + .floats(32..32) + .simd_lanes(4..4) + .build(), + ); + let i32x4 = &TypeVar::new( + "i32x4", + "An integer type with the same number of lanes", + TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(), + ); + let x = &Operand::new("x", i32x4); + let a = &Operand::new("a", f32x4); + + ig.push( + Inst::new( + "x86_vcvtudq2ps", + r#" + Convert unsigned integer to floating point. + + Convert packed doubleword unsigned integers to packed single-precision floating-point + values. This instruction does not trap. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + let x = &Operand::new("x", Float); let a = &Operand::new("a", Float); let y = &Operand::new("y", Float); diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index c1d4fa0ef5a2..23efc620d2fd 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -665,6 +665,12 @@ pub static UCOMISS: [u8; 2] = [0x0f, 0x2e]; /// Raise invalid opcode instruction. 
pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b]; +/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed +/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior +/// is controlled by MXCSR but can be overridden by EVEX.L'L in static rounding mode +/// (AVX512VL, AVX512F). +pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a]; + /// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended. pub static XOR_IMM: [u8; 1] = [0x81]; diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs index 0cfd83d3734a..74645d0b5922 100644 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs @@ -3417,5 +3417,23 @@ pub(crate) fn define<'shared>( regs).rex_kind(RecipePrefixKind::Evex) ); + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .emit( + r#" + // instruction encoding operands: reg (op1, w), rm (op2, r) + // this maps to: out_reg0, in_reg0 + let context = EvexContext::Other { length: EvexVectorLength::V128 }; + let masking = EvexMasking::None; + put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm + modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg + "#, + ), + regs).rex_kind(RecipePrefixKind::Evex) + ); + recipes } diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index bce727655263..2e17cd7b0e89 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2066,6 +2066,7 @@ pub(crate) fn lower_insn_to_regs>( | Opcode::X86Packss | Opcode::X86Punpckh | Opcode::X86Punpckl + | Opcode::X86Vcvtudq2ps | Opcode::X86ElfTlsGetAddr | Opcode::X86MachoTlsGetAddr => { panic!("x86-specific opcode in supposedly arch-neutral IR!"); diff --git 
a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif new file mode 100644 index 000000000000..37abef0e61c9 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif @@ -0,0 +1,9 @@ +test binemit +set enable_simd +target x86_64 has_avx512vl=true + +function %fcvt_from_uint(i32x4) { +block0(v0: i32x4 [%xmm2]): +[-, %xmm6] v1 = x86_vcvtudq2ps v0 ; bin: 62 f1 7f 08 7a f2 + return +} From 5a6ae42656392c4ad3d2dd1a1123363e7c245c0d Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 26 May 2020 14:12:25 -0700 Subject: [PATCH 2/5] Add x86_pblendw instruction This instruction is necessary for lowering `fcvt_from_uint`. --- cranelift/codegen/meta/src/isa/x86/encodings.rs | 8 ++++++++ cranelift/codegen/meta/src/isa/x86/instructions.rs | 14 ++++++++++++++ cranelift/codegen/meta/src/isa/x86/opcodes.rs | 4 ++++ cranelift/codegen/src/isa/aarch64/lower_inst.rs | 1 + .../isa/x86/simd-lane-access-binemit.clif | 8 ++++++++ 5 files changed, 35 insertions(+) diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index e2261171490d..d507abc0f5fb 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1639,6 +1639,7 @@ fn define_simd( let x86_movlhps = x86.by_name("x86_movlhps"); let x86_movsd = x86.by_name("x86_movsd"); let x86_packss = x86.by_name("x86_packss"); + let x86_pblendw = x86.by_name("x86_pblendw"); let x86_pextr = x86.by_name("x86_pextr"); let x86_pinsr = x86.by_name("x86_pinsr"); let x86_pmaxs = x86.by_name("x86_pmaxs"); @@ -1744,6 +1745,13 @@ fn define_simd( e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); } + // PBLENDW, select lanes using a u8 immediate. 
+ for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { + let instruction = x86_pblendw.bind(vector(ty, sse_vector_size)); + let template = rec_fa_ib.opcodes(&PBLENDW); + e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); + } + // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according // to the Intel manual: "When the destination operand is an XMM register, the source operand is // written to the low doubleword of the register and the register is zero-extended to 128 bits." diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs index 5e9c80e6adc3..4afbc8874702 100644 --- a/cranelift/codegen/meta/src/isa/x86/instructions.rs +++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs @@ -333,6 +333,20 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b"); + ig.push( + Inst::new( + "x86_pblendw", + r#" + Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a + lane in ``b``: if the bit is set, the lane is copied into ``a``. + "#, + &formats.ternary_imm8, + ) + .operands_in(vec![a, b, mask]) + .operands_out(vec![a]), + ); + let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index"); let x = &Operand::new("x", TxN); let a = &Operand::new("a", &TxN.lane_of()); diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index 23efc620d2fd..d2391fe2eef8 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -347,6 +347,10 @@ pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3]; /// in XMM0 and store the values into xmm1 (SSE4.1). pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10]; +/// Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1 +/// (SSE4.1). 
+pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e]; + /// Compare packed data for equal (SSE2). pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74]; diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 2e17cd7b0e89..5805ab63c4b8 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2046,6 +2046,7 @@ pub(crate) fn lower_insn_to_regs>( | Opcode::X86Pop | Opcode::X86Bsr | Opcode::X86Bsf + | Opcode::X86Pblendw | Opcode::X86Pshufd | Opcode::X86Pshufb | Opcode::X86Pextr diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif index e5eea1f6372a..24bc8cfa2409 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif @@ -96,6 +96,14 @@ block0: return } +;; blend + +function %pblendw(b16x8, b16x8) { +block0(v0: b16x8 [%xmm10], v1: b16x8 [%xmm2]): +[-, %xmm10] v2 = x86_pblendw v0, v1, 0x55 ; bin: 66 44 0f 3a 0e d2 55 + return +} + ;; pack/unpack function %unpack_high_i8x16(i8x16, i8x16) { From 235e4f9723bdc74dd3650a51cf24966a2c02dcde Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 26 May 2020 14:53:59 -0700 Subject: [PATCH 3/5] Add AVX512F flag --- cranelift/codegen/meta/src/isa/x86/settings.rs | 11 ++++++++++- cranelift/native/src/lib.rs | 3 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cranelift/codegen/meta/src/isa/x86/settings.rs b/cranelift/codegen/meta/src/isa/x86/settings.rs index 0ef36b668673..0059bf08640c 100644 --- a/cranelift/codegen/meta/src/isa/x86/settings.rs +++ b/cranelift/codegen/meta/src/isa/x86/settings.rs @@ -23,7 +23,12 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup { ); let has_avx512vl = settings.add_bool( "has_avx512vl", - "AVX512DQ: CPUID.07H:EBX.AVX512VL[bit 31]", + "AVX512VL: 
CPUID.07H:EBX.AVX512VL[bit 31]", + false, + ); + let has_avx512f = settings.add_bool( + "has_avx512f", + "AVX512F: CPUID.07H:EBX.AVX512F[bit 16]", false, ); let has_popcnt = settings.add_bool("has_popcnt", "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", false); @@ -76,6 +81,10 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup { "use_avx512vl_simd", predicate!(shared_enable_simd && has_avx512vl), ); + settings.add_predicate( + "use_avx512f_simd", + predicate!(shared_enable_simd && has_avx512f), + ); settings.add_predicate("use_popcnt", predicate!(has_popcnt && has_sse42)); settings.add_predicate("use_bmi1", predicate!(has_bmi1)); diff --git a/cranelift/native/src/lib.rs b/cranelift/native/src/lib.rs index b45dab8dd5c5..903fbb3522ba 100644 --- a/cranelift/native/src/lib.rs +++ b/cranelift/native/src/lib.rs @@ -91,6 +91,9 @@ fn parse_x86_cpuid(isa_builder: &mut isa::Builder) -> Result<(), &'static str> { if info.has_avx512vl() { isa_builder.enable("has_avx512vl").unwrap(); } + if info.has_avx512f() { + isa_builder.enable("has_avx512f").unwrap(); + } } if let Some(info) = cpuid.get_extended_function_info() { if info.has_lzcnt() { From eca8126efa37ea0a7db1709978a0e2346d9a3ef1 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 26 May 2020 15:20:30 -0700 Subject: [PATCH 4/5] Add x86 legalization for fcvt_from_uint.f32x4 This converts an `i32x4` into an `f32x4` with some rounding either by using an AVX512VL/F instruction--VCVTUDQ2PS--or a long sequence of SSE4.1 compatible instructions. 
--- .../codegen/meta/src/isa/x86/legalize.rs | 3 +- cranelift/codegen/meta/src/isa/x86/mod.rs | 2 + cranelift/codegen/src/isa/x86/enc_tables.rs | 53 +++++++++++++++++++ .../x86/simd-avx512-conversion-legalize.clif | 10 ++++ .../isa/x86/simd-conversion-legalize.clif | 19 +++++++ .../isa/x86/simd-conversion-run.clif | 7 +++ 6 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif create mode 100644 cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs index 6e5c791b7984..940ffe6d01ab 100644 --- a/cranelift/codegen/meta/src/isa/x86/legalize.rs +++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs @@ -380,6 +380,7 @@ fn define_simd( let bxor = insts.by_name("bxor"); let extractlane = insts.by_name("extractlane"); let fcmp = insts.by_name("fcmp"); + let fcvt_from_uint = insts.by_name("fcvt_from_uint"); let fabs = insts.by_name("fabs"); let fneg = insts.by_name("fneg"); let iadd_imm = insts.by_name("iadd_imm"); @@ -788,6 +789,6 @@ fn define_simd( narrow.custom_legalize(ushr, "convert_ushr"); narrow.custom_legalize(ishl, "convert_ishl"); - // This lives in the expand group to avoid conflicting with, e.g., i128 legalizations. 
narrow_avx.custom_legalize(imul, "convert_i64x2_imul"); + narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector"); } diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs index 2e9305e9f730..8d2e33be732c 100644 --- a/cranelift/codegen/meta/src/isa/x86/mod.rs +++ b/cranelift/codegen/meta/src/isa/x86/mod.rs @@ -48,6 +48,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { x86_32.legalize_type(F32, x86_expand); x86_32.legalize_type(F64, x86_expand); x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx); + x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); x86_64.legalize_monomorphic(expand_flags); x86_64.legalize_default(x86_narrow); @@ -60,6 +61,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { x86_64.legalize_type(F32, x86_expand); x86_64.legalize_type(F64, x86_expand); x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx); + x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); let recipes = recipes::define(shared_defs, &settings, &regs); diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs index a751ea313858..0786d375783c 100644 --- a/cranelift/codegen/src/isa/x86/enc_tables.rs +++ b/cranelift/codegen/src/isa/x86/enc_tables.rs @@ -598,6 +598,9 @@ fn expand_minmax( /// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to /// i64 with a pattern, the rest needs more code. +/// +/// Note that this is the scalar implementation; for the vector implementation see +/// [expand_fcvt_from_uint_vector]. 
fn expand_fcvt_from_uint( inst: ir::Inst, func: &mut ir::Function, @@ -679,6 +682,56 @@ fn expand_fcvt_from_uint( cfg.recompute_block(pos.func, done); } +/// To convert packed unsigned integers to their float equivalents, we must legalize to a special +/// AVX512 instruction (using MXCSR rounding) or use a long sequence of instructions. This logic is +/// separate from [expand_fcvt_from_uint] above (the scalar version), only due to how the transform +/// groups are set up; TODO if we change the SIMD legalization groups, then this logic could be +/// merged into [expand_fcvt_from_uint] (see https://github.com/bytecodealliance/wasmtime/issues/1745). +fn expand_fcvt_from_uint_vector( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + if let ir::InstructionData::Unary { + opcode: ir::Opcode::FcvtFromUint, + arg, + } = pos.func.dfg[inst] + { + let controlling_type = pos.func.dfg.ctrl_typevar(inst); + if controlling_type == F32X4 { + debug_assert_eq!(pos.func.dfg.value_type(arg), I32X4); + let x86_isa = isa + .as_any() + .downcast_ref::() + .expect("the target ISA must be x86 at this point"); + if x86_isa.isa_flags.use_avx512vl_simd() || x86_isa.isa_flags.use_avx512f_simd() { + // If we have certain AVX512 features, we can lower this instruction simply. 
+ pos.func.dfg.replace(inst).x86_vcvtudq2ps(arg); + } else { + // Otherwise, we default to a very lengthy SSE4.1-compatible sequence: PXOR, + // PBLENDW, PSUB, CVTDQ2PS, PSRLD, CVTDQ2PS, ADDPS, ADDPS + let bitcast_arg = pos.ins().raw_bitcast(I16X8, arg); + let zero_constant = pos.func.dfg.constants.insert(vec![0; 16].into()); + let zero = pos.ins().vconst(I16X8, zero_constant); + let low = pos.ins().x86_pblendw(zero, bitcast_arg, 0x55); + let bitcast_low = pos.ins().raw_bitcast(I32X4, low); + let high = pos.ins().isub(arg, bitcast_low); + let convert_low = pos.ins().fcvt_from_sint(F32X4, bitcast_low); + let shift_high = pos.ins().ushr_imm(high, 1); + let convert_high = pos.ins().fcvt_from_sint(F32X4, shift_high); + let double_high = pos.ins().fadd(convert_high, convert_high); + pos.func.dfg.replace(inst).fadd(double_high, convert_low); + } + } else { + unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None)) + } + } +} + fn expand_fcvt_to_sint( inst: ir::Inst, func: &mut ir::Function, diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif new file mode 100644 index 000000000000..78dc1cf2200e --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif @@ -0,0 +1,10 @@ +test legalizer +set enable_simd +target x86_64 skylake has_avx512f=true + +function %fcvt_from_uint(i32x4) -> f32x4 { +block0(v0:i32x4): + v1 = fcvt_from_uint.f32x4 v0 + ; check: v1 = x86_vcvtudq2ps v0 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif new file mode 100644 index 000000000000..7db52967e4f8 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif @@ -0,0 +1,19 @@ +test legalizer +set enable_simd +target x86_64 skylake + +function %fcvt_from_uint(i32x4) -> f32x4 { +block0(v0:i32x4): 
+ v1 = fcvt_from_uint.f32x4 v0 + ; check: v2 = raw_bitcast.i16x8 v0 + ; nextln: v3 = vconst.i16x8 const0 + ; nextln: v4 = x86_pblendw v3, v2, 85 + ; nextln: v5 = raw_bitcast.i32x4 v4 + ; nextln: v6 = isub v0, v5 + ; nextln: v7 = fcvt_from_sint.f32x4 v5 + ; nextln: v8 = ushr_imm v6, 1 + ; nextln: v9 = fcvt_from_sint.f32x4 v8 + ; nextln: v10 = fadd v9, v9 + ; nextln: v1 = fadd v10, v7 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-run.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-run.clif index 3484818aa3db..2a97474adc9a 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-conversion-run.clif @@ -13,3 +13,10 @@ block0: return v4 } ; run + +function %fcvt_from_uint(i32x4) -> f32x4 { +block0(v0:i32x4): + v1 = fcvt_from_uint.f32x4 v0 + return v1 +} +; run: %fcvt_from_uint([0 0 0 0]) == [0x0.0 0x0.0 0x0.0 0x0.0] From c513892e3495c990b4af39b240d0a1479999a09c Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 26 May 2020 15:21:24 -0700 Subject: [PATCH 5/5] Translate Wasm's f32x4.convert_i32x4_u instruction to Cranelift's fcvt_from_uint --- cranelift/wasm/src/code_translator.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index df6eda17601a..20c6e3af8d5d 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1544,9 +1544,12 @@ pub fn translate_operator( let a = pop1_with_bitcast(state, I32X4, builder); state.push1(builder.ins().fcvt_from_sint(F32X4, a)) } + Operator::F32x4ConvertI32x4U => { + let a = pop1_with_bitcast(state, I32X4, builder); + state.push1(builder.ins().fcvt_from_uint(F32X4, a)) + } Operator::I32x4TruncSatF32x4S | Operator::I32x4TruncSatF32x4U - | Operator::F32x4ConvertI32x4U | Operator::I8x16Abs | Operator::I16x8Abs | Operator::I32x4Abs