Merge pull request #3209 from jlb6740/fix-3161

Remove unnecessary, too strict assertion. Fix for 3161.
bytecodealliance · Aug 26, 2021 · 0771abf · 0771abf
2 parents def394e + e3aae9e
commit 0771abf
Show file tree

Hide file tree

Showing 2 changed files with 109 additions and 56 deletions.
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
@@ -4528,8 +4528,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::FcvtFromUint => {
             let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let ty = ty.unwrap();
-
             let input_ty = ctx.input_ty(insn, 0);
+            let output_ty = ctx.output_ty(insn, 0);
+
             if !ty.is_vector() {
                 match input_ty {
                     types::I8 | types::I16 | types::I32 => {
@@ -4572,67 +4573,71 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     }
                     _ => panic!("unexpected input type for FcvtFromUint: {:?}", input_ty),
                 };
-            } else if let Some(uwiden) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
-                let uwiden_input = InsnInput {
-                    insn: uwiden,
-                    input: 0,
-                };
-                let src = put_input_in_reg(ctx, uwiden_input);
-                let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                let input_ty = ctx.input_ty(uwiden, 0);
-                let output_ty = ctx.output_ty(insn, 0);
-
-                // Matches_input further obfuscates which Wasm instruction this is ultimately
-                // lowering. Check here that the types are as expected for F64x2ConvertLowI32x4U.
-                debug_assert!(input_ty == types::I32X4 || output_ty == types::F64X2);
-
-                // Algorithm uses unpcklps to help create a float that is equivalent
-                // 0x1.0p52 + double(src). 0x1.0p52 is unique because at this exponent
-                // every value of the mantissa represents a corresponding uint32 number.
-                // When we subtract 0x1.0p52 we are left with double(src).
-                let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
-                ctx.emit(Inst::gen_move(dst, src, types::I32X4));
-
-                static UINT_MASK: [u8; 16] = [
-                    0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00,
-                    0x00, 0x00, 0x00,
-                ];
+            } else if output_ty == types::F64X2 {
+                if let Some(uwiden) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
+                    let uwiden_input = InsnInput {
+                        insn: uwiden,
+                        input: 0,
+                    };
+                    let src = put_input_in_reg(ctx, uwiden_input);
+                    let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+                    let input_ty = ctx.input_ty(uwiden, 0);
+
+                    // Matches_input further obfuscates which Wasm instruction this is ultimately
+                    // lowering. Check here that the types are as expected for F64x2ConvertLowI32x4U.
+                    debug_assert!(input_ty == types::I32X4);
+
+                    // Algorithm uses unpcklps to help create a float that is equivalent
+                    // 0x1.0p52 + double(src). 0x1.0p52 is unique because at this exponent
+                    // every value of the mantissa represents a corresponding uint32 number.
+                    // When we subtract 0x1.0p52 we are left with double(src).
+                    let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
+                    ctx.emit(Inst::gen_move(dst, src, types::I32X4));
+
+                    static UINT_MASK: [u8; 16] = [
+                        0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00,
+                        0x00, 0x00, 0x00, 0x00,
+                    ];
 
-                let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK));
+                    let uint_mask_const =
+                        ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK));
 
-                ctx.emit(Inst::xmm_load_const(
-                    uint_mask_const,
-                    uint_mask,
-                    types::I32X4,
-                ));
+                    ctx.emit(Inst::xmm_load_const(
+                        uint_mask_const,
+                        uint_mask,
+                        types::I32X4,
+                    ));
 
-                // Creates 0x1.0p52 + double(src)
-                ctx.emit(Inst::xmm_rm_r(
-                    SseOpcode::Unpcklps,
-                    RegMem::from(uint_mask),
-                    dst,
-                ));
+                    // Creates 0x1.0p52 + double(src)
+                    ctx.emit(Inst::xmm_rm_r(
+                        SseOpcode::Unpcklps,
+                        RegMem::from(uint_mask),
+                        dst,
+                    ));
 
-                static UINT_MASK_HIGH: [u8; 16] = [
-                    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00,
-                    0x00, 0x30, 0x43,
-                ];
+                    static UINT_MASK_HIGH: [u8; 16] = [
+                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00,
+                        0x00, 0x00, 0x30, 0x43,
+                    ];
 
-                let uint_mask_high_const =
-                    ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH));
-                let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
-                ctx.emit(Inst::xmm_load_const(
-                    uint_mask_high_const,
-                    uint_mask_high,
-                    types::I32X4,
-                ));
+                    let uint_mask_high_const =
+                        ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH));
+                    let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
+                    ctx.emit(Inst::xmm_load_const(
+                        uint_mask_high_const,
+                        uint_mask_high,
+                        types::I32X4,
+                    ));
 
-                // 0x1.0p52 + double(src) - 0x1.0p52
-                ctx.emit(Inst::xmm_rm_r(
-                    SseOpcode::Subpd,
-                    RegMem::from(uint_mask_high),
-                    dst,
-                ));
+                    // 0x1.0p52 + double(src) - 0x1.0p52
+                    ctx.emit(Inst::xmm_rm_r(
+                        SseOpcode::Subpd,
+                        RegMem::from(uint_mask_high),
+                        dst,
+                    ));
+                } else {
+                    panic!("Unsupported FcvtFromUint conversion types: {}", ty);
+                }
             } else {
                 assert_eq!(ctx.input_ty(insn, 0), types::I32X4);
                 let src = put_input_in_reg(ctx, inputs[0]);

diff --git a/tests/misc_testsuite/simd/cvt-from-uint.wast b/tests/misc_testsuite/simd/cvt-from-uint.wast
@@ -0,0 +1,48 @@
+
+;; Tests inspired by https://github.com/bytecodealliance/wasmtime/issues/3161
+;; which found issue in lowering Opcode::FcvtFromUint where valid instruction
+;; patterns were rejected
+(module
+    (func (export "i16x8.extend_low_i8x16_s") (param v128) (result v128)
+        local.get 0
+        i16x8.extend_low_i8x16_s
+        f32x4.convert_i32x4_u)
+    (func (export "i16x8.extend_low_i8x16_u") (param v128) (result v128)
+        local.get 0
+        i16x8.extend_low_i8x16_u
+        f32x4.convert_i32x4_u)
+    (func (export "i32x4.extend_low_i16x8_s") (param v128) (result v128)
+        local.get 0
+        i32x4.extend_low_i16x8_s
+        f32x4.convert_i32x4_u)
+    (func (export "i32x4.extend_low_i16x8_u") (param v128) (result v128)
+        local.get 0
+        i32x4.extend_low_i16x8_u
+        f32x4.convert_i32x4_u)
+    (func (export "i64x2.extend_low_i32x4_s") (param v128) (result v128)
+        local.get 0
+        i64x2.extend_low_i32x4_s
+        f32x4.convert_i32x4_u)
+    (func (export "i64x2.extend_low_i32x4_u") (param v128) (result v128)
+        local.get 0
+        i64x2.extend_low_i32x4_u
+        f32x4.convert_i32x4_u)
+)
+
+(assert_return (invoke "i16x8.extend_low_i8x16_s" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
+               (v128.const f32x4 0 0 0 0))
+
+(assert_return (invoke "i16x8.extend_low_i8x16_u" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
+               (v128.const f32x4 0 0 0 0))
+
+(assert_return (invoke "i32x4.extend_low_i16x8_s" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
+               (v128.const f32x4 0 0 0 0))
+
+(assert_return (invoke "i32x4.extend_low_i16x8_u" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
+               (v128.const f32x4 0 0 0 0))
+
+(assert_return (invoke "i64x2.extend_low_i32x4_s" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
+               (v128.const f32x4 0 0 0 0))
+
+(assert_return (invoke "i64x2.extend_low_i32x4_u" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
+               (v128.const f32x4 0 0 0 0))