Skip to content

Commit

Permalink
Merge pull request #3209 from jlb6740/fix-3161
Browse files Browse the repository at this point in the history
Remove unnecessary, too strict assertion. Fix for 3161.
  • Loading branch information
cfallin authored Aug 26, 2021
2 parents def394e + e3aae9e commit 0771abf
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 56 deletions.
117 changes: 61 additions & 56 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4528,8 +4528,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::FcvtFromUint => {
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ty.unwrap();

let input_ty = ctx.input_ty(insn, 0);
let output_ty = ctx.output_ty(insn, 0);

if !ty.is_vector() {
match input_ty {
types::I8 | types::I16 | types::I32 => {
Expand Down Expand Up @@ -4572,67 +4573,71 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
_ => panic!("unexpected input type for FcvtFromUint: {:?}", input_ty),
};
} else if let Some(uwiden) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
let uwiden_input = InsnInput {
insn: uwiden,
input: 0,
};
let src = put_input_in_reg(ctx, uwiden_input);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let input_ty = ctx.input_ty(uwiden, 0);
let output_ty = ctx.output_ty(insn, 0);

// Matches_input further obfuscates which Wasm instruction this is ultimately
// lowering. Check here that the types are as expected for F64x2ConvertLowI32x4U.
debug_assert!(input_ty == types::I32X4 || output_ty == types::F64X2);

// Algorithm uses unpcklps to help create a float that is equivalent
// 0x1.0p52 + double(src). 0x1.0p52 is unique because at this exponent
// every value of the mantissa represents a corresponding uint32 number.
// When we subtract 0x1.0p52 we are left with double(src).
let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::gen_move(dst, src, types::I32X4));

static UINT_MASK: [u8; 16] = [
0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00,
];
} else if output_ty == types::F64X2 {
if let Some(uwiden) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
let uwiden_input = InsnInput {
insn: uwiden,
input: 0,
};
let src = put_input_in_reg(ctx, uwiden_input);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let input_ty = ctx.input_ty(uwiden, 0);

// Matches_input further obfuscates which Wasm instruction this is ultimately
// lowering. Check here that the types are as expected for F64x2ConvertLowI32x4U.
debug_assert!(input_ty == types::I32X4);

// Algorithm uses unpcklps to help create a float that is equivalent
// 0x1.0p52 + double(src). 0x1.0p52 is unique because at this exponent
// every value of the mantissa represents a corresponding uint32 number.
// When we subtract 0x1.0p52 we are left with double(src).
let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::gen_move(dst, src, types::I32X4));

static UINT_MASK: [u8; 16] = [
0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
];

let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK));
let uint_mask_const =
ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK));

ctx.emit(Inst::xmm_load_const(
uint_mask_const,
uint_mask,
types::I32X4,
));
ctx.emit(Inst::xmm_load_const(
uint_mask_const,
uint_mask,
types::I32X4,
));

// Creates 0x1.0p52 + double(src)
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Unpcklps,
RegMem::from(uint_mask),
dst,
));
// Creates 0x1.0p52 + double(src)
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Unpcklps,
RegMem::from(uint_mask),
dst,
));

static UINT_MASK_HIGH: [u8; 16] = [
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x30, 0x43,
];
static UINT_MASK_HIGH: [u8; 16] = [
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x30, 0x43,
];

let uint_mask_high_const =
ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH));
let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(
uint_mask_high_const,
uint_mask_high,
types::I32X4,
));
let uint_mask_high_const =
ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH));
let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(
uint_mask_high_const,
uint_mask_high,
types::I32X4,
));

// 0x1.0p52 + double(src) - 0x1.0p52
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Subpd,
RegMem::from(uint_mask_high),
dst,
));
// 0x1.0p52 + double(src) - 0x1.0p52
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Subpd,
RegMem::from(uint_mask_high),
dst,
));
} else {
panic!("Unsupported FcvtFromUint conversion types: {}", ty);
}
} else {
assert_eq!(ctx.input_ty(insn, 0), types::I32X4);
let src = put_input_in_reg(ctx, inputs[0]);
Expand Down
48 changes: 48 additions & 0 deletions tests/misc_testsuite/simd/cvt-from-uint.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@

;; Tests inspired by https://github.com/bytecodealliance/wasmtime/issues/3161
;; which found issue in lowering Opcode::FcvtFromUint where valid instruction
;; patterns were rejected
(module
(func (export "i16x8.extend_low_i8x16_s") (param v128) (result v128)
local.get 0
i16x8.extend_low_i8x16_s
f32x4.convert_i32x4_u)
(func (export "i16x8.extend_low_i8x16_u") (param v128) (result v128)
local.get 0
i16x8.extend_low_i8x16_u
f32x4.convert_i32x4_u)
(func (export "i32x4.extend_low_i16x8_s") (param v128) (result v128)
local.get 0
i32x4.extend_low_i16x8_s
f32x4.convert_i32x4_u)
(func (export "i32x4.extend_low_i16x8_u") (param v128) (result v128)
local.get 0
i32x4.extend_low_i16x8_u
f32x4.convert_i32x4_u)
(func (export "i64x2.extend_low_i32x4_s") (param v128) (result v128)
local.get 0
i64x2.extend_low_i32x4_s
f32x4.convert_i32x4_u)
(func (export "i64x2.extend_low_i32x4_u") (param v128) (result v128)
local.get 0
i64x2.extend_low_i32x4_u
f32x4.convert_i32x4_u)
)

(assert_return (invoke "i16x8.extend_low_i8x16_s" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
(v128.const f32x4 0 0 0 0))

(assert_return (invoke "i16x8.extend_low_i8x16_u" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
(v128.const f32x4 0 0 0 0))

(assert_return (invoke "i32x4.extend_low_i16x8_s" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
(v128.const f32x4 0 0 0 0))

(assert_return (invoke "i32x4.extend_low_i16x8_u" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
(v128.const f32x4 0 0 0 0))

(assert_return (invoke "i64x2.extend_low_i32x4_s" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
(v128.const f32x4 0 0 0 0))

(assert_return (invoke "i64x2.extend_low_i32x4_u" (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
(v128.const f32x4 0 0 0 0))

0 comments on commit 0771abf

Please sign in to comment.