Skip to content

Commit

Permalink
Implements f64x2.convert_low_i32x4_u for x64
Browse files Browse the repository at this point in the history
  • Loading branch information
jlb6740 committed Jun 14, 2021
1 parent 1770880 commit 7a4fb14
Show file tree
Hide file tree
Showing 9 changed files with 79 additions and 4 deletions.
13 changes: 13 additions & 0 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4411,6 +4411,19 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

// Registers the `fcvt_low_from_uint` instruction with the instruction group.
// It uses the unary format, taking the single input operand `x` and producing
// the single result `a` (both operands are declared outside this excerpt).
ig.push(
Inst::new(
"fcvt_low_from_uint",
r#"
Converts packed unsigned doubleword integers to packed double precision floating point.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

let WideInt = &TypeVar::new(
"WideInt",
"An integer type with lanes from `i16` upwards",
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3193,6 +3193,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

Opcode::TlsValue => unimplemented!("tls_value"),
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
Opcode::FcvtLowFromUint => unimplemented!("FcvtLowFromUint"),
Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
Opcode::Fvdemote => unimplemented!("Fvdemote"),
}
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/isa/s390x/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2549,6 +2549,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::UwidenLow
| Opcode::UwidenHigh
| Opcode::WideningPairwiseDotProductS
| Opcode::FcvtLowFromUint
| Opcode::FvpromoteLow
| Opcode::Fvdemote => {
// TODO
Expand Down
3 changes: 3 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,7 @@ pub enum SseOpcode {
Subsd,
Ucomiss,
Ucomisd,
Unpcklps,
Xorps,
Xorpd,
}
Expand Down Expand Up @@ -675,6 +676,7 @@ impl SseOpcode {
| SseOpcode::Subps
| SseOpcode::Subss
| SseOpcode::Ucomiss
| SseOpcode::Unpcklps
| SseOpcode::Xorps => SSE,

SseOpcode::Addpd
Expand Down Expand Up @@ -993,6 +995,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Subsd => "subsd",
SseOpcode::Ucomiss => "ucomiss",
SseOpcode::Ucomisd => "ucomisd",
SseOpcode::Unpcklps => "unpcklps",
SseOpcode::Xorps => "xorps",
SseOpcode::Xorpd => "xorpd",
};
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1529,6 +1529,7 @@ pub(crate) fn emit(
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2),
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
_ => unimplemented!("Opcode {:?} not implemented", op),
Expand Down
52 changes: 52 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4131,6 +4131,58 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst,
));
}
Opcode::FcvtLowFromUint => {
// Lowers the unsigned i32 -> f64 conversion of the low lanes using only
// `unpcklps` and `subpd`.
//
// The algorithm uses `unpcklps` to build, in each 64-bit lane, the bit pattern
// of a double that is equivalent to 0x1.0p52 + double(src). 0x1.0p52 is special
// because at this exponent each mantissa bit pattern maps directly to an integer,
// so dropping a uint32 into the low 32 bits of the mantissa yields exactly
// 0x1.0p52 + that integer. Subtracting 0x1.0p52 afterwards leaves double(src).
let src = put_input_in_reg(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();

// Copy the input into `dst` first; the instructions emitted below are given
// `dst` as their destination operand.
ctx.emit(Inst::gen_move(dst, src, types::I32X4));

// Despite the name, this is not a bit mask. Read as little-endian 32-bit lanes
// it is [0x43300000, 0x43300000, 0, 0]; 0x43300000 is the upper half (sign,
// exponent and top mantissa bits) of the double 0x1.0p52.
static UINT_MASK: [u8; 16] = [
0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
];

let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK));

ctx.emit(Inst::xmm_load_const(
uint_mask_const,
uint_mask,
types::I32X4,
));

// Creates 0x1.0p52 + double(src): interleaving the low 32-bit lanes of `dst`
// and `uint_mask` gives [src0, 0x43300000, src1, 0x43300000], i.e. two 64-bit
// lanes whose upper half is that of 0x1.0p52 and whose lower half is a source
// integer.
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Unpcklps,
RegMem::from(uint_mask),
dst,
));

// Also not a mask: read as little-endian 64-bit lanes this is
// [0x4330000000000000, 0x4330000000000000], i.e. the double 0x1.0p52 in both
// lanes.
static UINT_MASK_HIGH: [u8; 16] = [
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x30, 0x43,
];

let uint_mask_high_const =
ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH));
let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(
uint_mask_high_const,
uint_mask_high,
types::I32X4,
));

// 0x1.0p52 + double(src) - 0x1.0p52 == double(src). Every uint32 is exactly
// representable in a double's 52-bit mantissa, so the subtraction is exact.
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Subpd,
RegMem::from(uint_mask_high),
dst,
));
}
Opcode::FcvtFromUint => {
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ty.unwrap();
Expand Down
6 changes: 3 additions & 3 deletions cranelift/codegen/src/isa/x64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@ use self::inst::EmitInfo;

use super::TargetIsa;
use crate::ir::{condcodes::IntCC, Function};
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv;
use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings::{self as shared_settings, Flags};
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};

use regalloc::{PrettyPrint, RealRegUniverse, Reg};
use target_lexicon::Triple;

#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv;

mod abi;
pub mod encoding;
mod inst;
Expand Down
1 change: 1 addition & 0 deletions cranelift/interpreter/src/step.rs
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ where
Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"),
Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"),
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
Opcode::FcvtLowFromUint => unimplemented!("FcvtLowFromUint"),
Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
Opcode::Fvdemote => unimplemented!("Fvdemote"),
Opcode::Isplit => unimplemented!("Isplit"),
Expand Down
5 changes: 4 additions & 1 deletion cranelift/wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1779,6 +1779,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().fcvt_low_from_sint(F64X2, a));
}
Operator::F64x2ConvertLowI32x4U => {
// Translates wasm `f64x2.convert_low_i32x4_u` into the Cranelift
// `fcvt_low_from_uint` instruction with an F64X2 result, mirroring the
// signed variant handled just above.
// NOTE(review): `pop1_with_bitcast` presumably pops one operand and bitcasts
// it to I32X4 when needed; confirm against the helper's definition.
let a = pop1_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().fcvt_low_from_uint(F64X2, a));
}
Operator::F64x2PromoteLowF32x4 => {
let a = pop1_with_bitcast(state, F32X4, builder);
state.push1(builder.ins().fvpromote_low(a));
Expand Down Expand Up @@ -1892,7 +1896,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I16x8ExtAddPairwiseI8x16U
| Operator::I32x4ExtAddPairwiseI16x8S
| Operator::I32x4ExtAddPairwiseI16x8U
| Operator::F64x2ConvertLowI32x4U
| Operator::I32x4TruncSatF64x2SZero
| Operator::I32x4TruncSatF64x2UZero => {
return Err(wasm_unsupported!("proposed simd operator {:?}", op));
Expand Down

0 comments on commit 7a4fb14

Please sign in to comment.