Skip to content

Commit

Permalink
x64: lower abs.i64x2 to VPABSQ when available
Browse files Browse the repository at this point in the history
  • Loading branch information
abrown committed Apr 15, 2021
1 parent 48d7c77 commit 2f73d66
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 23 deletions.
4 changes: 0 additions & 4 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,9 +460,7 @@ pub(crate) enum InstructionSet {
BMI1,
#[allow(dead_code)] // never constructed (yet).
BMI2,
#[allow(dead_code)]
AVX512F,
#[allow(dead_code)]
AVX512VL,
}

Expand Down Expand Up @@ -995,13 +993,11 @@ impl fmt::Display for SseOpcode {

/// Opcodes for instructions that belong to the AVX-512 instruction-set extensions.
///
/// Use `available_from` to find out which `InstructionSet`s an opcode requires.
#[derive(Clone)]
pub enum Avx512Opcode {
/// `VPABSQ`, used to lower `abs.i64x2`.
// NOTE(review): this variant appears to be constructed by the `abs.i64x2` lowering and by the
// emit tests, so the `allow` below looks unnecessary -- confirm and remove it.
#[allow(dead_code)]
Vpabsq,
}

impl Avx512Opcode {
/// Which `InstructionSet`s support the opcode?
#[allow(dead_code)]
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
match self {
Avx512Opcode::Vpabsq => smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL],
Expand Down
20 changes: 20 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel};
use core::convert::TryInto;
use cranelift_codegen_shared::isa::x86::EncodingBits;
use encoding::evex::{encode_evex, EvexContext, EvexMasking};
use encoding::rex::{
emit_simm, emit_std_enc_enc, emit_std_enc_mem, emit_std_reg_mem, emit_std_reg_reg, int_reg_enc,
low8_will_sign_extend_to_32, low8_will_sign_extend_to_64, reg_enc, LegacyPrefixes, RexFlags,
Expand Down Expand Up @@ -1404,6 +1406,24 @@ pub(crate) fn emit(
};
}

// Unary XMM instructions that are emitted through the EVEX encoder (e.g. `vpabsq`).
Inst::XmmUnaryRmREvex { op, src, dst } => {
// Pick the encoding description for the opcode.
// NOTE(review): presumably `0x66, 0x0f, 0x38` are the prefix/escape bytes and `0x1f` the
// opcode byte, with the trailing `0, 1` being additional encoding fields -- confirm against
// `EncodingBits::new`.
let bits = match op {
&Avx512Opcode::Vpabsq => EncodingBits::new(&[0x66, 0x0f, 0x38, 0x1f], 0, 1),
};
match src {
// Register-to-register form: `dst` and `src` are passed as hardware encodings.
RegMem::Reg { reg: src } => encode_evex(
bits,
dst.to_reg().get_hw_encoding(),
// NOTE(review): presumably the slot for a second source register, which a unary
// instruction does not have -- confirm against `encode_evex`.
0,
src.get_hw_encoding(),
EvexContext::v128(),
EvexMasking::default(),
sink,
),
// Any non-register source is not implemented yet and panics via `todo!()`.
// NOTE(review): verify that the lowering can never hand a memory operand to this
// instruction, otherwise compilation will panic here.
_ => todo!(),
}
}

Inst::XmmRmR {
op,
src: src_e,
Expand Down
7 changes: 7 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3865,6 +3865,12 @@ fn test_x64_emit() {
"cvtdq2pd %xmm2, %xmm8",
));

insns.push((
Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, RegMem::reg(xmm2), w_xmm8),
"6272FD081FC2",
"vpabsq %xmm2, %xmm8",
));

// Xmm to int conversions, and conversely.

insns.push((
Expand Down Expand Up @@ -4276,6 +4282,7 @@ fn test_x64_emit() {
let mut isa_flag_builder = x64::settings::builder();
isa_flag_builder.enable("has_ssse3").unwrap();
isa_flag_builder.enable("has_sse41").unwrap();
isa_flag_builder.enable("has_avx512f").unwrap();
let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder);

let rru = regs::create_reg_universe_systemv(&flags);
Expand Down
30 changes: 29 additions & 1 deletion cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,12 @@ pub enum Inst {
dst: Writable<Reg>,
},

/// XMM (vector) unary op (from xmm/mem to xmm reg) whose opcode is an `Avx512Opcode` and which
/// is emitted with the EVEX encoding, e.g. `vpabsq`.
XmmUnaryRmREvex {
op: Avx512Opcode,
src: RegMem,
dst: Writable<Reg>,
},

/// XMM (scalar or vector) unary op (from xmm to reg/mem): stores, movd, movq
XmmMovRM {
op: SseOpcode,
Expand Down Expand Up @@ -571,6 +577,8 @@ impl Inst {
| Inst::XmmRmRImm { op, .. }
| Inst::XmmToGpr { op, .. }
| Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],

Inst::XmmUnaryRmREvex { op, .. } => op.available_from(),
}
}
}
Expand Down Expand Up @@ -705,6 +713,12 @@ impl Inst {
Inst::XmmUnaryRmR { op, src, dst }
}

pub(crate) fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable<Reg>) -> Inst {
src.assert_regclass_is(RegClass::V128);
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
Inst::XmmUnaryRmREvex { op, src, dst }
}

pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {
src.assert_regclass_is(RegClass::V128);
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
Expand Down Expand Up @@ -1391,6 +1405,13 @@ impl PrettyPrint for Inst {
show_ireg_sized(dst.to_reg(), mb_rru, 8),
),

Inst::XmmUnaryRmREvex { op, src, dst, .. } => format!(
"{} {}, {}",
ljustify(op.to_string()),
src.show_rru_sized(mb_rru, 8),
show_ireg_sized(dst.to_reg(), mb_rru, 8),
),

Inst::XmmMovRM { op, src, dst, .. } => format!(
"{} {}, {}",
ljustify(op.to_string()),
Expand Down Expand Up @@ -1863,7 +1884,9 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(Writable::from_reg(regs::rdx()));
}
},
Inst::UnaryRmR { src, dst, .. } | Inst::XmmUnaryRmR { src, dst, .. } => {
Inst::UnaryRmR { src, dst, .. }
| Inst::XmmUnaryRmR { src, dst, .. }
| Inst::XmmUnaryRmREvex { src, dst, .. } => {
src.get_regs_as_uses(collector);
collector.add_def(*dst);
}
Expand Down Expand Up @@ -2210,6 +2233,11 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
ref mut dst,
..
}
| Inst::XmmUnaryRmREvex {
ref mut src,
ref mut dst,
..
}
| Inst::UnaryRmR {
ref mut src,
ref mut dst,
Expand Down
40 changes: 22 additions & 18 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1855,25 +1855,29 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ty.unwrap();
if ty == types::I64X2 {
// This lowering could be a single instruction with AVX512F/VL's VPABSQ instruction.
// Instead, we use a separate register, `tmp`, to contain the results of `0 - src`
// and then blend in those results with `BLENDVPD` if the MSB of `tmp` was set to 1
// (i.e. if `tmp` was negative or, conversely, if `src` was originally positive).
// The 128-bit form of `VPABSQ` is EVEX-encoded and requires AVX512F *and* AVX512VL (see
// `Avx512Opcode::available_from`). Both features must therefore be enabled: selecting the
// instruction when only one of them is present would emit code the target cannot execute.
if isa_flags.use_avx512f_simd() && isa_flags.use_avx512vl_simd() {
    ctx.emit(Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, src, dst));
} else {
// If `VPABSQ` from AVX512 is unavailable, we use a separate register, `tmp`, to
// contain the results of `0 - src` and then blend in those results with
// `BLENDVPD` if the MSB of `tmp` was set to 1 (i.e. if `tmp` was negative or,
// conversely, if `src` was originally positive).

// Emit all 0s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp));
// Subtract the lanes from 0 and set up `dst`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubq, src.clone(), tmp));
ctx.emit(Inst::gen_move(dst, tmp.to_reg(), ty));
// Choose the subtracted lanes when `tmp` has an MSB of 1. BLENDVPD's semantics
// require the "choice" mask to be in XMM0.
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::xmm0()),
tmp.to_reg(),
ty,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Blendvpd, src, dst));
// Emit all 0s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp));
// Subtract the lanes from 0 and set up `dst`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubq, src.clone(), tmp));
ctx.emit(Inst::gen_move(dst, tmp.to_reg(), ty));
// Choose the subtracted lanes when `tmp` has an MSB of 1. BLENDVPD's semantics
// require the "choice" mask to be in XMM0.
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::xmm0()),
tmp.to_reg(),
ty,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Blendvpd, src, dst));
}
} else if ty.is_vector() {
let opcode = match ty {
types::I8X16 => SseOpcode::Pabsb,
Expand Down

0 comments on commit 2f73d66

Please sign in to comment.