Skip to content

Commit

Permalink
Use regalloc constraints for sse blend operations
Browse files Browse the repository at this point in the history
  • Loading branch information
elliottt committed Nov 11, 2022
1 parent 49ed79f commit f05681c
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 67 deletions.
44 changes: 20 additions & 24 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,17 @@
(src2 XmmMem)
(dst WritableXmm))

;; XMM (scalar or vector) blend op. The mask is used to blend between
;; src1 and src2. This differs from a use of `XmmRmR` as the mask is
;; implicitly in register xmm0; this special case exists to allow us to
;; communicate the constraint on the `mask` register to regalloc2.
(XmmRmRBlend
(op SseOpcode)
(src1 Xmm)
(src2 XmmMem)
(mask Xmm)
(dst WritableXmm))

;; XMM (scalar or vector) binary op that relies on the VEX prefix.
(XmmRmRVex (op AvxOpcode)
(src1 Xmm)
Expand Down Expand Up @@ -1359,9 +1370,6 @@
;; rely on something like `Inst::mov_mitosis` to put an operand into the
;; appropriate physical register for whatever reason.

(decl xmm0 () WritableXmm)
(extern constructor xmm0 xmm0)

;;;; Helpers for determining the register class of a value type ;;;;;;;;;;;;;;;;

(type RegisterClass
Expand Down Expand Up @@ -2432,33 +2440,21 @@
;; Priority 0 because multi_lane overlaps with the previous two type patterns.
(rule 0 (sse_mov_op (multi_lane _bits _lanes)) (SseOpcode.Movdqa))

;; Helper for emitting `MInst.XmmRmRBlend` instructions. Obtains a destination
;; register from `temp_writable_xmm`, emits the blend of `src1` and `src2` with
;; `mask` passed as an explicit operand of the instruction, and returns the
;; destination.
(decl xmm_rm_r_blend (SseOpcode Xmm XmmMem Xmm) Xmm)
(rule (xmm_rm_r_blend op src1 src2 mask)
(let ((dst WritableXmm (temp_writable_xmm))
(_ Unit (emit (MInst.XmmRmRBlend op src1 src2 mask dst))))
dst))

;; Helper for creating `blendvp{d,s}` and `pblendvb` instructions.
(decl x64_blend (Type XmmMem XmmMem Xmm) Xmm)
(decl x64_blend (Type Xmm XmmMem Xmm) Xmm)
(rule (x64_blend ty mask src1 src2)
;; Move the mask into `xmm0`, as blend instructions implicitly operate on
;; that register. (This kind of thing would normally happen inside of
;; `Inst::mov_mitosis`, but has to happen here, where we still have the
;; mask register, because the mask is implicit and doesn't appear in the
;; `Inst` itself.)
(let ((mask2 WritableXmm (xmm0))
(_ Unit (emit (MInst.XmmUnaryRmR (sse_mov_op ty)
mask
mask2))))
(xmm_rm_r ty (sse_blend_op ty) src2 src1)))
(xmm_rm_r_blend (sse_blend_op ty) src2 src1 mask))

;; Helper for creating `blendvpd` instructions.
(decl x64_blendvpd (Xmm XmmMem Xmm) Xmm)
(rule (x64_blendvpd src1 src2 mask)
;; Move the mask into `xmm0`, as `blendvpd` implicitly operates on that
;; register. (This kind of thing would normally happen inside of
;; `Inst::mov_mitosis`, but has to happen here, where we still have the
;; mask register, because the mask is implicit and doesn't appear in the
;; `Inst` itself.)
(let ((mask2 WritableXmm (xmm0))
(_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Movapd)
mask
mask2))))
(xmm_rm_r $F64X2 (SseOpcode.Blendvpd) src1 src2)))
(xmm_rm_r_blend (SseOpcode.Blendvpd) src1 src2 mask))

;; Helper for creating `movsd` instructions.
(decl x64_movsd_regmove (Xmm XmmMem) Xmm)
Expand Down
30 changes: 27 additions & 3 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1820,8 +1820,6 @@ pub(crate) fn emit(
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
SseOpcode::Blendvps => (LegacyPrefixes::_66, 0x0F3814, 3),
SseOpcode::Blendvpd => (LegacyPrefixes::_66, 0x0F3815, 3),
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2),
Expand Down Expand Up @@ -1859,7 +1857,6 @@ pub(crate) fn emit(
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
SseOpcode::Pblendvb => (LegacyPrefixes::_66, 0x0F3810, 3),
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
SseOpcode::Pcmpeqw => (LegacyPrefixes::_66, 0x0F75, 2),
SseOpcode::Pcmpeqd => (LegacyPrefixes::_66, 0x0F76, 2),
Expand Down Expand Up @@ -1924,6 +1921,33 @@ pub(crate) fn emit(
}
}

// Emit a blend instruction (`blendvps`, `blendvpd` or `pblendvb`). The mask
// operand is carried on the instruction but is not passed to the encoder
// below; it is only checked to be in xmm0.
Inst::XmmRmRBlend { op, src1, src2, dst, mask } => {
// Allocations are pulled in the order src1, mask, dst, src2.
// NOTE(review): this presumably has to match the order in which the
// operands are reported to the register allocator -- confirm before
// reordering any of these statements.
let src1 = allocs.next(src1.to_reg());
let mask = allocs.next(mask.to_reg());
// The mask is expected to have been assigned to xmm0.
debug_assert_eq!(mask, regs::xmm0());
let reg_g = allocs.next(dst.to_reg().to_reg());
// The destination is expected to share its register with src1.
debug_assert_eq!(src1, reg_g);
let src_e = src2.clone().to_reg_mem().with_allocs(allocs);

let rex = RexFlags::clear_w();
// Only the three blend opcodes are handled here; any other opcode hits
// `unimplemented!`.
let (prefix, opcode, length) = match op {
SseOpcode::Blendvps => (LegacyPrefixes::_66, 0x0F3814, 3),
SseOpcode::Blendvpd => (LegacyPrefixes::_66, 0x0F3815, 3),
SseOpcode::Pblendvb => (LegacyPrefixes::_66, 0x0F3810, 3),
_ => unimplemented!("Opcode {:?} not implemented", op),
};

// Encode with the destination register and src2, which may be either a
// register or a memory operand.
match src_e {
RegMem::Reg { reg: reg_e } => {
emit_std_reg_reg(sink, prefix, opcode, length, reg_g, reg_e, rex);
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state, sink);
emit_std_reg_mem(sink, info, prefix, opcode, length, reg_g, addr, rex, 0);
}
}
}

Inst::XmmRmRVex {
op,
src1,
Expand Down
38 changes: 26 additions & 12 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ impl Inst {
| Inst::XmmMovRM { op, .. }
| Inst::XmmRmiReg { opcode: op, .. }
| Inst::XmmRmR { op, .. }
| Inst::XmmRmRBlend { op, .. }
| Inst::XmmRmRImm { op, .. }
| Inst::XmmToGpr { op, .. }
| Inst::XmmUnaryRmRImm { op, .. }
Expand Down Expand Up @@ -938,6 +939,14 @@ impl PrettyPrint for Inst {
format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst)
}

// Printed as `op src1, src2, dst, <mask>`: the mask comes last and is wrapped
// in angle brackets, e.g. `pblendvb %xmm4, %xmm7, %xmm4, <%xmm0>`.
Inst::XmmRmRBlend { op, src1, src2, mask, dst } => {
// NOTE(review): the operands are read from `allocs` in the order
// src1, mask, dst, src2, which differs from the printed order; this
// presumably mirrors the order the allocations were recorded in --
// confirm before reordering these statements.
let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
let mask = pretty_print_reg(mask.to_reg(), 8, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src2 = src2.pretty_print(8, allocs);
format!("{} {}, {}, {}, <{}>", ljustify(op.to_string()), src1, src2, dst, mask)
}

Inst::XmmRmRVex {
op,
src1,
Expand Down Expand Up @@ -1765,27 +1774,32 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
src.get_operands(collector);
}
Inst::XmmRmR {
src1,
src2,
dst,
op,
..
src1, src2, dst, ..
} => {
if inst.produces_const() {
collector.reg_def(dst.to_writable_reg());
} else {
collector.reg_use(src1.to_reg());
collector.reg_reuse_def(dst.to_writable_reg(), 0);
src2.get_operands(collector);

// Some instructions have an implicit use of XMM0.
if *op == SseOpcode::Blendvpd
}
}
Inst::XmmRmRBlend {
src1,
src2,
mask,
dst,
op,
} => {
assert!(
*op == SseOpcode::Blendvpd
|| *op == SseOpcode::Blendvps
|| *op == SseOpcode::Pblendvb
{
collector.reg_use(regs::xmm0());
}
}
);
collector.reg_use(src1.to_reg());
collector.reg_fixed_use(mask.to_reg(), regs::xmm0());
collector.reg_reuse_def(dst.to_writable_reg(), 0);
src2.get_operands(collector);
}
Inst::XmmRmRVex {
op,
Expand Down
5 changes: 0 additions & 5 deletions cranelift/codegen/src/isa/x64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -336,11 +336,6 @@ impl Context for IsleContext<'_, '_, '_, MInst, Flags, IsaFlags, 6> {
0b00_00_00_00 | lane << 4
}

#[inline]
fn xmm0(&mut self) -> WritableXmm {
WritableXmm::from_reg(Xmm::new(regs::xmm0()).unwrap())
}

#[inline]
fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
RegMem::mem(addr.clone())
Expand Down
28 changes: 14 additions & 14 deletions cranelift/filetests/filetests/isa/x64/simd-bitselect.clif
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ block0(v0: i8x16, v1: i8x16):
; pcmpeqb %xmm4, %xmm1, %xmm4
; movdqa %xmm0, %xmm7
; movdqa %xmm4, %xmm0
; movdqa %xmm1, %xmm5
; pblendvb %xmm5, %xmm7, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm1, %xmm4
; pblendvb %xmm4, %xmm7, %xmm4, <%xmm0>
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand All @@ -34,9 +34,9 @@ block0(v0: f32x4, v1: f32x4, v2: i32x4, v3: i32x4):
; movq %rsp, %rbp
; block0:
; cmpps $0, %xmm0, %xmm1, %xmm0
; movdqa %xmm3, %xmm7
; pblendvb %xmm7, %xmm2, %xmm7
; movdqa %xmm7, %xmm0
; movdqa %xmm3, %xmm6
; pblendvb %xmm6, %xmm2, %xmm6, <%xmm0>
; movdqa %xmm6, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand Down Expand Up @@ -72,10 +72,10 @@ block0(v0: i8x16, v1: i8x16):
; block0:
; movdqa %xmm0, %xmm5
; movdqu const(0), %xmm0
; movdqa %xmm5, %xmm7
; movdqa %xmm1, %xmm5
; pblendvb %xmm5, %xmm7, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm5, %xmm6
; movdqa %xmm1, %xmm4
; pblendvb %xmm4, %xmm6, %xmm4, <%xmm0>
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand All @@ -92,10 +92,10 @@ block0(v0: i16x8, v1: i16x8):
; block0:
; movdqa %xmm0, %xmm5
; movdqu const(0), %xmm0
; movdqa %xmm5, %xmm7
; movdqa %xmm1, %xmm5
; pblendvb %xmm5, %xmm7, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm5, %xmm6
; movdqa %xmm1, %xmm4
; pblendvb %xmm4, %xmm6, %xmm4, <%xmm0>
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand Down
18 changes: 9 additions & 9 deletions cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,9 @@ block0(v0: i16x8, v1: i16x8, v2: i16x8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm2, %xmm5
; pblendvb %xmm5, %xmm1, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm2, %xmm4
; pblendvb %xmm4, %xmm1, %xmm4, <%xmm0>
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand All @@ -153,9 +153,9 @@ block0(v0: i32x4, v1: f32x4, v2: f32x4):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm2, %xmm5
; blendvps %xmm5, %xmm1, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm2, %xmm4
; blendvps %xmm4, %xmm1, %xmm4, <%xmm0>
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand All @@ -169,9 +169,9 @@ block0(v0: i64x2, v1: f64x2, v2: f64x2):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm2, %xmm5
; blendvpd %xmm5, %xmm1, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm2, %xmm4
; blendvpd %xmm4, %xmm1, %xmm4, <%xmm0>
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand Down

0 comments on commit f05681c

Please sign in to comment.