Skip to content

Commit

Permalink
cranelift: Port vselect over to ISLE on x64
Browse files Browse the repository at this point in the history
  • Loading branch information
fitzgen committed Jan 6, 2022
1 parent 7fd78da commit 056f7c2
Show file tree
Hide file tree
Showing 7 changed files with 224 additions and 166 deletions.
22 changes: 22 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,28 @@
(rule (pandn src1 src2)
(xmm_rm_r $F64X2 (SseOpcode.Pandn) src1 src2))

(decl sse_blend_op (Type) SseOpcode)
(rule (sse_blend_op $F32X4) (SseOpcode.Blendvps))
(rule (sse_blend_op $F64X2) (SseOpcode.Blendvpd))
(rule (sse_blend_op (multi_lane _bits _lanes)) (SseOpcode.Pblendvb))

(decl sse_mov_op (Type) SseOpcode)
(rule (sse_mov_op $F32X4) (SseOpcode.Movaps))
(rule (sse_mov_op $F64X2) (SseOpcode.Movapd))
(rule (sse_mov_op (multi_lane _bits _lanes)) (SseOpcode.Movdqa))

;; Helper for creating `blendvp{d,s}` and `pblendvb` instructions.
(decl sse_blend (Type RegMem RegMem Reg) Reg)
(rule (sse_blend ty mask src1 src2)
;; Move the mask into `xmm0`, as blend instructions implicitly operate on
;; that register. (This kind of thing would normally happen inside of
;; `Inst::mov_mitosis`, but has to happen here, where we still have the
;; mask register, because the mask is implicit and doesn't appear in the
;; `Inst` itself.)
(let ((mask2 WritableReg (xmm0))
(_ Unit (emit (MInst.XmmUnaryRmR (sse_mov_op ty) mask mask2))))
(xmm_rm_r ty (sse_blend_op ty) src2 src1)))

;; Helper for creating `blendvpd` instructions.
(decl blendvpd (Reg RegMem Reg) Reg)
(rule (blendvpd src1 src2 mask)
Expand Down
9 changes: 9 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,15 @@
(b Reg (sse_and_not ty cond_reg (put_in_reg_mem if_false))))
(value_reg (sse_or ty b (RegMem.Reg a)))))

;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty @ (multi_lane _bits _lanes)
(vselect condition if_true if_false)))
(value_reg (sse_blend ty
(put_in_reg_mem condition)
(put_in_reg_mem if_true)
(put_in_reg if_false))))

;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (insertlane vec @ (value_type ty) val (u8_from_uimm8 idx)))
Expand Down
65 changes: 5 additions & 60 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1515,6 +1515,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match op {
Opcode::Iconst
| Opcode::Bconst
| Opcode::F32const
| Opcode::F64const
| Opcode::Null
| Opcode::Iadd
| Opcode::IaddIfcout
Expand All @@ -1535,50 +1537,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Imin
| Opcode::Umin
| Opcode::Bnot
| Opcode::Bitselect => implemented_in_isle(ctx),

Opcode::Vselect => {
let ty = ty.unwrap();
let condition = put_input_in_reg(ctx, inputs[0]);
let condition_ty = ctx.input_ty(insn, 0);
let if_true = input_to_reg_mem(ctx, inputs[1]);
let if_false = put_input_in_reg(ctx, inputs[2]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();

if ty.is_vector() {
// `vselect` relies on the bit representation of the condition:
// vector boolean types are defined in Cranelift to be all 1s or
// all 0s. This lowering relies on that fact to use x86's
// variable blend instructions, which look at the _high_bit_ of
// the condition mask. All the bits of vector booleans will
// match (all 1s or all 0s), so we can just use the high bit.
assert!(condition_ty.lane_type().is_bool());

// Variable blend instructions expect the condition mask to be
// in XMM0.
let xmm0 = Writable::from_reg(regs::xmm0());
ctx.emit(Inst::gen_move(xmm0, condition, ty));

// Match up the source and destination registers for regalloc.
ctx.emit(Inst::gen_move(dst, if_false, ty));

// Technically PBLENDVB would work in all cases (since the bytes
// inside the mask will be all 1s or 0s we can blend
// byte-by-byte instead of word-by-word, e.g.) but
// type-specialized versions are included here for clarity when
// troubleshooting and due to slight improvements in
// latency/throughput on certain processor families.
let opcode = match condition_ty {
types::B64X2 => SseOpcode::Blendvpd,
types::B32X4 => SseOpcode::Blendvps,
types::B16X8 | types::B8X16 => SseOpcode::Pblendvb,
_ => unimplemented!("unable lower vselect for type: {}", condition_ty),
};
ctx.emit(Inst::xmm_rm_r(opcode, if_true, dst));
} else {
unimplemented!("no lowering for scalar vselect instruction")
}
}
| Opcode::Bitselect
| Opcode::Vselect => implemented_in_isle(ctx),

Opcode::Ishl | Opcode::Ushr | Opcode::Sshr | Opcode::Rotl | Opcode::Rotr => {
let dst_ty = ctx.output_ty(insn, 0);
Expand Down Expand Up @@ -3254,22 +3214,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
};
}

Opcode::F64const => {
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}

Opcode::F32const => {
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}

Opcode::WideningPairwiseDotProductS => {
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
Expand Down Expand Up @@ -5927,6 +5871,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
println!("Did not match fcvt input!");
}
}

// Unimplemented opcodes below. These are not currently used by Wasm
// lowering or other known embeddings, but should be either supported or
// removed eventually.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
src/clif.isle f176ef3bba99365
src/prelude.isle babc931e5dc5b4cf
src/isa/x64/inst.isle fb5d3ac8e68c46d2
src/isa/x64/lower.isle 5d66b88a371d4d70
src/isa/x64/inst.isle bc5fc626492752c8
src/isa/x64/lower.isle 33e94300f4c08455
Loading

0 comments on commit 056f7c2

Please sign in to comment.