Skip to content

Commit

Permalink
ISLE: implement x64 lowering for band_not in ISLE
Browse files Browse the repository at this point in the history
  • Loading branch information
fitzgen committed Nov 11, 2021
1 parent 33fcd6b commit bfbf2f2
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 31 deletions.
15 changes: 15 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,21 @@
(rule (punpcklwd src1 src2)
(xmm_rm_r $I16X8 (SseOpcode.Punpcklwd) src1 src2))

;; Helper for creating `andnps` instructions.
;;
;; Emits the op through `xmm_rm_r` with type `$F32X4`. `src1` must be a
;; register; `src2` may be a register or a memory operand. Following the x86
;; and-not convention, it is the *first* operand that gets inverted:
;;
;;     andnps(src1, src2) = and(not(src1), src2)
(decl andnps (Reg RegMem) Reg)
(rule (andnps src1 src2)
(xmm_rm_r $F32X4 (SseOpcode.Andnps) src1 src2))

;; Helper for creating `andnpd` instructions.
;;
;; Emits the op through `xmm_rm_r` with type `$F64X2`. `src1` must be a
;; register; `src2` may be a register or a memory operand. Following the x86
;; and-not convention, it is the *first* operand that gets inverted:
;;
;;     andnpd(src1, src2) = and(not(src1), src2)
(decl andnpd (Reg RegMem) Reg)
(rule (andnpd src1 src2)
(xmm_rm_r $F64X2 (SseOpcode.Andnpd) src1 src2))

;; Helper for creating `pandn` instructions.
;;
;; Emits the op through `xmm_rm_r`. `src1` must be a register; `src2` may be a
;; register or a memory operand. The *first* operand is the one inverted:
;;
;;     pandn(src1, src2) = and(not(src1), src2)
;;
;; NOTE(review): this passes `$F64X2` even though `pandn` is the integer form
;; of the operation. Presumably the type only matters for allocating the
;; 128-bit XMM result register, so any 128-bit vector type works -- confirm
;; against the definition of `xmm_rm_r`.
(decl pandn (Reg RegMem) Reg)
(rule (pandn src1 src2)
(xmm_rm_r $F64X2 (SseOpcode.Pandn) src1 src2))

;; Helper for creating `MInst.XmmRmRImm` instructions.
(decl xmm_rm_r_imm (SseOpcode Reg RegMem u8 OperandSize) Reg)
(rule (xmm_rm_r_imm op src1 src2 imm size)
Expand Down
19 changes: 19 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -926,3 +926,22 @@
0x50
(OperandSize.Size32))))
(value_reg (pmuludq x2 (RegMem.Reg y2)))))

;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Note the flipping of operands below. CLIF specifies
;;
;; band_not(x, y) = and(x, not(y))
;;
;; while x86 does
;;
;; pandn(x, y) = and(not(x), y)

;; `f32x4`: lower to `andnps`. `y` goes first (and must be in a register)
;; because the first operand is the one x86 inverts; `x` may stay in memory.
(rule (lower (has_type $F32X4 (band_not x y)))
(value_reg (andnps (put_in_reg y) (put_in_reg_mem x))))

;; `f64x2`: lower to `andnpd`. `y` goes first (and must be in a register)
;; because the first operand is the one x86 inverts; `x` may stay in memory.
(rule (lower (has_type $F64X2 (band_not x y)))
(value_reg (andnpd (put_in_reg y) (put_in_reg_mem x))))

;; Any other multi-lane (vector) type: lower to the integer `pandn`, again
;; with `y` first so that it is the operand being inverted.
;;
;; NOTE(review): `(multi_lane _bits _lanes)` presumably also matches `$F32X4`
;; and `$F64X2`, so this relies on the more specific float rules for
;; `band_not` taking precedence -- confirm against ISLE's rule-ordering
;; semantics.
(rule (lower (has_type (multi_lane _bits _lanes) (band_not x y)))
(value_reg (pandn (put_in_reg y) (put_in_reg_mem x))))
20 changes: 2 additions & 18 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1519,31 +1519,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Band
| Opcode::Bor
| Opcode::Bxor
| Opcode::Imul => {
| Opcode::Imul
| Opcode::BandNot => {
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}

Opcode::BandNot => {
let ty = ty.unwrap();
debug_assert!(ty.is_vector() && ty.bytes() == 16);
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let sse_op = match ty {
types::F32X4 => SseOpcode::Andnps,
types::F64X2 => SseOpcode::Andnpd,
_ => SseOpcode::Pandn,
};
// Note the flipping of operands: the `rhs` operand is used as the destination instead
// of the `lhs` as in the other bit operations above (e.g. `band`).
ctx.emit(Inst::gen_move(dst, rhs, ty));
ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst));
}

Opcode::Iabs => {
let src = input_to_reg_mem(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
Expand Down
86 changes: 73 additions & 13 deletions cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit bfbf2f2

Please sign in to comment.