Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aarch64: Migrate bitrev/clz/cls/ctz to ISLE #3658

Merged
merged 1 commit into from
Jan 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1464,6 +1464,13 @@
(_ Unit (emit (MInst.AluRRRR op dst src1 src2 src3))))
(writable_reg_to_reg dst)))

;; Helper for emitting `MInst.BitRR` instructions: allocates a fresh `$I64`
;; temporary as the destination, emits `op` applied to the source register
;; into it, and returns that temporary as a plain register.
(decl bit_rr (BitOp Reg) Reg)
(rule (bit_rr bit_op rn)
      (let ((rd WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.BitRR bit_op rd rn))))
        (writable_reg_to_reg rd)))

;; Helper for emitting `adds` instructions.
(decl add64_with_flags (Reg Reg) ProducesFlags)
(rule (add64_with_flags src1 src2)
Expand All @@ -1485,6 +1492,11 @@
(ProducesFlags.ProducesFlags (MInst.AluRRR (ALUOp.SubS64) dst src1 src2)
(writable_reg_to_reg dst))))

;; Helper for emitting `cmp` instructions against a 12-bit immediate: a
;; `SubS64` whose destination is the zero register, so only the flags are
;; produced.
(decl cmp64_imm (Reg Imm12) ProducesFlags)
(rule (cmp64_imm rn imm12)
      (ProducesFlags.ProducesFlags
        (MInst.AluRRImm12 (ALUOp.SubS64) (writable_zero_reg) rn imm12)
        (zero_reg)))

;; Helper for emitting `sbc` instructions.
(decl sbc64 (Reg Reg) ConsumesFlags)
(rule (sbc64 src1 src2)
Expand Down Expand Up @@ -1837,6 +1849,9 @@
;; 32-bit `orr` of two registers.
(decl orr32 (Reg Reg) Reg)
(rule (orr32 lhs rhs) (alu_rrr (ALUOp.Orr32) lhs rhs))

;; 32-bit `orr` of a register with a logical immediate.
(decl orr32_imm (Reg ImmLogic) Reg)
(rule (orr32_imm lhs mask) (alu_rr_imm_logic (ALUOp.Orr32) lhs mask))

;; 64-bit `orr` of two registers.
(decl orr64 (Reg Reg) Reg)
(rule (orr64 lhs rhs) (alu_rrr (ALUOp.Orr64) lhs rhs))

Expand Down Expand Up @@ -1884,6 +1899,38 @@
;; 64-bit rotate-right of a register by an immediate shift amount.
(decl rotr64_imm (Reg ImmShift) Reg)
(rule (rotr64_imm src amt) (alu_rr_imm_shift (ALUOp.RotR64) src amt))

;; Helpers for generating `rbit` (bit reversal) instructions in their 32-bit
;; and 64-bit forms.

(decl rbit32 (Reg) Reg)
(rule (rbit32 src) (bit_rr (BitOp.RBit32) src))

(decl rbit64 (Reg) Reg)
(rule (rbit64 src) (bit_rr (BitOp.RBit64) src))

;; Helpers for generating `clz` (count leading zeros) instructions in their
;; 32-bit and 64-bit forms.

(decl clz32 (Reg) Reg)
(rule (clz32 src) (bit_rr (BitOp.Clz32) src))

(decl clz64 (Reg) Reg)
(rule (clz64 src) (bit_rr (BitOp.Clz64) src))

;; Helpers for generating `cls` (count leading sign bits) instructions in
;; their 32-bit and 64-bit forms.

(decl cls32 (Reg) Reg)
(rule (cls32 src) (bit_rr (BitOp.Cls32) src))

(decl cls64 (Reg) Reg)
(rule (cls64 src) (bit_rr (BitOp.Cls64) src))

;; Helpers for generating `eon` (`ALUOp.EorNot*`) instructions in their 32-bit
;; and 64-bit forms.

(decl eon32 (Reg Reg) Reg)
(rule (eon32 lhs rhs) (alu_rrr (ALUOp.EorNot32) lhs rhs))

(decl eon64 (Reg Reg) Reg)
(rule (eon64 lhs rhs) (alu_rrr (ALUOp.EorNot64) lhs rhs))

;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl imm (Type u64) Reg)
Expand Down
125 changes: 125 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1004,3 +1004,128 @@
(lo Reg (orr64 (value_regs_get rshift 0) (value_regs_get lshift 0)))
)
(value_regs lo hi)))

;;;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; A 32-bit `rbit` leaves a reversed 8-bit value in the highest 8 bits of the
;; register, so it is shifted right by 24 to move it back into place.
(rule (lower (has_type $I8 (bitrev x)))
      (let ((rev Reg (rbit32 (put_in_reg x))))
        (value_reg (lsr32_imm rev (imm_shift_from_u8 24)))))

;; A 32-bit `rbit` leaves a reversed 16-bit value in the highest 16 bits of
;; the register, so it is shifted right by 16 to move it back into place.
(rule (lower (has_type $I16 (bitrev x)))
      (let ((rev Reg (rbit32 (put_in_reg x))))
        (value_reg (lsr32_imm rev (imm_shift_from_u8 16)))))

;; 32-bit and 64-bit values map directly onto a single `rbit`.
(rule (lower (has_type $I32 (bitrev x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (rbit32 src))))

(rule (lower (has_type $I64 (bitrev x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (rbit64 src))))

;; For 128-bit values each 64-bit half is reversed on its own and the halves
;; then trade places: the reversed high half becomes the low half of the
;; result and the reversed low half becomes the high half.
(rule (lower (has_type $I128 (bitrev x)))
      (let ((src ValueRegs (put_in_regs x))
            (rev_lo Reg (rbit64 (value_regs_get src 0)))
            (rev_hi Reg (rbit64 (value_regs_get src 1))))
        (value_regs rev_hi rev_lo)))

;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Narrow operands are zero-extended to 32 bits before counting. That adds
;; (32 - width) extra leading zeros, which are subtracted from the 32-bit
;; count afterwards.
(rule (lower (has_type $I8 (clz x)))
      (let ((wide_count Reg (clz32 (put_in_reg_zext32 x))))
        (value_reg (sub32_imm wide_count (u8_into_imm12 24)))))

(rule (lower (has_type $I16 (clz x)))
      (let ((wide_count Reg (clz32 (put_in_reg_zext32 x))))
        (value_reg (sub32_imm wide_count (u8_into_imm12 16)))))

;; 32-bit and 64-bit values map directly onto a single `clz`.
(rule (lower (has_type $I32 (clz x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (clz32 src))))

(rule (lower (has_type $I64 (clz x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (clz64 src))))

;; 128-bit values are handled by the shared `lower_clz128` sequence.
(rule (lower (has_type $I128 (clz x)))
      (let ((src ValueRegs (put_in_regs x)))
        (lower_clz128 src)))

;; Counts the leading zeros of a 128-bit value held in a register pair
;; (index 0 = low half, index 1 = high half). Emitted sequence:
;;
;;   clz  clz_hi, hi
;;   clz  clz_lo, lo
;;   lsr  hi_is_zero, clz_hi, #6
;;   madd dst_lo, clz_lo, hi_is_zero, clz_hi
;;   mov  dst_hi, 0
;;
;; A 64-bit `clz` yields at most 64, so shifting the high-half count right by
;; 6 gives 1 only when the high half is entirely zero. The `madd` therefore
;; adds the low-half count to the high-half count only in that case.
(decl lower_clz128 (ValueRegs) ValueRegs)
(rule (lower_clz128 src)
      (let ((clz_hi Reg (clz64 (value_regs_get src 1)))
            (clz_lo Reg (clz64 (value_regs_get src 0)))
            (hi_is_zero Reg (lsr64_imm clz_hi (imm_shift_from_u8 6)))
            (count Reg (madd64 clz_lo hi_is_zero clz_hi))
            (zero Reg (imm $I64 0)))
        (value_regs count zero)))

;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Every `ctz` is lowered as a bit reversal followed by a `clz`: the trailing
;; zeros of a value are exactly the leading zeros of its bit-reversed form.

;; For narrow types the reversed value occupies the top bits of the 32-bit
;; register. OR-ing in the bit immediately below it (bit 23 for 8-bit values,
;; bit 15 for 16-bit values) stops the `clz` there, so an all-zero input
;; yields the type width rather than a larger count.
(rule (lower (has_type $I8 (ctz x)))
      (let ((rev Reg (rbit32 (put_in_reg x)))
            (guarded Reg (orr32_imm rev (u64_into_imm_logic $I32 0x800000))))
        (value_reg (clz32 guarded))))

(rule (lower (has_type $I16 (ctz x)))
      (let ((rev Reg (rbit32 (put_in_reg x)))
            (guarded Reg (orr32_imm rev (u64_into_imm_logic $I32 0x8000))))
        (value_reg (clz32 guarded))))

(rule (lower (has_type $I32 (ctz x)))
      (let ((rev Reg (rbit32 (put_in_reg x))))
        (value_reg (clz32 rev))))

(rule (lower (has_type $I64 (ctz x)))
      (let ((rev Reg (rbit64 (put_in_reg x))))
        (value_reg (clz64 rev))))

;; For 128-bit values each half is reversed and the halves are swapped (the
;; reversed high half is passed as the low half and vice versa), then the
;; 128-bit `clz` sequence is reused on the result.
(rule (lower (has_type $I128 (ctz x)))
      (let ((src ValueRegs (put_in_regs x))
            (rev_lo Reg (rbit64 (value_regs_get src 0)))
            (rev_hi Reg (rbit64 (value_regs_get src 1))))
        (lower_clz128 (value_regs rev_hi rev_lo))))

;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `cls` counts the bits below the sign bit that are equal to the sign bit, so
;; narrow operands must be *sign*-extended to 32 bits before the 32-bit `cls`.
;; The widened value then carries exactly (32 - width) extra copies of the
;; sign bit, which are subtracted back out.
;;
;; Zero-extension is wrong for negative inputs: an 8-bit 0x80 would widen to
;; 0x00000080, the 32-bit `cls` would return 23, and 23 - 24 underflows
;; instead of producing the expected 0.
;;
;; NOTE(review): relies on `put_in_reg_sext32`, the sign-extending counterpart
;; of `put_in_reg_zext32` -- confirm it is declared in inst.isle.
(rule (lower (has_type $I8 (cls x)))
      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 24))))

(rule (lower (has_type $I16 (cls x)))
      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 16))))

;; 32-bit and 64-bit values map directly onto a single `cls`.
(rule (lower (has_type $I32 (cls x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (cls32 src))))

(rule (lower (has_type $I64 (cls x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (cls64 src))))

;; 128-bit `cls`. Emitted sequence:
;;
;;   cls  cls_lo, lo
;;   cls  cls_hi, hi
;;   eon  same_sign_eon, hi, lo
;;   lsr  same_sign, same_sign_eon, #63
;;   madd lo_contrib, cls_lo, same_sign, same_sign
;;   cmp  cls_hi, #63
;;   csel lo_part, lo_contrib, xzr, eq
;;   add  out_lo, lo_part, cls_hi
;;   mov  out_hi, 0
;;
;; `same_sign` is bit 63 of `eon hi, lo`, i.e. 1 when the top bits of the two
;; halves agree and 0 otherwise. `lo_contrib` is therefore `cls_lo + 1` when
;; they agree and 0 when they differ. It is only added to the result when the
;; high half consists entirely of sign bits (`cls_hi == 63`).
(rule (lower (has_type $I128 (cls x)))
      (let ((src ValueRegs (put_in_regs x))
            (lo_half Reg (value_regs_get src 0))
            (hi_half Reg (value_regs_get src 1))
            (cls_lo Reg (cls64 lo_half))
            (cls_hi Reg (cls64 hi_half))
            (same_sign_eon Reg (eon64 hi_half lo_half))
            (same_sign Reg (lsr64_imm same_sign_eon (imm_shift_from_u8 63)))
            (lo_contrib Reg (madd64 cls_lo same_sign same_sign))
            (lo_part Reg (with_flags_1
                           (cmp64_imm cls_hi (u8_into_imm12 63))
                           (csel (Cond.Eq) lo_contrib (zero_reg)))))
        (value_regs (add64 lo_part cls_hi) (imm $I64 0))))
44 changes: 0 additions & 44 deletions cranelift/codegen/src/isa/aarch64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1533,50 +1533,6 @@ pub(crate) fn lower_load<
f(ctx, rd, elem_ty, mem)
}

/// Emits the instruction sequence that counts the leading zeros of the
/// 128-bit value in `src` and writes the 128-bit result to `dst`.
///
/// Register index 0 of each pair is the low half and index 1 the high half.
/// The emitted sequence is:
///
/// ```text
/// clz  dst_hi, src_hi
/// clz  dst_lo, src_lo
/// lsr  tmp, dst_hi, #6
/// madd dst_lo, dst_lo, tmp, dst_hi
/// mov  dst_hi, 0
/// ```
///
/// A 64-bit `clz` yields at most 64, so `tmp` is 1 only when the high half is
/// entirely zero; the `madd` then adds the low-half count to the high-half
/// count only in that case.
pub(crate) fn emit_clz_i128<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    src: ValueRegs<Reg>,
    dst: ValueRegs<Writable<Reg>>,
) {
    let (lo_in, hi_in) = (src.regs()[0], src.regs()[1]);
    let (lo_out, hi_out) = (dst.regs()[0], dst.regs()[1]);

    // Scratch register holding `clz(hi) >> 6`.
    let hi_is_zero = ctx.alloc_tmp(I64).only_reg().unwrap();

    // Count the leading zeros of each half independently.
    ctx.emit(Inst::BitRR {
        op: BitOp::Clz64,
        rd: hi_out,
        rn: hi_in,
    });
    ctx.emit(Inst::BitRR {
        op: BitOp::Clz64,
        rd: lo_out,
        rn: lo_in,
    });

    // hi_is_zero = clz(hi) >> 6
    ctx.emit(Inst::AluRRImmShift {
        alu_op: ALUOp::Lsr64,
        rd: hi_is_zero,
        rn: hi_out.to_reg(),
        immshift: ImmShift::maybe_from_u64(6).unwrap(),
    });

    // lo_out = clz(lo) * hi_is_zero + clz(hi)
    ctx.emit(Inst::AluRRRR {
        alu_op: ALUOp3::MAdd64,
        rd: lo_out,
        rn: lo_out.to_reg(),
        rm: hi_is_zero.to_reg(),
        ra: hi_out.to_reg(),
    });

    // The upper 64 bits of the result are always zero.
    lower_constant_u64(ctx, hi_out, 0);
}

//=============================================================================
// Lowering-backend trait implementation.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
src/clif.isle f176ef3bba99365
src/prelude.isle babc931e5dc5b4cf
src/isa/aarch64/inst.isle 36d057f98a944e4
src/isa/aarch64/lower.isle 43467df9d06b00ac
src/isa/aarch64/inst.isle 3ae25d431916bb81
src/isa/aarch64/lower.isle 5715ecb7c7a41164
Loading