Skip to content

Commit

Permalink
aarch64: Migrate bitrev/clz/cls/ctz to ISLE (#3658)
Browse files Browse the repository at this point in the history
This commit migrates these existing instructions to ISLE from the manual
lowerings implemented today. This was mostly straightforward but while I
was at it I fixed what appeared to be broken translations for I{8,16}
for `clz`, `cls`, and `ctz`. Previously the lowerings would produce
results as if the input were 32 bits wide, but now I believe they all
correctly account for the bit-width.
  • Loading branch information
alexcrichton authored Jan 6, 2022
1 parent 7fd78da commit 72e2b7f
Show file tree
Hide file tree
Showing 9 changed files with 1,047 additions and 615 deletions.
47 changes: 47 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1464,6 +1464,13 @@
(_ Unit (emit (MInst.AluRRRR op dst src1 src2 src3))))
(writable_reg_to_reg dst)))

;; Helper for emitting `MInst.BitRR` instructions.
;;
;; The instruction's result is written to a freshly allocated `$I64`
;; temporary, and that temporary is returned as a plain `Reg`.
(decl bit_rr (BitOp Reg) Reg)
(rule (bit_rr op rn)
      (let ((rd WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.BitRR op rd rn))))
        (writable_reg_to_reg rd)))

;; Helper for emitting `adds` instructions.
(decl add64_with_flags (Reg Reg) ProducesFlags)
(rule (add64_with_flags src1 src2)
Expand All @@ -1485,6 +1492,11 @@
(ProducesFlags.ProducesFlags (MInst.AluRRR (ALUOp.SubS64) dst src1 src2)
(writable_reg_to_reg dst))))

;; Helper for emitting `cmp` instructions against an `Imm12` immediate: a
;; flag-setting 64-bit subtract (`ALUOp.SubS64`) whose destination is the zero
;; register, so only the flags are produced.
(decl cmp64_imm (Reg Imm12) ProducesFlags)
(rule (cmp64_imm rn imm12)
      (ProducesFlags.ProducesFlags
        (MInst.AluRRImm12 (ALUOp.SubS64) (writable_zero_reg) rn imm12)
        (zero_reg)))

;; Helper for emitting `sbc` instructions.
(decl sbc64 (Reg Reg) ConsumesFlags)
(rule (sbc64 src1 src2)
Expand Down Expand Up @@ -1837,6 +1849,9 @@
;; 32-bit `orr` of two registers.
(decl orr32 (Reg Reg) Reg)
(rule (orr32 lhs rhs)
      (alu_rrr (ALUOp.Orr32) lhs rhs))

;; 32-bit `orr` of a register with a logical immediate.
(decl orr32_imm (Reg ImmLogic) Reg)
(rule (orr32_imm src mask)
      (alu_rr_imm_logic (ALUOp.Orr32) src mask))

;; 64-bit `orr` of two registers.
(decl orr64 (Reg Reg) Reg)
(rule (orr64 lhs rhs)
      (alu_rrr (ALUOp.Orr64) lhs rhs))

Expand Down Expand Up @@ -1884,6 +1899,38 @@
;; 64-bit rotate-right of a register by an immediate shift amount.
(decl rotr64_imm (Reg ImmShift) Reg)
(rule (rotr64_imm src amt)
      (alu_rr_imm_shift (ALUOp.RotR64) src amt))

;; Helpers for generating `rbit` instructions.

;; 32-bit `rbit` (bit reversal).
(decl rbit32 (Reg) Reg)
(rule (rbit32 src)
      (bit_rr (BitOp.RBit32) src))

;; 64-bit `rbit` (bit reversal).
(decl rbit64 (Reg) Reg)
(rule (rbit64 src)
      (bit_rr (BitOp.RBit64) src))

;; Helpers for generating `clz` instructions.

;; 32-bit `clz` (count leading zeros).
(decl clz32 (Reg) Reg)
(rule (clz32 src)
      (bit_rr (BitOp.Clz32) src))

;; 64-bit `clz` (count leading zeros).
(decl clz64 (Reg) Reg)
(rule (clz64 src)
      (bit_rr (BitOp.Clz64) src))

;; Helpers for generating `cls` instructions.

;; 32-bit `cls` (count leading sign bits).
(decl cls32 (Reg) Reg)
(rule (cls32 src)
      (bit_rr (BitOp.Cls32) src))

;; 64-bit `cls` (count leading sign bits).
(decl cls64 (Reg) Reg)
(rule (cls64 src)
      (bit_rr (BitOp.Cls64) src))

;; Helpers for generating `eon` instructions.

;; 32-bit `eon` (`ALUOp.EorNot32`) of two registers.
(decl eon32 (Reg Reg) Reg)
(rule (eon32 lhs rhs)
      (alu_rrr (ALUOp.EorNot32) lhs rhs))

;; 64-bit `eon` (`ALUOp.EorNot64`) of two registers.
(decl eon64 (Reg Reg) Reg)
(rule (eon64 lhs rhs)
      (alu_rrr (ALUOp.EorNot64) lhs rhs))

;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl imm (Type u64) Reg)
Expand Down
125 changes: 125 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1004,3 +1004,128 @@
(lo Reg (orr64 (value_regs_get rshift 0) (value_regs_get lshift 0)))
)
(value_regs lo hi)))

;;;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; A 32-bit `rbit` leaves the reversed 8-bit value in bits 31..24 of the
;; result, so a logical shift right by 24 moves it back down into the low
;; byte.
(rule (lower (has_type $I8 (bitrev val)))
      (let ((reversed Reg (rbit32 (put_in_reg val))))
        (value_reg (lsr32_imm reversed (imm_shift_from_u8 24)))))

;; A 32-bit `rbit` leaves the reversed 16-bit value in bits 31..16 of the
;; result, so a logical shift right by 16 moves it back down into the low
;; half-word.
(rule (lower (has_type $I16 (bitrev val)))
      (let ((reversed Reg (rbit32 (put_in_reg val))))
        (value_reg (lsr32_imm reversed (imm_shift_from_u8 16)))))

;; 32-bit values map directly onto a single 32-bit `rbit`.
(rule (lower (has_type $I32 (bitrev val)))
      (let ((src Reg (put_in_reg val)))
        (value_reg (rbit32 src))))

;; 64-bit values map directly onto a single 64-bit `rbit`.
(rule (lower (has_type $I64 (bitrev val)))
      (let ((src Reg (put_in_reg val)))
        (value_reg (rbit64 src))))

;; A 128-bit reversal reverses each 64-bit half and then swaps the halves: the
;; reversed high half becomes the low result register and vice versa.
(rule (lower (has_type $I128 (bitrev val)))
      (let (
          (src ValueRegs (put_in_regs val))
          (rev_of_lo Reg (rbit64 (value_regs_get src 0)))
          (rev_of_hi Reg (rbit64 (value_regs_get src 1)))
        )
        (value_regs rev_of_hi rev_of_lo)))

;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Zero-extend to 32 bits, count with a 32-bit `clz`, then subtract the 24
;; leading zeros that the extension itself introduced.
(rule (lower (has_type $I8 (clz val)))
      (let ((wide Reg (put_in_reg_zext32 val))
            (count Reg (clz32 wide)))
        (value_reg (sub32_imm count (u8_into_imm12 24)))))

;; Zero-extend to 32 bits, count with a 32-bit `clz`, then subtract the 16
;; leading zeros that the extension itself introduced.
(rule (lower (has_type $I16 (clz val)))
      (let ((wide Reg (put_in_reg_zext32 val))
            (count Reg (clz32 wide)))
        (value_reg (sub32_imm count (u8_into_imm12 16)))))

;; 32-bit values map directly onto a single 32-bit `clz`.
(rule (lower (has_type $I32 (clz val)))
      (let ((src Reg (put_in_reg val)))
        (value_reg (clz32 src))))

;; 64-bit values map directly onto a single 64-bit `clz`.
(rule (lower (has_type $I64 (clz val)))
      (let ((src Reg (put_in_reg val)))
        (value_reg (clz64 src))))

;; 128-bit values are handled by the shared `lower_clz128` sequence.
(rule (lower (has_type $I128 (clz val)))
      (let ((src ValueRegs (put_in_regs val)))
        (lower_clz128 src)))

;; Counts the leading zeros of a 128-bit value held in a register pair
;; (index 0 = low half, index 1 = high half). The count is returned in the low
;; result register; the high result register is zero.
;;
;;   clz  hi_count, hi
;;   clz  lo_count, lo
;;   lsr  hi_all_zero, hi_count, #6
;;   madd dst_lo, lo_count, hi_all_zero, hi_count
;;   mov  dst_hi, 0
;;
;; `hi_count` lies in 0..=64, so shifting it right by 6 yields 1 only when the
;; high half is entirely zero; the `madd` therefore adds in `lo_count` only in
;; that case.
(decl lower_clz128 (ValueRegs) ValueRegs)
(rule (lower_clz128 src)
      (let (
          (hi_count Reg (clz64 (value_regs_get src 1)))
          (lo_count Reg (clz64 (value_regs_get src 0)))
          (hi_all_zero Reg (lsr64_imm hi_count (imm_shift_from_u8 6)))
          (dst_lo Reg (madd64 lo_count hi_all_zero hi_count))
          (dst_hi Reg (imm $I64 0))
        )
        (value_regs dst_lo dst_hi)))

;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Note that all `ctz` instructions are implemented by reversing the bits and
;; then using a `clz` instruction since the tail zeros are the same as the
;; leading zeros of the reversed value.

;; After a 32-bit `rbit` the 8 input bits occupy bits 31..24. Forcing bit 23
;; on (0x800000) caps the following `clz` at 8 when all input bits are zero.
(rule (lower (has_type $I8 (ctz val)))
      (let ((reversed Reg (rbit32 (put_in_reg val)))
            (capped Reg (orr32_imm reversed (u64_into_imm_logic $I32 0x800000))))
        (value_reg (clz32 capped))))

;; After a 32-bit `rbit` the 16 input bits occupy bits 31..16. Forcing bit 15
;; on (0x8000) caps the following `clz` at 16 when all input bits are zero.
(rule (lower (has_type $I16 (ctz val)))
      (let ((reversed Reg (rbit32 (put_in_reg val)))
            (capped Reg (orr32_imm reversed (u64_into_imm_logic $I32 0x8000))))
        (value_reg (clz32 capped))))

;; 32-bit: reverse the bits, then count leading zeros.
(rule (lower (has_type $I32 (ctz val)))
      (let ((reversed Reg (rbit32 (put_in_reg val))))
        (value_reg (clz32 reversed))))

;; 64-bit: reverse the bits, then count leading zeros.
(rule (lower (has_type $I64 (ctz val)))
      (let ((reversed Reg (rbit64 (put_in_reg val))))
        (value_reg (clz64 reversed))))

;; 128-bit: reverse each 64-bit half and swap the halves (which together
;; reverse the full 128-bit value), then reuse the 128-bit `clz` sequence.
(rule (lower (has_type $I128 (ctz val)))
      (let (
          (src ValueRegs (put_in_regs val))
          (rev_of_lo Reg (rbit64 (value_regs_get src 0)))
          (rev_of_hi Reg (rbit64 (value_regs_get src 1)))
        )
        (lower_clz128 (value_regs rev_of_hi rev_of_lo))))

;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `cls` counts how many bits below the sign bit are equal to it, so the 8-bit
;; input must be *sign*-extended to 32 bits: the extension then contributes
;; exactly 24 additional copies of the sign bit, which are subtracted back out.
;; Zero-extending instead would make every negative input look non-negative
;; (an input of -1 would yield 23 - 24 = -1 rather than 7).
(rule (lower (has_type $I8 (cls x)))
      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 24))))

;; `cls` counts how many bits below the sign bit are equal to it, so the
;; 16-bit input must be *sign*-extended to 32 bits: the extension then
;; contributes exactly 16 additional copies of the sign bit, which are
;; subtracted back out. Zero-extending instead would make every negative input
;; look non-negative (an input of -1 would yield 15 - 16 = -1 rather than 15).
(rule (lower (has_type $I16 (cls x)))
      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 16))))

;; 32-bit values map directly onto a single 32-bit `cls`.
(rule (lower (has_type $I32 (cls val)))
      (let ((src Reg (put_in_reg val)))
        (value_reg (cls32 src))))

;; 64-bit values map directly onto a single 64-bit `cls`.
(rule (lower (has_type $I64 (cls val)))
      (let ((src Reg (put_in_reg val)))
        (value_reg (cls64 src))))

;; 128-bit `cls`. The count is returned in the low result register; the high
;; result register is zero.
;;
;;   cls  lo_count, lo
;;   cls  hi_count, hi
;;   eon  signs_eon, hi, lo
;;   lsr  signs_match, signs_eon, #63
;;   madd lo_extra, lo_count, signs_match, signs_match
;;   cmp  hi_count, #63
;;   csel lo_part, lo_extra, xzr, eq
;;   add  out_lo, lo_part, hi_count
;;   mov  out_hi, 0
;;
;; `signs_match` is 1 when the top bits of both halves agree and 0 otherwise,
;; so `lo_extra` is `lo_count + 1` (the low half's own top bit plus its run of
;; matching bits) or 0. The low half only contributes when the high half is
;; made up entirely of sign bits, i.e. when `hi_count` is 63.
(rule (lower (has_type $I128 (cls val)))
      (let (
          (src ValueRegs (put_in_regs val))
          (src_lo Reg (value_regs_get src 0))
          (src_hi Reg (value_regs_get src 1))
          (lo_count Reg (cls64 src_lo))
          (hi_count Reg (cls64 src_hi))
          (signs_eon Reg (eon64 src_hi src_lo))
          (signs_match Reg (lsr64_imm signs_eon (imm_shift_from_u8 63)))
          (lo_extra Reg (madd64 lo_count signs_match signs_match))
          (lo_part Reg (with_flags_1
                         (cmp64_imm hi_count (u8_into_imm12 63))
                         (csel (Cond.Eq) lo_extra (zero_reg))))
          (out_lo Reg (add64 lo_part hi_count))
          (out_hi Reg (imm $I64 0))
        )
        (value_regs out_lo out_hi)))
44 changes: 0 additions & 44 deletions cranelift/codegen/src/isa/aarch64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1533,50 +1533,6 @@ pub(crate) fn lower_load<
f(ctx, rd, elem_ty, mem)
}

/// Emits the instruction sequence that counts the leading zeros of a 128-bit
/// value.
///
/// `src` and `dst` are register pairs in which index 0 is the low 64 bits and
/// index 1 is the high 64 bits. The high destination register is used as
/// scratch space for the high half's count before finally being set to zero.
pub(crate) fn emit_clz_i128<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    src: ValueRegs<Reg>,
    dst: ValueRegs<Writable<Reg>>,
) {
    let (lo_in, hi_in) = (src.regs()[0], src.regs()[1]);
    let (lo_out, hi_out) = (dst.regs()[0], dst.regs()[1]);
    let scratch = ctx.alloc_tmp(I64).only_reg().unwrap();

    // Emitted sequence:
    //
    //   clz  hi_out, hi_in
    //   clz  lo_out, lo_in
    //   lsr  scratch, hi_out, #6
    //   madd lo_out, lo_out, scratch, hi_out
    //   mov  hi_out, 0

    // Count the leading zeros of each half independently.
    ctx.emit(Inst::BitRR {
        op: BitOp::Clz64,
        rd: hi_out,
        rn: hi_in,
    });
    ctx.emit(Inst::BitRR {
        op: BitOp::Clz64,
        rd: lo_out,
        rn: lo_in,
    });

    // The high count is at most 64, so shifting it right by 6 leaves 1 only
    // when the high half is entirely zero.
    ctx.emit(Inst::AluRRImmShift {
        alu_op: ALUOp::Lsr64,
        rd: scratch,
        rn: hi_out.to_reg(),
        immshift: ImmShift::maybe_from_u64(6).unwrap(),
    });

    // lo_out = lo_count * scratch + hi_count: the low half's count is added
    // in only when the high half contributed all 64 zeros.
    ctx.emit(Inst::AluRRRR {
        alu_op: ALUOp3::MAdd64,
        rd: lo_out,
        rn: lo_out.to_reg(),
        rm: scratch.to_reg(),
        ra: hi_out.to_reg(),
    });

    // The result always fits in the low register; clear the high one.
    lower_constant_u64(ctx, hi_out, 0);
}

//=============================================================================
// Lowering-backend trait implementation.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
src/clif.isle f176ef3bba99365
src/prelude.isle babc931e5dc5b4cf
src/isa/aarch64/inst.isle 36d057f98a944e4
src/isa/aarch64/lower.isle 43467df9d06b00ac
src/isa/aarch64/inst.isle 3ae25d431916bb81
src/isa/aarch64/lower.isle 5715ecb7c7a41164
Loading

0 comments on commit 72e2b7f

Please sign in to comment.