Skip to content

Commit

Permalink
cranelift: port sshr to ISLE on x64
Browse files Browse the repository at this point in the history
  • Loading branch information
fitzgen committed Jan 11, 2022
1 parent 056f7c2 commit 375e336
Show file tree
Hide file tree
Showing 13 changed files with 942 additions and 446 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
src/clif.isle f176ef3bba99365
src/prelude.isle babc931e5dc5b4cf
src/prelude.isle e5e437b1aed2a1e1
src/isa/aarch64/inst.isle 36d057f98a944e4
src/isa/aarch64/lower.isle 43467df9d06b00ac
17 changes: 10 additions & 7 deletions cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

88 changes: 88 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
(dst WritableReg)
(imm u8)
(size OperandSize))
(XmmUninitializedValue (dst WritableReg))
(CmpRmiR (size OperandSize)
(opcode CmpOpcode)
(src RegMemImm)
Expand Down Expand Up @@ -292,6 +293,15 @@
(Mem (addr SyntheticAmode))
(Imm (simm32 u32))))

;; Put the given clif value into a `RegMemImm` operand.
;;
;; Asserts that the value fits into a single register, and doesn't require
;; multiple registers for its representation (like `i128` for example).
;;
;; As a side effect, this marks the value as used.
;;
;; This is an `extern constructor`: only the signature is declared here, and
;; the implementation is supplied outside of ISLE under the same name.
(decl put_in_reg_mem_imm (Value) RegMemImm)
(extern constructor put_in_reg_mem_imm put_in_reg_mem_imm)

(type RegMem extern
(enum
(Reg (reg Reg))
Expand Down Expand Up @@ -319,6 +329,15 @@
(enum (Imm8 (imm u8))
(Reg (reg Reg))))

;; Put the given clif value into an `Imm8Reg` operand.
;;
;; Asserts that the value fits into a single register, and doesn't require
;; multiple registers for its representation (like `i128` for example).
;;
;; As a side effect, this marks the value as used.
;;
;; This is an `extern constructor`: only the signature is declared here, and
;; the implementation is supplied outside of ISLE under the same name.
(decl put_in_imm8_reg (Value) Imm8Reg)
(extern constructor put_in_imm8_reg put_in_imm8_reg)

(type CC extern
(enum O
NO
Expand Down Expand Up @@ -494,6 +513,37 @@
wr))))
r))

;; Helper for creating an SSE register holding an `i64x2` from two `i64` values.
;;
;; `lo` is inserted with lane immediate 0 and `hi` with lane immediate 1 into a
;; fresh `$I64X2` temporary, which is then returned as a `Reg`.
(decl make_i64x2_from_lanes (RegMem RegMem) Reg)
(rule (make_i64x2_from_lanes lo hi)
(let ((dst_w WritableReg (temp_writable_reg $I64X2))
(dst_r Reg (writable_reg_to_reg dst_w))
;; Both inserts below take `dst_r` as an input operand as well as writing
;; `dst_w`; presumably this is why the temporary is given an (uninitialized)
;; definition before the first insert.
(_0 Unit (emit (MInst.XmmUninitializedValue dst_w)))
;; Insert `lo` (lane immediate 0).
;; NOTE(review): presumably `Pinsrd` combined with `OperandSize.Size64`
;; selects the 64-bit (`pinsrq`) form -- confirm against the emitter.
(_1 Unit (emit (MInst.XmmRmRImm (SseOpcode.Pinsrd)
dst_r
lo
dst_w
0
(OperandSize.Size64))))
;; Insert `hi` (lane immediate 1).
(_2 Unit (emit (MInst.XmmRmRImm (SseOpcode.Pinsrd)
dst_r
hi
dst_w
1
(OperandSize.Size64)))))
dst_r))

;; Make sure a register-held `RegMemImm` operand lives in an XMM register.
;;
;; A `RegMemImm.Reg` is moved into an XMM register with a 32-bit `movd`;
;; memory and immediate operands are handed back unchanged.
(decl reg_mem_imm_to_xmm (RegMemImm) RegMemImm)
(rule (reg_mem_imm_to_xmm (RegMemImm.Reg gpr))
      (RegMemImm.Reg (gpr_to_xmm $I8X16
                                 (SseOpcode.Movd)
                                 (RegMem.Reg gpr)
                                 (OperandSize.Size32))))
(rule (reg_mem_imm_to_xmm operand @ (RegMemImm.Mem _)) operand)
(rule (reg_mem_imm_to_xmm operand @ (RegMemImm.Imm _)) operand)


;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; These constructors create SSA-style `MInst`s. It is their responsibility to
Expand Down Expand Up @@ -1058,6 +1108,21 @@
(rule (pminud src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pminud) src1 src2))

;; Emit a `punpcklbw` of `lhs` with `rhs`, returning the `$I8X16` result
;; register.
(decl punpcklbw (Reg RegMem) Reg)
(rule (punpcklbw lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Punpcklbw) lhs rhs))

;; Emit a `punpckhbw` of `lhs` with `rhs`, returning the `$I8X16` result
;; register.
(decl punpckhbw (Reg RegMem) Reg)
(rule (punpckhbw lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Punpckhbw) lhs rhs))

;; Emit a `packsswb` of `lhs` with `rhs`, returning the `$I8X16` result
;; register.
(decl packsswb (Reg RegMem) Reg)
(rule (packsswb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Packsswb) lhs rhs))

;; Helper for creating `MInst.XmmRmRImm` instructions.
(decl xmm_rm_r_imm (SseOpcode Reg RegMem u8 OperandSize) Reg)
(rule (xmm_rm_r_imm op src1 src2 imm size)
Expand Down Expand Up @@ -1180,6 +1245,16 @@
(rule (psrlq src1 src2)
(xmm_rmi_reg (SseOpcode.Psrlq) src1 src2))

;; Emit a `psraw` that shifts `value` by `count`, returning the result
;; register.
(decl psraw (Reg RegMemImm) Reg)
(rule (psraw value count)
      (xmm_rmi_reg (SseOpcode.Psraw) value count))

;; Emit a `psrad` that shifts `value` by `count`, returning the result
;; register.
(decl psrad (Reg RegMemImm) Reg)
(rule (psrad value count)
      (xmm_rmi_reg (SseOpcode.Psrad) value count))

;; Helper for creating `MInst.MulHi` instructions.
;;
;; Returns the (lo, hi) register halves of the multiplication.
Expand Down Expand Up @@ -1252,6 +1327,19 @@
(rule (insertps src1 src2 lane)
(xmm_rm_r_imm (SseOpcode.Insertps) src1 src2 lane (OperandSize.Size32)))

;; Helper for creating `pextrd` instructions.
;;
;; Extracts lane `lane` of `src` into a fresh temporary register of type `ty`
;; and returns that register.
;;
;; The operand size is derived from `(lane_type ty)` via
;; `operand_size_of_type_32_64`.
;; NOTE(review): the `sshr.i64x2` lowering calls this with `$I64`; presumably
;; that yields a 64-bit operand size and therefore the `pextrq` form -- confirm
;; against the emitter.
(decl pextrd (Type Reg u8) Reg)
(rule (pextrd ty src lane)
(let ((w_dst WritableReg (temp_writable_reg ty))
(r_dst Reg (writable_reg_to_reg w_dst))
(_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pextrd)
r_dst
(RegMem.Reg src)
w_dst
lane
(operand_size_of_type_32_64 (lane_type ty))))))
r_dst))

;; Helper for creating `not` instructions.
(decl not (Type Reg) Reg)
(rule (not ty src)
Expand Down
117 changes: 117 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,123 @@
(let ((amt_ Reg (lo_reg amt)))
(shr_i128 (put_in_regs src) amt_)))

;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i64` and smaller.

;; Shift amount held in a register: sign-extend the source first, then emit a
;; `sar` by the low register of `amt`.
(rule (lower (has_type (fits_in_64 ty) (sshr src amt)))
      (let ((extended Reg (extend_to_reg src ty (ExtendKind.Sign)))
            ;; NB: only the low bits of `amt` are relevant, because the shift
            ;; amount is logically masked to the bit width of the value.
            (count Reg (lo_reg amt)))
        (value_reg (sar ty extended (Imm8Reg.Reg count)))))

;; Shift amount matched by `imm8_from_value`: sign-extend the source, then
;; emit a `sar` that takes the extracted amount directly.
(rule (lower (has_type (fits_in_64 ty) (sshr src (imm8_from_value imm_amt))))
      (let ((extended Reg (extend_to_reg src ty (ExtendKind.Sign))))
        (value_reg (sar ty extended imm_amt))))

;; `i128`.

;; Arithmetic right shift of a 128-bit value by `amt`.
;;
;; `src` holds the value as two registers (index 0 is the low half, index 1 is
;; the high half); the result is returned as a low/high pair in the same
;; layout.
(decl sar_i128 (ValueRegs Reg) ValueRegs)
(rule (sar_i128 src amt)
;; Unpack the low/high halves of `src`.
(let ((src_lo Reg (value_regs_get src 0))
(src_hi Reg (value_regs_get src 1))
;; Do a shift of each half. NB: the low half uses an unsigned shift
;; because its MSB is not a sign bit.
(lo_shifted Reg (shr $I64 src_lo (Imm8Reg.Reg amt)))
(hi_shifted Reg (sar $I64 src_hi (Imm8Reg.Reg amt)))
;; `src_hi << (64 - amt)` are the bits to carry over from the high
;; half into the low half.
(carry Reg (shl $I64 src_hi (Imm8Reg.Reg (sub $I64 (imm $I64 64) (RegMemImm.Reg amt)))))
;; Nullify the carry if we are shifting by a multiple of 128, i.e. when
;; `amt & 127` is zero: the `cmove` on `CC.Z` then picks the zero
;; constant instead of `carry`.
(carry_ Reg (with_flags_1 (test (OperandSize.Size64) (RegMemImm.Imm 127) amt)
(cmove $I64 (CC.Z) (RegMem.Reg (imm $I64 0)) carry)))
;; Add the carry into the low half.
(lo_shifted_ Reg (or $I64 lo_shifted (RegMemImm.Reg carry_)))
;; Get all sign bits.
(sign_bits Reg (sar $I64 src_hi (Imm8Reg.Imm8 63))))
;; Combine the two shifted halves. However, if we are shifting by >= 64
;; (modulo 128), then the hi bits are all sign bits and the lo bits are
;; what would otherwise be our hi bits.
;;
;; The `test` against 64 checks that single bit of `amt`: when it is clear
;; (`CC.Z`) the result is `(lo_shifted_, hi_shifted)`, otherwise it is
;; `(hi_shifted, sign_bits)`.
(with_flags_2 (test (OperandSize.Size64) (RegMemImm.Imm 64) amt)
(cmove $I64 (CC.Z) (RegMem.Reg lo_shifted_) hi_shifted)
(cmove $I64 (CC.Z) (RegMem.Reg hi_shifted) sign_bits))))

;; Lower a 128-bit `sshr` by delegating to `sar_i128`.
(rule (lower (has_type $I128 (sshr src amt)))
      ;; NB: the shift amount is logically masked to the value's bit width, so
      ;; only the low register of `amt` is needed.
      (let ((count Reg (lo_reg amt)))
        (sar_i128 (put_in_regs src) count)))

;; SSE.

;; Since the x86 instruction set does not have an 8x16 shift instruction and the
;; approach used for `ishl` and `ushr` cannot be easily used (the masks do not
;; preserve the sign), we use a different approach here: separate the low and
;; high lanes, shift them separately, and merge them into the final result.
;;
;; Visually, this looks like the following, where `src.i8x16 = [s0, s1, ...,
;; s15]`:
;;
;; lo.i16x8 = [(s0, s0), (s1, s1), ..., (s7, s7)]
;; shifted_lo.i16x8 = shift each lane of `lo`
;; hi.i16x8 = [(s8, s8), (s9, s9), ..., (s15, s15)]
;; shifted_hi.i16x8 = shift each lane of `hi`
;; result = [s0'', s1'', ..., s15'']
;;
;; NOTE(review): the shift amount is not masked to the 8-bit lane width before
;; 8 is added to it (contrast with `sshr_i64x2_shift_amount`, which masks
;; immediates) -- confirm that amounts >= 8 behave as CLIF requires.
(rule (lower (has_type $I8X16 (sshr src amt @ (value_type amt_ty))))
(let ((src_ Reg (put_in_reg src))
;; In order for `packsswb` later to only use the high byte of each
;; 16x8 lane, we shift right an extra 8 bits, relying on `psraw` to
;; fill in the upper bits appropriately.
(amt_ RegMemImm (sshr_i8x16_bigger_shift amt_ty (put_in_reg_mem_imm amt)))
;; Unpack `src_` with itself so that each byte is duplicated into both
;; halves of a 16-bit lane (low eight bytes, then high eight bytes).
(lo Reg (punpcklbw src_ (RegMem.Reg src_)))
(shifted_lo Reg (psraw lo amt_))
(hi Reg (punpckhbw src_ (RegMem.Reg src_)))
(shifted_hi Reg (psraw hi amt_)))
;; Narrow the two shifted 16x8 halves back into a single 8x16 result.
(value_reg (packsswb shifted_lo (RegMem.Reg shifted_hi)))))

;; Add 8 to the shift amount used by the `sshr.i8x16` lowering.
;;
;; * Immediate amounts are adjusted at compile time with `u32_add`.
;; * Register and memory amounts are adjusted with an `add` of type `amt_ty`,
;;   and the resulting register is then passed through `reg_mem_imm_to_xmm`.
(decl sshr_i8x16_bigger_shift (Type RegMemImm) RegMemImm)
(rule (sshr_i8x16_bigger_shift _ (RegMemImm.Imm count))
      (RegMemImm.Imm (u32_add count 8)))
(rule (sshr_i8x16_bigger_shift amt_ty (RegMemImm.Reg count))
      (reg_mem_imm_to_xmm (RegMemImm.Reg (add amt_ty count (RegMemImm.Imm 8)))))
(rule (sshr_i8x16_bigger_shift amt_ty count @ (RegMemImm.Mem _))
      (reg_mem_imm_to_xmm (RegMemImm.Reg (add amt_ty (imm amt_ty 8) count))))

;; `sshr.i16x8` and `sshr.i32x4` lower directly to `psraw` and `psrad`
;; respectively. The only extra work is making sure that a shift amount held in
;; a register is held in an XMM register.
(rule (lower (has_type $I16X8 (sshr src amt)))
      (let ((vec Reg (put_in_reg src))
            (count RegMemImm (reg_mem_imm_to_xmm (put_in_reg_mem_imm amt))))
        (value_reg (psraw vec count))))
(rule (lower (has_type $I32X4 (sshr src amt)))
      (let ((vec Reg (put_in_reg src))
            (count RegMemImm (reg_mem_imm_to_xmm (put_in_reg_mem_imm amt))))
        (value_reg (psrad vec count))))

;; Older x86 feature sets offer no single instruction for the `sshr.i64x2` CLIF
;; instruction. Newer ones (AVX512VL + AVX512F) provide `vpsraq`, a 128-bit
;; instruction that would fit, but this backend does not currently support
;; EVEX encodings. Instead, each 64-bit lane is extracted to a GPR, shifted
;; with a scalar instruction, and the shifted values are inserted back into an
;; XMM register.
;;
;; (TODO: when EVEX support is available, add an alternate lowering here).
(rule (lower (has_type $I64X2 (sshr src amt)))
      (let ((vec Reg (put_in_reg src))
            (lane0 Reg (pextrd $I64 vec 0))
            (lane1 Reg (pextrd $I64 vec 1))
            (count Imm8Reg (sshr_i64x2_shift_amount (put_in_imm8_reg amt)))
            (lane0_shifted Reg (sar $I64 lane0 count))
            (lane1_shifted Reg (sar $I64 lane1 count)))
        (value_reg (make_i64x2_from_lanes (RegMem.Reg lane0_shifted)
                                          (RegMem.Reg lane1_shifted)))))

;; Mask the shift amount according to Cranelift's semantics.
;;
;; Immediate amounts are reduced with `& 63`; amounts held in a register are
;; returned unchanged.
(decl sshr_i64x2_shift_amount (Imm8Reg) Imm8Reg)
(rule (sshr_i64x2_shift_amount (Imm8Reg.Imm8 count))
      (Imm8Reg.Imm8 (u8_and count 63)))
(rule (sshr_i64x2_shift_amount amt @ (Imm8Reg.Reg _)) amt)

;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i16` and `i8`: we need to extend the shift amount, or mask the
Expand Down
Loading

0 comments on commit 375e336

Please sign in to comment.