Skip to content

Commit

Permalink
riscv64: Better AMode Matching (bytecodealliance#7079)
Browse files Browse the repository at this point in the history
* riscv64: Delete `int_load_op` helper

* riscv64: Delete `default_memflags`

* riscv64: Rename `gen_amode`

* riscv64: Better matching for amode

* riscv64: Delete `emit_load` helper

* riscv64: Rename some load variables

* riscv64: Merge `iadd` into amode computations
  • Loading branch information
afonso360 authored Sep 24, 2023
1 parent 4ba8b6c commit 38bc7e9
Show file tree
Hide file tree
Showing 26 changed files with 1,046 additions and 1,188 deletions.
131 changes: 68 additions & 63 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1664,10 +1664,10 @@

;; Otherwise we fall back to loading the immediate from the constant pool.
(rule 0 (imm (ty_int ty) c)
(emit_load
(gen_load
(gen_const_amode (emit_u64_le_const c))
(LoadOP.Ld)
(mem_flags_trusted)
(gen_const_amode (emit_u64_le_const c))))
(mem_flags_trusted)))

;; Imm12 Rules

Expand Down Expand Up @@ -2402,57 +2402,88 @@
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high)
)))

(decl gen_amode (Reg Offset32 Type) AMode)
(extern constructor gen_amode gen_amode)
;; Generates an AMode that points to a register plus an offset.
;; Arguments: the base register, an `i64` offset, and a `Type` (presumably the
;; type of the memory access -- confirm against the Rust implementation).
;; Declared as an extern constructor, so the body lives outside ISLE.
(decl gen_reg_offset_amode (Reg i64 Type) AMode)
(extern constructor gen_reg_offset_amode gen_reg_offset_amode)

;; Generates an AMode that is an offset from the stack pointer.
;; Arguments: an `i64` offset and a `Type` (presumably the type of the memory
;; access -- confirm against the Rust implementation).
;; Declared as an extern constructor, so the body lives outside ISLE.
(decl gen_sp_offset_amode (i64 Type) AMode)
(extern constructor gen_sp_offset_amode gen_sp_offset_amode)

;; Generates an AMode that is an offset from the frame pointer.
;; Arguments: an `i64` offset and a `Type` (presumably the type of the memory
;; access -- confirm against the Rust implementation).
;; Declared as an extern constructor, so the body lives outside ISLE.
(decl gen_fp_offset_amode (i64 Type) AMode)
(extern constructor gen_fp_offset_amode gen_fp_offset_amode)

;; Generates an AMode that points to a stack slot + offset.
;; Arguments: the `StackSlot`, an `i64` offset added to the slot, and a `Type`
;; (presumably the type of the memory access -- confirm against the Rust
;; implementation).
;; Declared as an extern constructor, so the body lives outside ISLE.
(decl gen_stack_slot_amode (StackSlot i64 Type) AMode)
(extern constructor gen_stack_slot_amode gen_stack_slot_amode)

;; Generates an AMode that points to a constant in the constant pool.
;; The single argument is the `VCodeConstant` handle of that constant.
;; Declared as an extern constructor, so the body lives outside ISLE.
(decl gen_const_amode (VCodeConstant) AMode)
(extern constructor gen_const_amode gen_const_amode)



;; Tries to match a Value + Offset into an AMode.
;;
;; Arguments: the address `Value`, a constant `i32` offset, and a `Type` that
;; is passed through unchanged to `amode_inner`.
(decl amode (Value i32 Type) AMode)
;; Fallback (lowest priority): hand the address and offset to `amode_inner`
;; as-is.
(rule 0 (amode addr offset ty) (amode_inner addr offset ty))

;; If we are adding a constant offset with an iadd we can instead make that
;; offset part of the amode offset.
;;
;; Rules 1 and 2 handle the constant appearing on either side of the `iadd`.
;; Each one only applies when `s32_add_fallible` yields a combined offset
;; (presumably it fails on 32-bit overflow -- confirm); otherwise a
;; lower-priority rule is used instead.
;;
;; We can't recurse into `amode` again since that could cause stack overflows.
;; See: https://github.com/bytecodealliance/wasmtime/pull/6968
(rule 1 (amode (iadd addr (iconst (simm32 y))) offset ty)
(if-let new_offset (s32_add_fallible y offset))
(amode_inner addr new_offset ty))
(rule 2 (amode (iadd (iconst (simm32 x)) addr) offset ty)
(if-let new_offset (s32_add_fallible x offset))
(amode_inner addr new_offset ty))


;; These are the normal rules for generating an AMode.
;;
;; The priority-1 rules recognise specific address-producing instructions and
;; take precedence over the generic priority-0 register + offset rule.
(decl amode_inner (Value i32 Type) AMode)

;; In the simplest case we just lower into a Reg+Offset. The address value must
;; have a type accepted by `ty_addr64`.
(rule 0 (amode_inner r @ (value_type (ty_addr64 _)) offset ty)
(gen_reg_offset_amode r offset ty))

;; If the value is a `get_frame_pointer`, we can just use the offset from that.
(rule 1 (amode_inner (get_frame_pointer) offset ty)
(gen_fp_offset_amode offset ty))

;; If the value is a `get_stack_pointer`, we can just use the offset from that.
(rule 1 (amode_inner (get_stack_pointer) offset ty)
(gen_sp_offset_amode offset ty))

;; Similarly, if the value is a `stack_addr` we can fold the slot's own offset
;; and the amode offset into a single stack-slot AMode. This rule only applies
;; when `s32_add_fallible` is able to combine the two offsets.
(rule 1 (amode_inner (stack_addr ss ss_offset) amode_offset ty)
(if-let combined_offset (s32_add_fallible ss_offset amode_offset))
(gen_stack_slot_amode ss combined_offset ty))




;; Returns a canonical type for a LoadOP. We only return I64 or F64.
;;
;; The two floating-point load ops (`Fld` and `Flw`) map to `$F64`; the
;; lower-priority catch-all maps every other load op to `$I64`.
(decl load_op_reg_type (LoadOP) Type)
(rule 1 (load_op_reg_type (LoadOP.Fld)) $F64)
(rule 1 (load_op_reg_type (LoadOP.Flw)) $F64)
(rule 0 (load_op_reg_type _) $I64)

(decl emit_load (LoadOP MemFlags AMode) Reg)
(rule (emit_load op flags from)
(let ((dst WritableReg (temp_writable_reg (load_op_reg_type op)))
(_ Unit (emit (MInst.Load dst op flags from))))
dst))

;; helper function to load from memory.
(decl gen_load (Reg Offset32 LoadOP MemFlags Type) Reg)
(rule (gen_load p offset op flags ty)
(emit_load op flags (gen_amode p offset $I64)))

(decl gen_load_128 (Reg Offset32 MemFlags) ValueRegs)
(rule (gen_load_128 p offset flags)
(let ((low Reg (gen_load p offset (LoadOP.Ld) flags $I64))
(high Reg (gen_load p (offset32_add offset 8) (LoadOP.Ld) flags $I64)))
(value_regs low high)))
;; Emits an `MInst.Load` of `op` from `amode` with the given `flags` and
;; returns the destination register. The destination is a temporary writable
;; register obtained from `temp_writable_reg`, typed by `load_op_reg_type`.
(decl gen_load (AMode LoadOP MemFlags) Reg)
(rule (gen_load amode op flags)
(let ((dst WritableReg (temp_writable_reg (load_op_reg_type op)))
(_ Unit (emit (MInst.Load dst op flags amode))))
dst))

(decl default_memflags () MemFlags)
(extern constructor default_memflags default_memflags)
;; Helper function to store to memory.
;;
;; Wraps an `MInst.Store` of `src` to `amode` (using `op` and `flags`) in a
;; `SideEffectNoResult.Inst` and returns it through `side_effect`.
(decl gen_store (AMode StoreOP MemFlags Reg) InstOutput)
(rule (gen_store amode op flags src)
(side_effect (SideEffectNoResult.Inst (MInst.Store amode op flags src))))

(decl offset32_add (Offset32 i64) Offset32)
(extern constructor offset32_add offset32_add)

;; helper function to store to memory.
(decl gen_store (Reg Offset32 StoreOP MemFlags Reg) InstOutput)
(rule
(gen_store base offset op flags src)
(side_effect (SideEffectNoResult.Inst (MInst.Store (gen_amode base offset $I64) op flags src)))
)

(decl gen_store_128 (Reg Offset32 MemFlags ValueRegs) InstOutput)
(rule
(gen_store_128 p offset flags src)
(side_effect
(SideEffectNoResult.Inst2
(MInst.Store (gen_amode p offset $I64) (StoreOP.Sd) flags (value_regs_get src 0))
(MInst.Store (gen_amode p (offset32_add offset 8) $I64) (StoreOP.Sd) flags (value_regs_get src 1)))))

;; Extractor over `Type`, declared extern so the matching logic lives outside
;; ISLE.
;; NOTE(review): presumably matches only types that are valid for atomic
;; operations -- confirm against the Rust implementation.
(decl valid_atomic_transaction (Type) Type)
(extern extractor valid_atomic_transaction valid_atomic_transaction)
Expand Down Expand Up @@ -2611,32 +2642,6 @@
;; Maps a `Type` to a `StoreOP`. Declared as an extern constructor, so the
;; mapping itself is defined outside ISLE.
(decl store_op (Type) StoreOP)
(extern constructor store_op store_op)

;; bool is "is_signed"
(decl int_load_op (bool u8) LoadOP)
(rule
(int_load_op $false 8)
(LoadOP.Lbu))

(rule
(int_load_op $true 8)
(LoadOP.Lb))

(rule
(int_load_op $false 16)
(LoadOP.Lhu))
(rule
(int_load_op $true 16)
(LoadOP.Lh))
(rule
(int_load_op $false 32)
(LoadOP.Lwu))
(rule
(int_load_op $true 32)
(LoadOP.Lw))

(rule
(int_load_op _ 64)
(LoadOP.Ld))

;;;; load extern name
(decl load_ext_name (ExternalName i64) Reg)
Expand Down
134 changes: 65 additions & 69 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1480,46 +1480,43 @@
(udf code))

;;;;; Rules for `uload8`;;;;;;;;;
(rule
(lower (uload8 flags p @ (value_type (ty_addr64 _)) offset))
(gen_load p offset (int_load_op $false 8) flags $I64))
(rule (lower (uload8 flags addr offset))
(gen_load (amode addr offset $I8) (LoadOP.Lbu) flags))

;;;;; Rules for `sload8`;;;;;;;;;
(rule
(lower (sload8 flags p @ (value_type (ty_addr64 _)) offset))
(gen_load p offset (int_load_op $true 8) flags $I64))
(rule (lower (sload8 flags addr offset))
(gen_load (amode addr offset $I8) (LoadOP.Lb) flags))

;;;;; Rules for `uload16`;;;;;;;;;
(rule
(lower (uload16 flags p @ (value_type (ty_addr64 _)) offset))
(gen_load p offset (int_load_op $false 16) flags $I64))
(rule (lower (uload16 flags addr offset))
(gen_load (amode addr offset $I16) (LoadOP.Lhu) flags))

;;;;; Rules for `sload16`;;;;;;;;;
(rule
(lower (sload16 flags p @ (value_type (ty_addr64 _)) offset))
(gen_load p offset (int_load_op $true 16) flags $I64))
(rule (lower (sload16 flags addr offset))
(gen_load (amode addr offset $I16) (LoadOP.Lh) flags))

;;;;; Rules for `uload32`;;;;;;;;;
(rule
(lower (uload32 flags p @ (value_type (ty_addr64 _)) offset))
(gen_load p offset (int_load_op $false 32) flags $I64))
(rule (lower (uload32 flags addr offset))
(gen_load (amode addr offset $I32) (LoadOP.Lwu) flags))

;;;;; Rules for `iload16`;;;;;;;;;
(rule
(lower (sload32 flags p @ (value_type (ty_addr64 _)) offset))
(gen_load p offset (int_load_op $true 32) flags $I64))
;;;;; Rules for `sload32`;;;;;;;;;
(rule (lower (sload32 flags addr offset))
(gen_load (amode addr offset $I32) (LoadOP.Lw) flags))

(rule
(lower (has_type ty (load flags p @ (value_type (ty_addr64 _)) offset)))
(gen_load p offset (load_op ty) flags ty)
)
;;;; for I128
(rule 1
(lower (has_type $I128 (load flags p @ (value_type (ty_addr64 _)) offset)))
(gen_load_128 p offset flags))
;;;;; Rules for `load`;;;;;;;;;
;; Generic case: build the AMode from `addr` + `offset` and pick the load
;; opcode for the result type with `load_op`.
(rule (lower (has_type ty (load flags addr offset)))
(gen_load (amode addr offset ty) (load_op ty) flags))

(rule 2
(lower (has_type (ty_vec_fits_in_register ty) (load flags p @ (value_type (ty_addr64 _)) offset)))
(let ((eew VecElementWidth (element_width_from_type ty)))
(vec_load eew (VecAMode.UnitStride (gen_amode p offset $I64)) flags (unmasked) ty)))
;; I128 loads are split into two `LoadOP.Ld` loads: `lo` from `offset` and
;; `hi` from `offset + 8`, returned together via `value_regs`.
;; NOTE(review): this rule only matches when `s32_add_fallible` can compute
;; `offset + 8`; otherwise lowering falls through to a lower-priority `load`
;; rule -- confirm that fallback is acceptable for I128.
(rule 1 (lower (has_type $I128 (load flags addr offset)))
(if-let offset_plus_8 (s32_add_fallible offset 8))
(let ((lo XReg (gen_load (amode addr offset $I64) (LoadOP.Ld) flags))
(hi XReg (gen_load (amode addr offset_plus_8 $I64) (LoadOP.Ld) flags)))
(value_regs lo hi)))

;; Vector types accepted by `ty_vec_fits_in_register` use an unmasked,
;; unit-stride `vec_load`; the element width is derived from the vector type.
(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (load flags addr offset)))
(let ((eew VecElementWidth (element_width_from_type ty))
(amode AMode (amode addr offset ty)))
(vec_load eew (VecAMode.UnitStride amode) flags (unmasked) ty)))

;;;;; Rules for Load + Extend Combos ;;;;;;;;;

Expand All @@ -1528,72 +1525,72 @@
;; do a SEW/2 extension. This only reads half-width elements from the source vector register,
;; extends them, and writes back the full register.

(decl gen_load64_extend (Type ExtendOp MemFlags XReg Offset32) VReg)
(decl gen_load64_extend (Type ExtendOp MemFlags AMode) VReg)

(rule (gen_load64_extend ty (ExtendOp.Signed) flags addr offset)
(rule (gen_load64_extend ty (ExtendOp.Signed) flags amode)
(let ((eew VecElementWidth (element_width_from_type $I64))
(load_state VState (vstate_from_type $I64))
(loaded VReg (vec_load eew (VecAMode.UnitStride (gen_amode addr offset $I64)) flags (unmasked) load_state)))
(loaded VReg (vec_load eew (VecAMode.UnitStride amode) flags (unmasked) load_state)))
(rv_vsext_vf2 loaded (unmasked) ty)))

(rule (gen_load64_extend ty (ExtendOp.Zero) flags addr offset)
(rule (gen_load64_extend ty (ExtendOp.Zero) flags amode)
(let ((eew VecElementWidth (element_width_from_type $I64))
(load_state VState (vstate_from_type $I64))
(loaded VReg (vec_load eew (VecAMode.UnitStride (gen_amode addr offset $I64)) flags (unmasked) load_state)))
(loaded VReg (vec_load eew (VecAMode.UnitStride amode) flags (unmasked) load_state)))
(rv_vzext_vf2 loaded (unmasked) ty)))

;;;;; Rules for `uload8x8`;;;;;;;;;;
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I16X8) (uload8x8 flags addr @ (value_type (ty_addr64 _)) offset)))
(gen_load64_extend ty (ExtendOp.Zero) flags addr offset))
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I16X8) (uload8x8 flags addr offset)))
(gen_load64_extend ty (ExtendOp.Zero) flags (amode addr offset ty)))

;;;;; Rules for `uload16x4`;;;;;;;;;
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I32X4) (uload16x4 flags addr @ (value_type (ty_addr64 _)) offset)))
(gen_load64_extend ty (ExtendOp.Zero) flags addr offset))
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I32X4) (uload16x4 flags addr offset)))
(gen_load64_extend ty (ExtendOp.Zero) flags (amode addr offset ty)))

;;;;; Rules for `uload32x2`;;;;;;;;;
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I64X2) (uload32x2 flags addr @ (value_type (ty_addr64 _)) offset)))
(gen_load64_extend ty (ExtendOp.Zero) flags addr offset))
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I64X2) (uload32x2 flags addr offset)))
(gen_load64_extend ty (ExtendOp.Zero) flags (amode addr offset ty)))

;;;;; Rules for `sload8x8`;;;;;;;;;;
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I16X8) (sload8x8 flags addr @ (value_type (ty_addr64 _)) offset)))
(gen_load64_extend ty (ExtendOp.Signed) flags addr offset))
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I16X8) (sload8x8 flags addr offset)))
(gen_load64_extend ty (ExtendOp.Signed) flags (amode addr offset ty)))

;;;;; Rules for `sload16x4`;;;;;;;;;
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I32X4) (sload16x4 flags addr @ (value_type (ty_addr64 _)) offset)))
(gen_load64_extend ty (ExtendOp.Signed) flags addr offset))
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I32X4) (sload16x4 flags addr offset)))
(gen_load64_extend ty (ExtendOp.Signed) flags (amode addr offset ty)))

;;;;; Rules for `sload32x2`;;;;;;;;;
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I64X2) (sload32x2 flags addr @ (value_type (ty_addr64 _)) offset)))
(gen_load64_extend ty (ExtendOp.Signed) flags addr offset))
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I64X2) (sload32x2 flags addr offset)))
(gen_load64_extend ty (ExtendOp.Signed) flags (amode addr offset ty)))

;;;;; Rules for `istore8`;;;;;;;;;
(rule
(lower (istore8 flags x p @ (value_type (ty_addr64 _)) offset))
(gen_store p offset (StoreOP.Sb) flags x))
(rule (lower (istore8 flags src addr offset))
(gen_store (amode addr offset $I8) (StoreOP.Sb) flags src))

;;;;; Rules for `istore16`;;;;;;;;;
(rule
(lower (istore16 flags x p @ (value_type (ty_addr64 _)) offset))
(gen_store p offset (StoreOP.Sh) flags x))
(rule (lower (istore16 flags src addr offset))
(gen_store (amode addr offset $I16) (StoreOP.Sh) flags src))

;;;;; Rules for `istore32`;;;;;;;;;
(rule
(lower (istore32 flags x p @ (value_type (ty_addr64 _)) offset))
(gen_store p offset (StoreOP.Sw) flags x))
(rule (lower (istore32 flags src addr offset))
(gen_store (amode addr offset $I32) (StoreOP.Sw) flags src))

;;;;; Rules for `store`;;;;;;;;;
(rule
(lower (store flags x @ (value_type ty) p @ (value_type (ty_addr64 _)) offset))
(gen_store p offset (store_op ty) flags x))
(rule (lower (store flags src @ (value_type ty) addr offset))
(gen_store (amode addr offset ty) (store_op ty) flags src))

;;; special for I128
(rule 1
(lower (store flags x @ (value_type $I128 ) p @ (value_type (ty_addr64 _)) offset))
(gen_store_128 p offset flags x))
;; I128 stores are split into two `StoreOP.Sd` stores: register 0 of `src`
;; goes to `offset` and register 1 to `offset + 8`. The first store's output
;; is bound to `_` and discarded; the rule's result is the second `gen_store`.
;; Only matches when `s32_add_fallible` can compute `offset + 8`.
(rule 1 (lower (store flags src @ (value_type $I128) addr offset))
(if-let offset_plus_8 (s32_add_fallible offset 8))
(let ((_ InstOutput (gen_store (amode addr offset $I64) (StoreOP.Sd) flags (value_regs_get src 0))))
(gen_store (amode addr offset_plus_8 $I64) (StoreOP.Sd) flags (value_regs_get src 1))))

;; Values whose type is accepted by `ty_vec_fits_in_register` use an unmasked,
;; unit-stride `vec_store`; the element width is derived from the vector type.
(rule 2 (lower (store flags src @ (value_type (ty_vec_fits_in_register ty)) addr offset))
(let ((eew VecElementWidth (element_width_from_type ty))
(amode AMode (amode addr offset ty)))
(vec_store eew (VecAMode.UnitStride amode) src flags (unmasked) ty)))

(rule 2
(lower (store flags x @ (value_type (ty_vec_fits_in_register ty)) p @ (value_type (ty_addr64 _)) offset))
(let ((eew VecElementWidth (element_width_from_type ty)))
(vec_store eew (VecAMode.UnitStride (gen_amode p offset $I64)) x flags (unmasked) ty)))

;;;;; Rules for `icmp`;;;;;;;;;

(decl gen_icmp (IntCC ValueRegs ValueRegs Type) XReg)
(rule
Expand All @@ -1603,7 +1600,6 @@
(_ Unit (emit (MInst.Icmp cc result x y ty))))
result))

;;;;; Rules for `icmp`;;;;;;;;;
;; `icmp` whose first operand has a type accepted by `ty_int`: delegate to
;; `lower_icmp`, passing that type along.
(rule 0 (lower (icmp cc x @ (value_type (ty_int ty)) y))
(lower_icmp cc x y ty))

Expand Down
Loading

0 comments on commit 38bc7e9

Please sign in to comment.