Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Support non 128bit vector sizes #6266

Merged
merged 5 commits into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cranelift/codegen/meta/src/cdsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ macro_rules! preset {
() => {
vec![]
};
($($x:ident)&&*) => {
($($x:tt)&&*) => {
{
let mut v = Vec::new();
$(
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/meta/src/gen_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ fn emit_types(fmt: &mut srcgen::Formatter) {

// Emit vector definitions for common SIMD sizes.
// Emit dynamic vector definitions.
for vec_size in &[64_u64, 128, 256, 512] {
for vec_size in &[16_u64, 32, 64, 128, 256, 512] {
emit_vectors(*vec_size, fmt);
emit_dynamic_vectors(*vec_size, fmt);
}
Expand Down
43 changes: 43 additions & 0 deletions cranelift/codegen/meta/src/isa/riscv64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,30 @@ use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder};

use crate::shared::Definitions as SharedDefinitions;

// Registers the settings for one `Zvl<size>b` (minimum vector register length)
// extension on a settings builder.
//
// Two forms are accepted:
//
// * `define_zvl_ext!(DEF: settings, size)` only adds the boolean setting
//   `has_zvl<size>b` (passing `false` as the last `add_bool` argument) and
//   evaluates to whatever `add_bool` returns.
// * `define_zvl_ext!(settings, size, implies...)` adds that boolean via the
//   `DEF:` form and then adds a preset named `zvl<size>b` built by `preset!`
//   from the new boolean and-ed with every `implies` item. It evaluates to
//   the tuple `(has_feature, preset)`.
//
// `size` is spliced into the names with `concat!`, so it has to be a literal.
macro_rules! define_zvl_ext {
// Boolean-only form: builds the name/description/comment strings at compile
// time and registers the `has_zvl<size>b` setting.
(DEF: $settings:expr, $size:expr) => {{
let name = concat!("has_zvl", $size, "b");
let desc = concat!("has extension Zvl", $size, "b?");
let comment = concat!(
"Zvl",
$size,
"b: Vector register has a minimum of ",
$size,
" bits"
);
$settings.add_bool(&name, &desc, &comment, false)
}};
// Boolean + preset form: the preset enables this extension together with
// each of the (smaller) extensions listed in `$implies`.
($settings:expr, $size:expr $(, $implies:expr)*) => {{
let has_feature = define_zvl_ext!(DEF: $settings, $size);

let name = concat!("zvl", $size, "b");
let desc = concat!("Has a vector register size of at least ", $size, " bits");

let preset = $settings.add_preset(&name, &desc, preset!(has_feature $( && $implies )*));
(has_feature, preset)
}};
}

fn define_settings(_shared: &SettingGroup) -> SettingGroup {
let mut setting = SettingGroupBuilder::new("riscv64");

Expand Down Expand Up @@ -56,6 +80,25 @@ fn define_settings(_shared: &SettingGroup) -> SettingGroup {
false,
);

// Zvl*: Minimum Vector Length Standard Extensions
// These extensions specify the minimum number of bits in a vector register.
// Since it is a minimum, Zvl64b implies Zvl32b, Zvl128b implies Zvl64b, etc.
// The V extension supports a maximum of 64K bits in a single register.
//
// See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#181-zvl-minimum-vector-length-standard-extensions
let (_, zvl32b) = define_zvl_ext!(setting, 32);
let (_, zvl64b) = define_zvl_ext!(setting, 64, zvl32b);
let (_, zvl128b) = define_zvl_ext!(setting, 128, zvl64b);
let (_, zvl256b) = define_zvl_ext!(setting, 256, zvl128b);
let (_, zvl512b) = define_zvl_ext!(setting, 512, zvl256b);
let (_, zvl1024b) = define_zvl_ext!(setting, 1024, zvl512b);
let (_, zvl2048b) = define_zvl_ext!(setting, 2048, zvl1024b);
let (_, zvl4096b) = define_zvl_ext!(setting, 4096, zvl2048b);
let (_, zvl8192b) = define_zvl_ext!(setting, 8192, zvl4096b);
let (_, zvl16384b) = define_zvl_ext!(setting, 16384, zvl8192b);
let (_, zvl32768b) = define_zvl_ext!(setting, 32768, zvl16384b);
let (_, _zvl65536b) = define_zvl_ext!(setting, 65536, zvl32768b);

setting.build()
}

Expand Down
20 changes: 19 additions & 1 deletion cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,25 @@ impl MachInst for Inst {
F32 => Ok((&[RegClass::Float], &[F32])),
F64 => Ok((&[RegClass::Float], &[F64])),
I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
_ if ty.is_vector() && ty.bits() == 128 => Ok((&[RegClass::Float], &[types::I8X16])),
_ if ty.is_vector() => {
debug_assert!(ty.bits() <= 512);

// Here we only need to return a SIMD type with the same size as `ty`.
// We use these types for spills and reloads, so prefer types with lanes <= 31
// since that fits in the immediate field of `vsetivli`.
const SIMD_TYPES: [[Type; 1]; 6] = [
[types::I8X2],
[types::I8X4],
[types::I8X8],
[types::I8X16],
[types::I16X16],
[types::I32X16],
];
let idx = (ty.bytes().ilog2() - 1) as usize;
let ty = &SIMD_TYPES[idx][..];

Ok((&[RegClass::Float], ty))
}
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty
Expand Down
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,14 @@
(if-let $I64 (lane_type ty))
(VecElementWidth.E64))

;; Returns the minimum vector register size, in bits, that the backend has
;; precomputed from the enabled ISA flags. Implemented externally in Rust.
(decl pure min_vec_reg_size () u64)
(extern constructor min_vec_reg_size min_vec_reg_size)

;; An extractor that matches any type that is known to fit in a single vector
;; register.
(decl ty_vec_fits_in_register (Type) Type)
(extern extractor ty_vec_fits_in_register ty_vec_fits_in_register)

;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; As noted in the RISC-V Vector Extension Specification, rs2 is the first
Expand Down
15 changes: 6 additions & 9 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@
;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Base case, simply adding things in registers.
(rule 0 (lower (has_type (fits_in_64 ty) (iadd x y)))
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x y)))
(rv_add x y))

;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule 1 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y))))
(rule 1 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x (imm12_from_value y))))
(alu_rr_imm12 (select_addi ty) x y))

(rule 2 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y)))
(rule 2 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd (imm12_from_value x) y)))
(alu_rr_imm12 (select_addi ty) y x))

;; Special case when one of the operands is uextended
Expand Down Expand Up @@ -98,8 +98,7 @@
(value_regs low high)))

;; SIMD Vectors
(rule 8 (lower (has_type (ty_vec128_int ty) (iadd x y)))
(if-let $true (has_v))
(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (iadd x y)))
(rv_vadd_vv x y ty))

;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;
Expand Down Expand Up @@ -815,8 +814,7 @@
(gen_load_128 p offset flags))

(rule 2
(lower (has_type (ty_vec128_int ty) (load flags p @ (value_type (ty_addr64 _)) offset)))
(if-let $true (has_v))
(lower (has_type (ty_vec_fits_in_register ty) (load flags p @ (value_type (ty_addr64 _)) offset)))
(let ((eew VecElementWidth (element_width_from_type ty)))
(vec_load eew (VecAMode.UnitStride (gen_amode p offset $I64)) flags ty)))

Expand Down Expand Up @@ -845,8 +843,7 @@
(gen_store_128 p offset flags x))

(rule 2
(lower (store flags x @ (value_type (ty_vec128_int ty)) p @ (value_type (ty_addr64 _)) offset))
(if-let $true (has_v))
(lower (store flags x @ (value_type (ty_vec_fits_in_register ty)) p @ (value_type (ty_addr64 _)) offset))
(let ((eew VecElementWidth (element_width_from_type ty)))
(vec_store eew (VecAMode.UnitStride (gen_amode p offset $I64)) x flags ty)))

Expand Down
96 changes: 78 additions & 18 deletions cranelift/codegen/src/isa/riscv64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,67 @@ type VecMachLabel = Vec<MachLabel>;
type VecArgPair = Vec<ArgPair>;
use crate::machinst::valueregs;

/// The main entry point for lowering with ISLE.
pub(crate) fn lower(
lower_ctx: &mut Lower<MInst>,
backend: &Riscv64Backend,
inst: Inst,
) -> Option<InstOutput> {
// TODO: reuse the ISLE context across lowerings so we can reuse its
// internal heap allocations.
let mut isle_ctx = IsleContext { lower_ctx, backend };
generated_code::constructor_lower(&mut isle_ctx, inst)
/// ISLE lowering context for the riscv64 backend.
///
/// Holds the borrowed lowering state and backend, plus a minimum vector
/// register size that is computed once when the context is constructed.
pub(crate) struct RV64IsleContext<'a, 'b, I, B>
where
I: VCodeInst,
B: LowerBackend,
{
/// Mutable borrow of the lowering state that instructions are emitted into.
pub lower_ctx: &'a mut Lower<'b, I>,
/// The backend whose ISA flags drive lowering decisions.
pub backend: &'a B,
/// Precalculated value for the minimum vector register size, in bits. Will
/// be 0 if vectors are not supported.
min_vec_reg_size: u64,
}

impl IsleContext<'_, '_, MInst, Riscv64Backend> {
impl<'a, 'b> RV64IsleContext<'a, 'b, MInst, Riscv64Backend> {
isle_prelude_method_helpers!(Riscv64ABICaller);

/// Creates a lowering context for `backend`, caching the minimum vector
/// register size derived from its ISA flags so it is computed only once.
fn new(lower_ctx: &'a mut Lower<'b, MInst>, backend: &'a Riscv64Backend) -> Self {
    let min_vec_reg_size = Self::compute_min_vec_reg_size(backend);

    Self {
        lower_ctx,
        backend,
        min_vec_reg_size,
    }
}

/// Determines the minimum vector register size, in bits, guaranteed by the
/// ISA flags enabled on `backend`.
///
/// Returns 0 when none of the vector-related flags is set.
fn compute_min_vec_reg_size(backend: &Riscv64Backend) -> u64 {
    let isa = &backend.isa_flags;

    // Listed from the largest guarantee down to the smallest, so the first
    // enabled entry is the strongest guarantee available.
    let candidates = [
        (isa.has_zvl65536b(), 65536),
        (isa.has_zvl32768b(), 32768),
        (isa.has_zvl16384b(), 16384),
        (isa.has_zvl8192b(), 8192),
        (isa.has_zvl4096b(), 4096),
        (isa.has_zvl2048b(), 2048),
        (isa.has_zvl1024b(), 1024),
        (isa.has_zvl512b(), 512),
        (isa.has_zvl256b(), 256),
        // In order to claim the Application Profile V extension, a minimum
        // register size of 128 is required. i.e. V implies Zvl128b.
        (isa.has_v(), 128),
        (isa.has_zvl128b(), 128),
        (isa.has_zvl64b(), 64),
        (isa.has_zvl32b(), 32),
    ];

    candidates
        .into_iter()
        .find(|&(enabled, _)| enabled)
        .map_or(0, |(_, bits)| bits)
}

/// Emits a clone of every instruction in `list`, in order, into the
/// lowering context.
#[inline]
fn emit_list(&mut self, list: &SmallInstVec<MInst>) {
    for inst in list.iter().cloned() {
        self.lower_ctx.emit(inst);
    }
}
}

impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> {
isle_lower_prelude_methods!();
isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICaller);

Expand Down Expand Up @@ -437,17 +481,33 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
fn vstate_from_type(&mut self, ty: Type) -> VState {
VState::from_type(ty)
}
}

impl IsleContext<'_, '_, MInst, Riscv64Backend> {
/// Returns the minimum vector register size stored in this context when it
/// was created.
fn min_vec_reg_size(&mut self) -> u64 {
self.min_vec_reg_size
}

#[inline]
fn emit_list(&mut self, list: &SmallInstVec<MInst>) {
for i in list {
self.lower_ctx.emit(i.clone());
/// Matches `ty` only when it is a vector type whose total width in bits does
/// not exceed the minimum vector register size; yields `None` otherwise.
fn ty_vec_fits_in_register(&mut self, ty: Type) -> Option<Type> {
    // Non-vector types never match, regardless of their width.
    if !ty.is_vector() {
        return None;
    }

    // Too wide to be guaranteed to fit in a single vector register.
    if (ty.bits() as u64) > self.min_vec_reg_size() {
        return None;
    }

    Some(ty)
}
}

/// The main entry point for lowering with ISLE.
///
/// Wraps `lower_ctx` and `backend` in a freshly constructed
/// `RV64IsleContext` and runs the generated `constructor_lower` on `inst`,
/// returning its result unchanged.
pub(crate) fn lower(
lower_ctx: &mut Lower<MInst>,
backend: &Riscv64Backend,
inst: Inst,
) -> Option<InstOutput> {
// TODO: reuse the ISLE context across lowerings so we can reuse its
// internal heap allocations.
let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend);
generated_code::constructor_lower(&mut isle_ctx, inst)
}

/// The main entry point for branch lowering with ISLE.
pub(crate) fn lower_branch(
lower_ctx: &mut Lower<MInst>,
Expand All @@ -457,7 +517,7 @@ pub(crate) fn lower_branch(
) -> Option<()> {
// TODO: reuse the ISLE context across lowerings so we can reuse its
// internal heap allocations.
let mut isle_ctx = IsleContext { lower_ctx, backend };
let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend);
generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec())
}

Expand Down
5 changes: 5 additions & 0 deletions cranelift/codegen/src/isle_prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,11 @@ macro_rules! isle_common_prelude_methods {
}
}

/// Extractor form of `ty_int_ref_scalar_64`: forwards to that method and
/// returns its result unchanged.
#[inline]
fn ty_int_ref_scalar_64_extract(&mut self, ty: Type) -> Option<Type> {
self.ty_int_ref_scalar_64(ty)
}

#[inline]
fn ty_32(&mut self, ty: Type) -> Option<Type> {
if ty.bits() == 32 {
Expand Down
5 changes: 3 additions & 2 deletions cranelift/codegen/src/prelude.isle
Original file line number Diff line number Diff line change
Expand Up @@ -340,10 +340,11 @@
(decl ty_64 (Type) Type)
(extern extractor ty_64 ty_64)

;; A pure constructor that only matches scalar integers, and references that can
;; fit in 64 bits.
;; A pure constructor/extractor that only matches scalar integers, and
;; references that can fit in 64 bits.
(decl pure partial ty_int_ref_scalar_64 (Type) Type)
(extern constructor ty_int_ref_scalar_64 ty_int_ref_scalar_64)
(extern extractor ty_int_ref_scalar_64 ty_int_ref_scalar_64_extract)

;; An extractor that matches 32- and 64-bit types only.
(decl ty_32_or_64 (Type) Type)
Expand Down
40 changes: 40 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/simd-iadd-big.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v has_zvl2048b



function %iadd_i64x4(i64x4, i64x4) -> i64x4 {
block0(v0:i64x4, v1:i64x4):
v2 = iadd v0, v1
return v2
}

; VCode:
; block0:
; vadd.vv v10,v11,v10 #avl=4, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x82, 0xcd
; .byte 0x57, 0x05, 0xb5, 0x02
; ret

function %iadd_i64x8(i64x8, i64x8) -> i64x8 {
block0(v0:i64x8, v1:i64x8):
v2 = iadd v0, v1
return v2
}

; VCode:
; block0:
; vadd.vv v10,v11,v10 #avl=8, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcd
; .byte 0x57, 0x05, 0xb5, 0x02
; ret

Loading