diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 15d2a1c6c108..5cbedb9a82de 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -73,6 +73,7 @@ x86 = [] arm64 = [] s390x = [] riscv64 = [] +zkasm = [] # Enable the ISA target for the host machine host-arch = [] @@ -81,7 +82,8 @@ all-arch = [ "x86", "arm64", "s390x", - "riscv64" + "riscv64", + "zkasm" ] # For dependent crates that want to serialize some parts of cranelift diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index 211b62177dd2..a634bd5209b2 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -33,7 +33,7 @@ fn main() { .cloned() .filter(|isa| { let env_key = format!("CARGO_FEATURE_{}", isa.to_string().to_uppercase()); - env::var(env_key).is_ok() + dbg!(env::var(dbg!(env_key)).is_ok()) }) .collect::>(); @@ -200,6 +200,8 @@ fn get_isle_compilations( let src_isa_risc_v = make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("riscv64")); + let src_isa_zkasm = + make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("zkasm")); // This is a set of ISLE compilation units. // // The format of each entry is: @@ -280,6 +282,17 @@ fn get_isle_compilations( ], untracked_inputs: vec![clif_lower_isle.clone()], }, + IsleCompilation { + output: out_dir.join("isle_zkasm.rs"), + inputs: vec![ + prelude_isle.clone(), + prelude_lower_isle.clone(), + src_isa_zkasm.join("inst.isle"), + src_isa_zkasm.join("inst_vector.isle"), + src_isa_zkasm.join("lower.isle"), + ], + untracked_inputs: vec![clif_lower_isle.clone()], + }, ], }) } diff --git a/cranelift/codegen/meta/src/isa/mod.rs b/cranelift/codegen/meta/src/isa/mod.rs index ecda9b83d054..37906d557352 100644 --- a/cranelift/codegen/meta/src/isa/mod.rs +++ b/cranelift/codegen/meta/src/isa/mod.rs @@ -6,6 +6,7 @@ mod arm64; mod riscv64; mod s390x; pub(crate) mod x86; +mod zkasm; /// Represents known ISA target. #[derive(PartialEq, Copy, Clone)] @@ -14,6 +15,7 @@ pub enum Isa { Arm64, S390x, Riscv64, + ZkAsm, } impl Isa { @@ -29,6 +31,7 @@ impl Isa { pub fn from_arch(arch: &str) -> Option { match arch { "aarch64" => Some(Isa::Arm64), + "sparc" | "zkasm" => Some(Isa::ZkAsm), "s390x" => Some(Isa::S390x), x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86), "riscv64" | "riscv64gc" | "riscv64imac" => Some(Isa::Riscv64), @@ -38,7 +41,7 @@ impl Isa { /// Returns all supported isa targets. pub fn all() -> &'static [Isa] { - &[Isa::X86, Isa::Arm64, Isa::S390x, Isa::Riscv64] + &[Isa::X86, Isa::Arm64, Isa::S390x, Isa::Riscv64, Isa::ZkAsm] } } @@ -50,6 +53,7 @@ impl fmt::Display for Isa { Isa::Arm64 => write!(f, "arm64"), Isa::S390x => write!(f, "s390x"), Isa::Riscv64 => write!(f, "riscv64"), + Isa::ZkAsm => write!(f, "zkasm"), } } } @@ -61,6 +65,7 @@ pub(crate) fn define(isas: &[Isa]) -> Vec { Isa::Arm64 => arm64::define(), Isa::S390x => s390x::define(), Isa::Riscv64 => riscv64::define(), + Isa::ZkAsm => zkasm::define(), }) .collect() } diff --git a/cranelift/codegen/meta/src/isa/zkasm.rs b/cranelift/codegen/meta/src/isa/zkasm.rs new file mode 100644 index 000000000000..22ab97407540 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/zkasm.rs @@ -0,0 +1,101 @@ +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::settings::SettingGroupBuilder; + +macro_rules! 
define_zvl_ext { + (DEF: $settings:expr, $size:expr) => {{ + let name = concat!("has_zvl", $size, "b"); + let desc = concat!("has extension Zvl", $size, "b?"); + let comment = concat!( + "Zvl", + $size, + "b: Vector register has a minimum of ", + $size, + " bits" + ); + $settings.add_bool(&name, &desc, &comment, false) + }}; + ($settings:expr, $size:expr $(, $implies:expr)*) => {{ + let has_feature = define_zvl_ext!(DEF: $settings, $size); + + let name = concat!("zvl", $size, "b"); + let desc = concat!("Has a vector register size of at least ", $size, " bits"); + + let preset = $settings.add_preset(&name, &desc, preset!(has_feature $( && $implies )*)); + (has_feature, preset) + }}; +} + +pub(crate) fn define() -> TargetIsa { + let mut setting = SettingGroupBuilder::new("zkasm"); + + let _has_m = setting.add_bool("has_m", "has extension M?", "", false); + let _has_a = setting.add_bool("has_a", "has extension A?", "", false); + let _has_f = setting.add_bool("has_f", "has extension F?", "", false); + let _has_d = setting.add_bool("has_d", "has extension D?", "", false); + let _has_v = setting.add_bool("has_v", "has extension V?", "", false); + let _has_c = setting.add_bool("has_c", "has extension C?", "", false); + let _has_zbkb = setting.add_bool( + "has_zbkb", + "has extension zbkb?", + "Zbkb: Bit-manipulation for Cryptography", + false, + ); + let _has_zba = setting.add_bool( + "has_zba", + "has extension zba?", + "Zba: Address Generation", + false, + ); + let _has_zbb = setting.add_bool( + "has_zbb", + "has extension zbb?", + "Zbb: Basic bit-manipulation", + false, + ); + let _has_zbc = setting.add_bool( + "has_zbc", + "has extension zbc?", + "Zbc: Carry-less multiplication", + false, + ); + let _has_zbs = setting.add_bool( + "has_zbs", + "has extension zbs?", + "Zbs: Single-bit instructions", + false, + ); + + let _has_zicsr = setting.add_bool( + "has_zicsr", + "has extension zicsr?", + "Zicsr: Control and Status Register (CSR) Instructions", + false, + ); + let _has_zifencei = setting.add_bool( + "has_zifencei", + "has extension zifencei?", + "Zifencei: Instruction-Fetch Fence", + false, + ); + + // Zvl*: Minimum Vector Length Standard Extensions + // These extension specifiy the minimum number of bits in a vector register. + // Since it is a minimum, Zvl64b implies Zvl32b, Zvl128b implies Zvl64b, etc. + // The V extension supports a maximum of 64K bits in a single register. 
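+    // For example, enabling the `zvl128b` preset built by `define_zvl_ext!` below
+    // turns on `has_zvl128b` and, through the chained presets, `has_zvl64b` and
+    // `has_zvl32b` as well.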
+ // + // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#181-zvl-minimum-vector-length-standard-extensions + let (_, zvl32b) = define_zvl_ext!(setting, 32); + let (_, zvl64b) = define_zvl_ext!(setting, 64, zvl32b); + let (_, zvl128b) = define_zvl_ext!(setting, 128, zvl64b); + let (_, zvl256b) = define_zvl_ext!(setting, 256, zvl128b); + let (_, zvl512b) = define_zvl_ext!(setting, 512, zvl256b); + let (_, zvl1024b) = define_zvl_ext!(setting, 1024, zvl512b); + let (_, zvl2048b) = define_zvl_ext!(setting, 2048, zvl1024b); + let (_, zvl4096b) = define_zvl_ext!(setting, 4096, zvl2048b); + let (_, zvl8192b) = define_zvl_ext!(setting, 8192, zvl4096b); + let (_, zvl16384b) = define_zvl_ext!(setting, 16384, zvl8192b); + let (_, zvl32768b) = define_zvl_ext!(setting, 32768, zvl16384b); + let (_, _zvl65536b) = define_zvl_ext!(setting, 65536, zvl32768b); + + TargetIsa::new("zkasm", setting.build()) +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 006a6b807d3d..92377395626a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1158,6 +1158,7 @@ impl MachInst for Inst { } fn gen_block_start( + _block_index: usize, is_indirect_branch_target: bool, is_forward_edge_cfi_enabled: bool, ) -> Option { diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 860fcbd24e14..5b2fffc57101 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -74,6 +74,9 @@ pub mod riscv64; #[cfg(feature = "s390x")] mod s390x; +#[cfg(feature = "zkasm")] +pub mod zkasm; + pub mod unwind; mod call_conv; @@ -103,6 +106,7 @@ pub fn lookup(triple: Triple) -> Result { Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple), Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple), Architecture::Riscv64 { .. } => isa_builder!(riscv64, (feature = "riscv64"), triple), + Architecture::Sparc { .. } => isa_builder!(zkasm, (feature = "zkasm"), triple), _ => Err(LookupError::Unsupported), } } @@ -110,7 +114,7 @@ pub fn lookup(triple: Triple) -> Result { /// The string names of all the supported, but possibly not enabled, architectures. The elements of /// this slice are suitable to be passed to the [lookup_by_name] function to obtain the default /// configuration for that architecture. -pub const ALL_ARCHITECTURES: &[&str] = &["x86_64", "aarch64", "s390x", "riscv64"]; +pub const ALL_ARCHITECTURES: &[&str] = &["x86_64", "aarch64", "s390x", "riscv64", "sparc"]; /// Look for a supported ISA with the given `name`. /// Return a builder that can create a corresponding `TargetIsa`. diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs new file mode 100644 index 000000000000..d88031c0b117 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -0,0 +1,961 @@ +//! Implementation of a standard Riscv64 ABI. 
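+//!
+//! Note: this module is adapted from the `riscv64` backend, so many type names
+//! (e.g. `Riscv64MachineDeps`, `Riscv64Callee`) and comments below still use
+//! Riscv64 terminology even though the target is zkASM.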
+ +use crate::ir; +use crate::ir::types::*; + +use crate::ir::ExternalName; +use crate::ir::MemFlags; +use crate::isa; + +use crate::isa::zkasm::{inst::EmitState, inst::*}; +use crate::isa::CallConv; +use crate::machinst::*; + +use crate::ir::types::I8; +use crate::ir::LibCall; +use crate::ir::Signature; +use crate::isa::zkasm::settings::Flags as RiscvFlags; +use crate::settings; +use crate::CodegenError; +use crate::CodegenResult; +use alloc::boxed::Box; +use alloc::vec::Vec; +use regalloc2::PRegSet; +use regs::x_reg; + +use smallvec::{smallvec, SmallVec}; + +/// Support for the Riscv64 ABI from the callee side (within a function body). +pub(crate) type Riscv64Callee = Callee; + +/// Support for the Riscv64 ABI from the caller side (at a callsite). +pub(crate) type Riscv64ABICallSite = CallSite; + +/// This is the limit for the size of argument and return-value areas on the +/// stack. We place a reasonable limit here to avoid integer overflow issues +/// with 32-bit arithmetic: for now, 128 MB. +static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024; + +/// Riscv64-specific ABI behavior. This struct just serves as an implementation +/// point for the trait; it is never actually instantiated. +pub struct Riscv64MachineDeps; + +impl IsaFlags for RiscvFlags {} + +impl RiscvFlags { + pub(crate) fn min_vec_reg_size(&self) -> u64 { + let entries = [ + (self.has_zvl65536b(), 65536), + (self.has_zvl32768b(), 32768), + (self.has_zvl16384b(), 16384), + (self.has_zvl8192b(), 8192), + (self.has_zvl4096b(), 4096), + (self.has_zvl2048b(), 2048), + (self.has_zvl1024b(), 1024), + (self.has_zvl512b(), 512), + (self.has_zvl256b(), 256), + // In order to claim the Application Profile V extension, a minimum + // register size of 128 is required. i.e. V implies Zvl128b. + (self.has_v(), 128), + (self.has_zvl128b(), 128), + (self.has_zvl64b(), 64), + (self.has_zvl32b(), 32), + ]; + + for (has_flag, size) in entries.into_iter() { + if !has_flag { + continue; + } + + // Due to a limitation in regalloc2, we can't support types + // larger than 1024 bytes. So limit that here. + return std::cmp::min(size, 1024); + } + + return 0; + } +} + +impl ABIMachineSpec for Riscv64MachineDeps { + type I = Inst; + type F = RiscvFlags; + + fn word_bits() -> u32 { + 64 + } + + /// Return required stack alignment in bytes. + fn stack_align(_call_conv: isa::CallConv) -> u32 { + 1 + } + + fn compute_arg_locs<'a, I>( + call_conv: isa::CallConv, + _flags: &settings::Flags, + params: I, + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, + mut args: ArgsAccumulator<'_>, + ) -> CodegenResult<(u32, Option)> + where + I: IntoIterator, + { + // All registers that can be used as parameters or rets. + // both start and end are included. + let (x_start, x_end, f_start, f_end) = match (call_conv, args_or_rets) { + (isa::CallConv::Tail, _) => (10, 11, 0, 0), + (_, ArgsOrRets::Args) => (10, 11, 0, 0), + (_, ArgsOrRets::Rets) => (10, 11, 0, 0), + }; + let mut next_x_reg = x_start; + let mut next_f_reg = f_start; + // Stack space. + let mut next_stack: u32 = 0; + + for param in params { + if let ir::ArgumentPurpose::StructArgument(size) = param.purpose { + let offset = next_stack; + assert!(size % 8 == 0, "StructArgument size is not properly aligned"); + next_stack += size; + args.push(ABIArg::StructArg { + pointer: None, + offset: offset as i64, + size: size as u64, + purpose: param.purpose, + }); + continue; + } + + // For now we pin VMContext register to `CTX` register of ZK ASM. 
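+            // The branch below gives the VMContext parameter a single register slot
+            // backed by `context_reg()` (typed as I32 here), so it never consumes one
+            // of the ordinary integer argument registers.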
+ if let ir::ArgumentPurpose::VMContext = param.purpose { + let mut slots = ABIArgSlotVec::new(); + slots.push(ABIArgSlot::Reg { + reg: context_reg().to_real_reg().unwrap(), + ty: I32, + extension: param.extension, + }); + args.push(ABIArg::Slots { + slots, + purpose: param.purpose, + }); + continue; + } + + // Find regclass(es) of the register(s) used to store a value of this type. + let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?; + let mut slots = ABIArgSlotVec::new(); + for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) { + let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int { + let x = Some(x_reg(next_x_reg)); + next_x_reg += 1; + x + } else if (next_f_reg <= f_end) && *rc == RegClass::Float { + let x = Some(f_reg(next_f_reg)); + next_f_reg += 1; + x + } else { + None + }; + if let Some(reg) = next_reg { + slots.push(ABIArgSlot::Reg { + reg: reg.to_real_reg().unwrap(), + ty: *reg_ty, + extension: param.extension, + }); + } else { + // Compute size and 16-byte stack alignment happens + // separately after all args. + let size = reg_ty.bits() / 8; + let size = std::cmp::max(size, 8); + // Align. + debug_assert!(size.is_power_of_two()); + next_stack = align_to(next_stack, size); + slots.push(ABIArgSlot::Stack { + offset: next_stack as i64, + ty: *reg_ty, + extension: param.extension, + }); + next_stack += size; + } + } + args.push(ABIArg::Slots { + slots, + purpose: param.purpose, + }); + } + let pos: Option = if add_ret_area_ptr { + assert!(ArgsOrRets::Args == args_or_rets); + if next_x_reg <= x_end { + let arg = ABIArg::reg( + x_reg(next_x_reg).to_real_reg().unwrap(), + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + args.push(arg); + } else { + let arg = ABIArg::stack( + next_stack as i64, + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + args.push(arg); + next_stack += 8; + } + Some(args.args().len() - 1) + } else { + None + }; + + next_stack = align_to(next_stack, Self::stack_align(call_conv)); + + // To avoid overflow issues, limit the arg/return size to something + // reasonable -- here, 128 MB. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((next_stack, pos)) + } + + fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 { + // lr fp. 
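+        // i.e. 8 bytes each for the saved link register (`lr`/`ra`) and the saved
+        // frame pointer.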
+ 16 + } + + fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { + Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted()) + } + + fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted()) + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_move(to_reg, from_reg, ty) + } + + fn gen_extend( + to_reg: Writable, + from_reg: Reg, + signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Inst { + assert!(from_bits < to_bits); + Inst::Extend { + rd: to_reg, + rn: from_reg, + signed, + from_bits, + to_bits, + } + } + + fn get_ext_mode( + _call_conv: isa::CallConv, + specified: ir::ArgumentExtension, + ) -> ir::ArgumentExtension { + specified + } + + fn gen_args(_isa_flags: &crate::isa::zkasm::settings::Flags, args: Vec) -> Inst { + Inst::Args { args } + } + + fn gen_ret( + _setup_frame: bool, + _isa_flags: &Self::F, + _call_conv: isa::CallConv, + rets: Vec, + stack_bytes_to_pop: u32, + ) -> Inst { + Inst::Ret { + rets, + stack_bytes_to_pop, + } + } + + fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg { + spilltmp_reg() + } + + fn gen_add_imm( + _call_conv: isa::CallConv, + into_reg: Writable, + from_reg: Reg, + imm: u32, + ) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: into_reg, + rs: from_reg, + imm12, + }); + } else { + insts.extend(Inst::load_constant_u32( + writable_spilltmp_reg2(), + imm as u64, + &mut |_| writable_spilltmp_reg2(), + )); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: into_reg, + rs1: spilltmp_reg2(), + rs2: from_reg, + }); + } + insts + } + + fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec { + let mut insts = SmallVec::new(); + insts.push(Inst::TrapIfC { + cc: IntCC::UnsignedLessThan, + rs1: stack_reg(), + rs2: limit_reg, + trap_code: ir::TrapCode::StackOverflow, + }); + insts + } + + fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, _ty: Type) -> Inst { + Inst::LoadAddr { + rd: into_reg, + mem: mem.into(), + } + } + + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64, ty); + Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()) + } + + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64, ty); + Inst::gen_store(mem, from_reg, ty, MemFlags::trusted()) + } + + fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec { + let mut insts = SmallVec::new(); + if amount == 0 { + return insts; + } + insts.push(Inst::AdjustSp { + amount: amount as i64, + }); + insts + } + + fn gen_nominal_sp_adj(offset: i32) -> Inst { + Inst::VirtualSPOffsetAdj { + amount: offset as i64, + } + } + + fn gen_prologue_frame_setup(_flags: &settings::Flags) -> SmallInstVec { + // add sp,sp,-16 ;; alloc stack space for fp. + // sd ra,8(sp) ;; save ra. + // sd fp,0(sp) ;; store old fp. + // mv fp,sp ;; set fp to sp. 
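+        // The sequence above documents the original riscv64 prologue; here only the
+        // return address is saved: SP is adjusted by a single slot and `ra` is stored
+        // at offset 0, while the fp save and `mv fp,sp` steps are left commented out.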
+ let mut insts = SmallVec::new(); + insts.push(Inst::AdjustSp { amount: -1 }); + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(0, I64), + link_reg(), + I64, + )); + // insts.push(Self::gen_store_stack( + // StackAMode::SPOffset(0, I64), + // fp_reg(), + // I64, + // )); + // insts.push(Inst::Mov { + // rd: writable_fp_reg(), + // rm: stack_reg(), + // ty: I64, + // }); + insts + } + /// reverse of gen_prologue_frame_setup. + fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec { + let mut insts = SmallVec::new(); + insts.push(Self::gen_load_stack( + StackAMode::SPOffset(0, I64), + writable_link_reg(), + I64, + )); + // insts.push(Self::gen_load_stack( + // StackAMode::SPOffset(0, I64), + // writable_fp_reg(), + // I64, + // )); + insts.push(Inst::AdjustSp { amount: 1 }); + insts + } + + fn gen_probestack(insts: &mut SmallInstVec, frame_size: u32) { + insts.extend(Inst::load_constant_u32( + writable_a0(), + frame_size as u64, + &mut |_| writable_a0(), + )); + insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: ExternalName::LibCall(LibCall::Probestack), + uses: smallvec![CallArgPair { + vreg: a0(), + preg: a0(), + }], + defs: smallvec![], + clobbers: PRegSet::empty(), + opcode: Opcode::Call, + callee_callconv: CallConv::SystemV, + caller_callconv: CallConv::SystemV, + callee_pop_size: 0, + }), + }); + } + // Returns stack bytes used as well as instructions. Does not adjust + // nominal SP offset; abi_impl generic code will do that. + fn gen_clobber_save( + _call_conv: isa::CallConv, + _setup_frame: bool, + _flags: &settings::Flags, + clobbered_callee_saves: &[Writable], + fixed_frame_storage_size: u32, + _outgoing_args_size: u32, + ) -> (u64, SmallVec<[Inst; 16]>) { + let mut insts = SmallVec::new(); + let clobbered_size = compute_clobber_size(&clobbered_callee_saves); + // Adjust the stack pointer downward for clobbers and the function fixed + // frame (spillslots and storage slots). + let stack_size = fixed_frame_storage_size + clobbered_size; + // Each stack slot is 256 bit and can fit 8 u32 values. + let stack_size = stack_size / 8; + // Store each clobbered register in order at offsets from SP, + // placing them above the fixed frame slots. + if stack_size > 0 { + let mut cur_offset = 1; + for reg in clobbered_callee_saves { + let r_reg = reg.to_reg(); + let ty = match r_reg.class() { + RegClass::Int => I64, + RegClass::Float => F64, + RegClass::Vector => unimplemented!("Vector Clobber Saves"), + }; + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(-(cur_offset as i64), ty), + real_reg_to_reg(reg.to_reg()), + ty, + )); + cur_offset += 1 + } + insts.push(Inst::AdjustSp { + amount: -(stack_size as i64), + }); + } + (clobbered_size as u64, insts) + } + + fn gen_clobber_restore( + call_conv: isa::CallConv, + sig: &Signature, + _flags: &settings::Flags, + clobbers: &[Writable], + fixed_frame_storage_size: u32, + _outgoing_args_size: u32, + ) -> SmallVec<[Inst; 16]> { + let mut insts = SmallVec::new(); + let clobbered_callee_saves = + Self::get_clobbered_callee_saves(call_conv, _flags, sig, clobbers); + let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves); + // Each stack slot is 256 bit and can fit 8 u32 values. 
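+        // (the byte-based size computed above is converted into zkASM stack slots by
+        // the division below; e.g. two clobbered registers account for 16 bytes and
+        // become 2 slots)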
+ let stack_size = stack_size / 8; + if stack_size > 0 { + insts.push(Inst::AdjustSp { + amount: stack_size as i64, + }); + } + let mut cur_offset = 1; + for reg in &clobbered_callee_saves { + let rreg = reg.to_reg(); + let ty = match rreg.class() { + RegClass::Int => I64, + RegClass::Float => F64, + RegClass::Vector => unimplemented!("Vector Clobber Restores"), + }; + insts.push(Self::gen_load_stack( + StackAMode::SPOffset(-cur_offset, ty), + Writable::from_reg(real_reg_to_reg(reg.to_reg())), + ty, + )); + cur_offset += 1 + } + insts + } + + fn gen_call( + dest: &CallDest, + uses: CallArgList, + defs: CallRetList, + clobbers: PRegSet, + opcode: ir::Opcode, + _tmp: Writable, + callee_conv: isa::CallConv, + caller_conv: isa::CallConv, + callee_pop_size: u32, + ) -> SmallVec<[Self::I; 2]> { + let mut insts = SmallVec::new(); + match &dest { + &CallDest::ExtName(ref name, _) => insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: name.clone(), + uses, + defs, + clobbers, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + callee_pop_size, + }), + }), + &CallDest::Reg(reg) => insts.push(Inst::CallInd { + info: Box::new(CallIndInfo { + rn: *reg, + uses, + defs, + clobbers, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + callee_pop_size, + }), + }), + } + insts + } + + fn gen_memcpy Writable>( + call_conv: isa::CallConv, + dst: Reg, + src: Reg, + size: usize, + mut alloc_tmp: F, + ) -> SmallVec<[Self::I; 8]> { + let mut insts = SmallVec::new(); + let arg0 = Writable::from_reg(x_reg(10)); + let arg1 = Writable::from_reg(x_reg(11)); + let arg2 = Writable::from_reg(x_reg(12)); + let tmp = alloc_tmp(Self::word_type()); + insts.extend(Inst::load_constant_u64(tmp, size as u64, &mut alloc_tmp).into_iter()); + insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: ExternalName::LibCall(LibCall::Memcpy), + uses: smallvec![ + CallArgPair { + vreg: dst, + preg: arg0.to_reg() + }, + CallArgPair { + vreg: src, + preg: arg1.to_reg() + }, + CallArgPair { + vreg: tmp.to_reg(), + preg: arg2.to_reg() + } + ], + defs: smallvec![], + clobbers: Self::get_regs_clobbered_by_call(call_conv), + opcode: Opcode::Call, + caller_callconv: call_conv, + callee_callconv: call_conv, + callee_pop_size: 0, + }), + }); + insts + } + + fn get_number_of_spillslots_for_value( + rc: RegClass, + _target_vector_bytes: u32, + isa_flags: &RiscvFlags, + ) -> u32 { + // We allocate in terms of 8-byte slots. + match rc { + RegClass::Int => 1, + RegClass::Float => 1, + RegClass::Vector => (isa_flags.min_vec_reg_size() / 8) as u32, + } + } + + /// Get the current virtual-SP offset from an instruction-emission state. + fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 { + s.virtual_sp_offset + } + + /// Get the nominal-SP-to-FP offset from an instruction-emission state. 
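+    /// (The "nominal SP" is the stack-pointer reference point against which
+    /// `AMode::NominalSPOffset` addresses are resolved; this returns its distance
+    /// from the frame pointer as tracked in `EmitState`.)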
+ fn get_nominal_sp_to_fp(s: &EmitState) -> i64 { + s.nominal_sp_to_fp + } + + fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet { + if call_conv_of_callee == isa::CallConv::Tail { + TAIL_CLOBBERS + } else { + DEFAULT_CLOBBERS + } + } + + fn get_clobbered_callee_saves( + call_conv: isa::CallConv, + _flags: &settings::Flags, + _sig: &Signature, + regs: &[Writable], + ) -> Vec> { + let mut regs: Vec> = regs + .iter() + .cloned() + .filter(|r| is_reg_saved_in_prologue(call_conv, r.to_reg())) + .collect(); + + regs.sort(); + regs + } + + fn is_frame_setup_needed( + is_leaf: bool, + stack_args_size: u32, + num_clobbered_callee_saves: usize, + fixed_frame_storage_size: u32, + ) -> bool { + !is_leaf + // The function arguments that are passed on the stack are addressed + // relative to the Frame Pointer. + || stack_args_size > 0 + || num_clobbered_callee_saves > 0 + || fixed_frame_storage_size > 0 + } + + fn gen_inline_probestack( + insts: &mut SmallInstVec, + call_conv: isa::CallConv, + frame_size: u32, + guard_size: u32, + ) { + // Unroll at most n consecutive probes, before falling back to using a loop + const PROBE_MAX_UNROLL: u32 = 3; + // Number of probes that we need to perform + let probe_count = align_to(frame_size, guard_size) / guard_size; + + if probe_count <= PROBE_MAX_UNROLL { + Self::gen_probestack_unroll(insts, guard_size, probe_count) + } else { + Self::gen_probestack_loop(insts, call_conv, guard_size, probe_count) + } + } +} + +impl Riscv64ABICallSite { + pub fn emit_return_call(mut self, ctx: &mut Lower, args: isle::ValueSlice) { + let (new_stack_arg_size, old_stack_arg_size) = + self.emit_temporary_tail_call_frame(ctx, args); + + let dest = self.dest().clone(); + let opcode = self.opcode(); + let uses = self.take_uses(); + let info = Box::new(ReturnCallInfo { + uses, + opcode, + old_stack_arg_size, + new_stack_arg_size, + }); + + match dest { + CallDest::ExtName(name, RelocDistance::Near) => { + ctx.emit(Inst::ReturnCall { + callee: Box::new(name), + info, + }); + } + CallDest::ExtName(name, RelocDistance::Far) => { + let callee = ctx.alloc_tmp(ir::types::I64).only_reg().unwrap(); + ctx.emit(Inst::LoadExtName { + rd: callee, + name: Box::new(name), + offset: 0, + }); + ctx.emit(Inst::ReturnCallInd { + callee: callee.to_reg(), + info, + }); + } + CallDest::Reg(callee) => ctx.emit(Inst::ReturnCallInd { callee, info }), + } + } +} + +// TODO(akashin): Figure out the correct clobbering convention. +const CALLEE_SAVE_X_REG: [bool; 32] = [ + false, false, false, false, false, false, false, false, // 0-7 + false, false, false, false, false, false, false, false, // 8-15 + false, false, false, false, false, false, false, false, // 16-23 + false, false, false, false, false, false, false, false, // 24-31 +]; +const CALLEE_SAVE_F_REG: [bool; 32] = [ + false, false, false, false, false, false, false, false, // 0-7 + true, false, false, false, false, false, false, false, // 8-15 + false, false, true, true, true, true, true, true, // 16-23 + true, true, true, true, false, false, false, false, // 24-31 +]; + +/// This should be the registers that must be saved by callee. +#[inline] +fn is_reg_saved_in_prologue(conv: CallConv, reg: RealReg) -> bool { + if conv == CallConv::Tail { + return false; + } + + match reg.class() { + RegClass::Int => CALLEE_SAVE_X_REG[reg.hw_enc() as usize], + RegClass::Float => CALLEE_SAVE_F_REG[reg.hw_enc() as usize], + // All vector registers are caller saved. 
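+        // (there is no callee-save table for the vector class; returning `false`
+        // means any vector register may be clobbered across a call)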
+ RegClass::Vector => false, + } +} + +fn compute_clobber_size(clobbers: &[Writable]) -> u32 { + let mut clobbered_size = 0; + for reg in clobbers { + match reg.to_reg().class() { + RegClass::Int => { + clobbered_size += 8; + } + RegClass::Float => { + clobbered_size += 8; + } + RegClass::Vector => unimplemented!("Vector Size Clobbered"), + } + } + align_to(clobbered_size, 16) +} + +const fn default_clobbers() -> PRegSet { + PRegSet::empty() + .with(px_reg(1)) + .with(px_reg(5)) + .with(px_reg(6)) + .with(px_reg(7)) + .with(px_reg(10)) + .with(px_reg(11)) + // CTX register is not clobbered. + // .with(px_reg(12)) + .with(px_reg(13)) + .with(px_reg(14)) + .with(px_reg(15)) + .with(px_reg(16)) + .with(px_reg(17)) + .with(px_reg(28)) + .with(px_reg(29)) + .with(px_reg(30)) + .with(px_reg(31)) + // F Regs + // .with(pf_reg(0)) + // .with(pf_reg(1)) + // .with(pf_reg(2)) + // .with(pf_reg(3)) + // .with(pf_reg(4)) + // .with(pf_reg(5)) + // .with(pf_reg(6)) + // .with(pf_reg(7)) + // .with(pf_reg(9)) + // .with(pf_reg(10)) + // .with(pf_reg(11)) + // .with(pf_reg(12)) + // .with(pf_reg(13)) + // .with(pf_reg(14)) + // .with(pf_reg(15)) + // .with(pf_reg(16)) + // .with(pf_reg(17)) + // .with(pf_reg(28)) + // .with(pf_reg(29)) + // .with(pf_reg(30)) + // .with(pf_reg(31)) + // V Regs - All vector regs get clobbered + // .with(pv_reg(0)) + // .with(pv_reg(1)) + // .with(pv_reg(2)) + // .with(pv_reg(3)) + // .with(pv_reg(4)) + // .with(pv_reg(5)) + // .with(pv_reg(6)) + // .with(pv_reg(7)) + // .with(pv_reg(8)) + // .with(pv_reg(9)) + // .with(pv_reg(10)) + // .with(pv_reg(11)) + // .with(pv_reg(12)) + // .with(pv_reg(13)) + // .with(pv_reg(14)) + // .with(pv_reg(15)) + // .with(pv_reg(16)) + // .with(pv_reg(17)) + // .with(pv_reg(18)) + // .with(pv_reg(19)) + // .with(pv_reg(20)) + // .with(pv_reg(21)) + // .with(pv_reg(22)) + // .with(pv_reg(23)) + // .with(pv_reg(24)) + // .with(pv_reg(25)) + // .with(pv_reg(26)) + // .with(pv_reg(27)) + // .with(pv_reg(28)) + // .with(pv_reg(29)) + // .with(pv_reg(30)) + // .with(pv_reg(31)) +} + +const DEFAULT_CLOBBERS: PRegSet = default_clobbers(); + +// All allocatable registers are clobbered by calls using the `tail` calling +// convention. +const fn tail_clobbers() -> PRegSet { + PRegSet::empty() + // `x0` is the zero register, and not allocatable. + .with(px_reg(1)) + // `x2` is the stack pointer, `x3` is the global pointer, and `x4` is + // the thread pointer. None are allocatable. + .with(px_reg(5)) + .with(px_reg(6)) + .with(px_reg(7)) + // `x8` is the frame pointer, and not allocatable. + .with(px_reg(9)) + .with(px_reg(10)) + .with(px_reg(10)) + .with(px_reg(11)) + .with(px_reg(12)) + .with(px_reg(13)) + .with(px_reg(14)) + .with(px_reg(15)) + .with(px_reg(16)) + .with(px_reg(17)) + .with(px_reg(18)) + .with(px_reg(19)) + .with(px_reg(20)) + .with(px_reg(21)) + .with(px_reg(22)) + .with(px_reg(23)) + .with(px_reg(24)) + .with(px_reg(25)) + .with(px_reg(26)) + .with(px_reg(27)) + .with(px_reg(28)) + .with(px_reg(29)) + // `x30` and `x31` are reserved as scratch registers, and are not + // allocatable. 
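+        // As in the riscv64 backend this file is derived from, x0 (zero), x2/x3/x4
+        // (sp/gp/tp), x8 (fp) and x30/x31 (scratch) are never allocatable and so are
+        // omitted from the clobber set.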
+ // + // F Regs + // .with(pf_reg(0)) + // .with(pf_reg(1)) + // .with(pf_reg(2)) + // .with(pf_reg(3)) + // .with(pf_reg(4)) + // .with(pf_reg(5)) + // .with(pf_reg(6)) + // .with(pf_reg(7)) + // .with(pf_reg(9)) + // .with(pf_reg(10)) + // .with(pf_reg(11)) + // .with(pf_reg(12)) + // .with(pf_reg(13)) + // .with(pf_reg(14)) + // .with(pf_reg(15)) + // .with(pf_reg(16)) + // .with(pf_reg(17)) + // .with(pf_reg(18)) + // .with(pf_reg(19)) + // .with(pf_reg(20)) + // .with(pf_reg(21)) + // .with(pf_reg(22)) + // .with(pf_reg(23)) + // .with(pf_reg(24)) + // .with(pf_reg(25)) + // .with(pf_reg(26)) + // .with(pf_reg(27)) + // .with(pf_reg(28)) + // .with(pf_reg(29)) + // .with(pf_reg(30)) + // .with(pf_reg(31)) + // V Regs + // .with(pv_reg(0)) + // .with(pv_reg(1)) + // .with(pv_reg(2)) + // .with(pv_reg(3)) + // .with(pv_reg(4)) + // .with(pv_reg(5)) + // .with(pv_reg(6)) + // .with(pv_reg(7)) + // .with(pv_reg(8)) + // .with(pv_reg(9)) + // .with(pv_reg(10)) + // .with(pv_reg(11)) + // .with(pv_reg(12)) + // .with(pv_reg(13)) + // .with(pv_reg(14)) + // .with(pv_reg(15)) + // .with(pv_reg(16)) + // .with(pv_reg(17)) + // .with(pv_reg(18)) + // .with(pv_reg(19)) + // .with(pv_reg(20)) + // .with(pv_reg(21)) + // .with(pv_reg(22)) + // .with(pv_reg(23)) + // .with(pv_reg(24)) + // .with(pv_reg(25)) + // .with(pv_reg(26)) + // .with(pv_reg(27)) + // .with(pv_reg(28)) + // .with(pv_reg(29)) + // .with(pv_reg(30)) + // .with(pv_reg(31)) +} + +const TAIL_CLOBBERS: PRegSet = tail_clobbers(); + +impl Riscv64MachineDeps { + fn gen_probestack_unroll(insts: &mut SmallInstVec, guard_size: u32, probe_count: u32) { + insts.reserve(probe_count as usize); + for i in 0..probe_count { + let offset = (guard_size * (i + 1)) as i64; + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(-offset, I8), + zero_reg(), + I32, + )); + } + } + + fn gen_probestack_loop( + insts: &mut SmallInstVec, + call_conv: isa::CallConv, + guard_size: u32, + probe_count: u32, + ) { + // Must be a caller-saved register that is not an argument. + let tmp = match call_conv { + isa::CallConv::Tail => Writable::from_reg(x_reg(1)), + _ => Writable::from_reg(x_reg(28)), // t3 + }; + insts.push(Inst::StackProbeLoop { + guard_size, + probe_count, + tmp, + }); + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst.isle b/cranelift/codegen/src/isa/zkasm/inst.isle new file mode 100644 index 000000000000..fb040a036493 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst.isle @@ -0,0 +1,3007 @@ +;; Instruction formats. +(type MInst + (enum + ;; A no-op of zero size. + (Nop0) + (Nop4) + + ;; Label to output at the beginning of a block + (Label + (imm usize)) + + ;; load immediate + (Lui + (rd WritableReg) + (imm Imm20)) + + (LoadConst32 + (rd WritableReg) + (imm u32)) + + (LoadConst64 + (rd WritableReg) + (imm u64)) + + (Auipc + (rd WritableReg) + (imm Imm20)) + + ;; An ALU operation with one register sources and a register destination. + (FpuRR + (alu_op FpuOPRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs Reg)) + + + ;; An ALU operation with two register sources and a register destination. + (AluRRR + (alu_op AluOPRRR) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg)) + + ;; An ALU operation with two register sources and a register destination. + (FpuRRR + (alu_op FpuOPRRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg)) + + ;; An ALU operation with three register sources and a register destination. 
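+    ;; (used for the fused multiply-add family; e.g. FmaddD computes
+    ;; rd ← rs1 × rs2 + rs3)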
+ (FpuRRRR + (alu_op FpuOPRRRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg) + (rs3 Reg)) + + ;; An ALU operation with a register source and an immediate-12 source, and a register + ;; destination. + (AluRRImm12 + (alu_op AluOPRRI) + (rd WritableReg) + (rs Reg) + (imm12 Imm12)) + + ;; An load + (Load + (rd WritableReg) + (op LoadOP) + (flags MemFlags) + (from AMode)) + ;; An Store + (Store + (to AMode) + (op StoreOP) + (flags MemFlags) + (src Reg)) + + ;; A pseudo-instruction that captures register arguments in vregs. + (Args + (args VecArgPair)) + + (Ret (rets VecRetPair) + (stack_bytes_to_pop u32)) + + (Extend + (rd WritableReg) + (rn Reg) + (signed bool) + (from_bits u8) + (to_bits u8)) + + (AdjustSp + (amount i64)) + (Call + (info BoxCallInfo)) + + ;; A machine indirect-call instruction. + (CallInd + (info BoxCallIndInfo)) + + ;; A direct return-call macro instruction. + (ReturnCall + (callee BoxExternalName) + (info BoxReturnCallInfo)) + + ;; An indirect return-call macro instruction. + (ReturnCallInd + (callee Reg) + (info BoxReturnCallInfo)) + + (TrapIf + (test Reg) + (trap_code TrapCode)) + + ;; use a simple compare to decide to cause trap or not. + (TrapIfC + (rs1 Reg) + (rs2 Reg) + (cc IntCC) + (trap_code TrapCode)) + + (Jal + ;; (rd WritableReg) don't use + (dest BranchTarget)) + + (CondBr + (taken BranchTarget) + (not_taken BranchTarget) + (kind IntegerCompare)) + + ;; Load an inline symbol reference. + (LoadExtName + (rd WritableReg) + (name BoxExternalName) + (offset i64)) + + ;; Load address referenced by `mem` into `rd`. + (LoadAddr + (rd WritableReg) + (mem AMode)) + + ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This + ;; controls how AMode::NominalSPOffset args are lowered. + (VirtualSPOffsetAdj + (amount i64)) + + ;; A MOV instruction. These are encoded as OrR's (AluRRR form) but we + ;; keep them separate at the `Inst` level for better pretty-printing + ;; and faster `is_move()` logic. + (Mov + (rd WritableReg) + (rm Reg) + (ty Type)) + + ;; A MOV instruction, but where the source register is a non-allocatable + ;; PReg. It's important that the register be non-allocatable, as regalloc2 + ;; will not see it as used. + (MovFromPReg + (rd WritableReg) + (rm PReg)) + + (Fence + (pred FenceReq) + (succ FenceReq)) + + (FenceI) + + (ECall) + + (EBreak) + + ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at + ;; runtime. + (Udf + (trap_code TrapCode)) + ;; a jump and link register operation + (Jalr + ;;Plain unconditional jumps (assembler pseudo-op J) are encoded as a JAL with rd=x0. + (rd WritableReg) + (base Reg) + (offset Imm12)) + + ;; atomic operations. + (Atomic + (op AtomicOP) + (rd WritableReg) + (addr Reg) + (src Reg) + (amo AMO)) + ;; an atomic store + (AtomicStore + (src Reg) + (ty Type) + (p Reg)) + ;; an atomic load. + (AtomicLoad + (rd WritableReg) + (ty Type) + (p Reg)) + + ;; an atomic nand need using loop to implement. 
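+    ;; (this pseudo-instruction covers RMW ops that have no single AMO encoding,
+    ;; such as nand; in the riscv64 backend it is derived from, it expands to a
+    ;; load-reserved/store-conditional retry loop)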
+ (AtomicRmwLoop + (offset Reg) + (op AtomicRmwOp) + (dst WritableReg) + (ty Type) + (p Reg) + (x Reg) + (t0 WritableReg)) + + ;; select x or y base on condition + (Select + (dst VecWritableReg) + (ty Type) + (condition Reg) + (x ValueRegs) + (y ValueRegs)) + + (BrTable + (index Reg) + (tmp1 WritableReg) + (tmp2 WritableReg) + (targets VecBranchTarget)) + + ;; atomic compare and set operation + (AtomicCas + (offset Reg) + (t0 WritableReg) + (dst WritableReg) + (e Reg) + (addr Reg) + (v Reg) + (ty Type)) + ;; select x or y base on op_code + (IntSelect + (op IntSelectOP) + (dst VecWritableReg) + (x ValueRegs) + (y ValueRegs) + (ty Type)) + ;; an integer compare. + (Icmp + (cc IntCC) + (rd WritableReg) + (a ValueRegs) + (b ValueRegs) + (ty Type)) + ;; select a reg base on condition. + ;; very useful because in lowering stage we can not have condition branch. + (SelectReg + (rd WritableReg) + (rs1 Reg) + (rs2 Reg) + (condition IntegerCompare)) + ;; + (FcvtToInt + (is_sat bool) + (rd WritableReg) + (tmp WritableReg) ;; a float register to load bounds. + (rs Reg) + (is_signed bool) + (in_type Type) + (out_type Type)) + + (RawData (data VecU8)) + + ;; An unwind pseudo-instruction. + (Unwind + (inst UnwindInst)) + + ;; A dummy use, useful to keep a value alive. + (DummyUse + (reg Reg)) + ;;; + (FloatRound + (op FloatRoundOP) + (rd WritableReg) + (int_tmp WritableReg) + (f_tmp WritableReg) + (rs Reg) + (ty Type)) + ;;;; FMax + (FloatSelect + (op FloatSelectOP) + (rd WritableReg) + ;; a integer register + (tmp WritableReg) + (rs1 Reg) + (rs2 Reg) + (ty Type)) + + ;; popcnt if target doesn't support extension B + ;; use iteration to implement. + (Popcnt + (sum WritableReg) + (step WritableReg) + (tmp WritableReg) + (rs Reg) + (ty Type)) + + ;;; counting leading or trailing zeros. + (Cltz + ;; leading or trailing. + (leading bool) + (sum WritableReg) + (step WritableReg) + (tmp WritableReg) + (rs Reg) + (ty Type)) + ;; Byte-reverse register + (Rev8 + (rs Reg) + (step WritableReg) + (tmp WritableReg) + (rd WritableReg)) + ;; + (Brev8 + (rs Reg) + (ty Type) + (step WritableReg) + (tmp WritableReg) + (tmp2 WritableReg) + (rd WritableReg)) + (StackProbeLoop + (guard_size u32) + (probe_count u32) + (tmp WritableReg)) + + (VecAluRRRR + (op VecAluOpRRRR) + (vd WritableReg) + (vd_src Reg) + (vs2 Reg) + (vs1 Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRRImm5 + (op VecAluOpRRRImm5) + (vd WritableReg) + (vd_src Reg) + (vs2 Reg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRR + (op VecAluOpRRR) + (vd WritableReg) + (vs2 Reg) + (vs1 Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRImm5 + (op VecAluOpRRImm5) + (vd WritableReg) + (vs2 Reg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRR + (op VecAluOpRR) + (vd WritableReg) + (vs Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRImm5 + (op VecAluOpRImm5) + (vd WritableReg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecSetState + (rd WritableReg) + (vstate VState)) + + (VecLoad + (eew VecElementWidth) + (to WritableReg) + (from VecAMode) + (flags MemFlags) + (mask VecOpMasking) + (vstate VState)) + + (VecStore + (eew VecElementWidth) + (to VecAMode) + (from Reg) + (flags MemFlags) + (mask VecOpMasking) + (vstate VState)) + + ;; An addition with 2 32-bit immediates. + (AddImm32 + (rd WritableReg) + (src1 Imm32) + (src2 Imm32)) + + ;; A multiplication with 2 32-bit immediates. 
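+    ;; Like AddImm32 above, this is a zkASM-specific pseudo-instruction; the
+    ;; zk_add/zk_mul helpers later in this file construct these two variants.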
+ (MulImm32 + (rd WritableReg) + (src1 Imm32) + (src2 Imm32)) + +)) + + +(type FloatSelectOP (enum + (Max) + (Min) +)) + +(type FloatRoundOP (enum + (Nearest) + (Ceil) + (Floor) + (Trunc) +)) + +(type IntSelectOP (enum + (Smax) + (Umax) + (Smin) + (Umin) +)) + +(type AtomicOP (enum + (LrW) + (ScW) + (AmoswapW) + (AmoaddW) + (AmoxorW) + (AmoandW) + (AmoorW) + (AmominW) + (AmomaxW) + (AmominuW) + (AmomaxuW) + (LrD) + (ScD) + (AmoswapD) + (AmoaddD) + (AmoxorD) + (AmoandD) + (AmoorD) + (AmominD) + (AmomaxD) + (AmominuD) + (AmomaxuD) +)) + +(type FpuOPRRRR (enum + ;; float32 + (FmaddS) + (FmsubS) + (FnmsubS) + (FnmaddS) + ;; float64 + (FmaddD) + (FmsubD) + (FnmsubD) + (FnmaddD) +)) + +(type FClassResult (enum + ;;0 rs1 is −∞. + (NegInfinite) + ;; 1 rs1 is a negative normal number. + (NegNormal) + ;; 2 rs1 is a negative subnormal number. + (NegSubNormal) + ;; 3 rs1 is −0. + (NegZero) + ;; 4 rs1 is +0. + (PosZero) + ;; 5 rs1 is a positive subnormal number. + (PosSubNormal) + ;; 6 rs1 is a positive normal number. + (PosNormal) + ;; 7 rs1 is +∞. + (PosInfinite) + ;; 8 rs1 is a signaling NaN. + (SNaN) + ;; 9 rs1 is a quiet NaN. + (QNaN) +)) + +(type FpuOPRR (enum + ;; RV32F Standard Extension + (FsqrtS) + (FcvtWS) + (FcvtWuS) + (FmvXW) + (FclassS) + (FcvtSw) + (FcvtSwU) + (FmvWX) + + + ;; RV64F Standard Extension (in addition to RV32F) + (FcvtLS) + (FcvtLuS) + (FcvtSL) + (FcvtSLU) + + + ;; RV64D Standard Extension (in addition to RV32D) + (FcvtLD) + (FcvtLuD) + (FmvXD) + (FcvtDL) + (FcvtDLu) + (FmvDX) + + ;; RV32D Standard Extension + (FsqrtD) + (FcvtSD) + (FcvtDS) + (FclassD) + (FcvtWD) + (FcvtWuD) + (FcvtDW) + (FcvtDWU) + ;; bitmapip + +)) + +(type LoadOP (enum + (Lb) + (Lh) + (Lw) + (Lbu) + (Lhu) + (Lwu) + (Ld) + (Flw) + (Fld) +)) + +(type StoreOP (enum + (Sb) + (Sh) + (Sw) + (Sd) + (Fsw) + (Fsd) +)) + +(type AluOPRRR (enum + ;; base set + (Add) + (Sub) + (Sll) + (Slt) + (SltU) + (Sgt) + (Sgtu) + (Xor) + (Srl) + (Sra) + (Or) + (And) + + ;; RV64I Base Instruction Set (in addition to RV32I) + (Addw) + (Subw) + (Sllw) + (Srlw) + (Sraw) + + + ;;RV32M Standard Extension + (Mul) + (Mulh) + (Mulhsu) + (Mulhu) + (Div) + (DivU) + (Rem) + (RemU) + + ;; RV64M Standard Extension (in addition to RV32M) + (Mulw) + (Divw) + (Divuw) + (Remw) + (Remuw) + + ;; Zba: Address Generation Instructions + (Adduw) + (Sh1add) + (Sh1adduw) + (Sh2add) + (Sh2adduw) + (Sh3add) + (Sh3adduw) + + ;; Zbb: Bit Manipulation Instructions + (Andn) + (Orn) + (Xnor) + (Max) + (Maxu) + (Min) + (Minu) + (Rol) + (Rolw) + (Ror) + (Rorw) + + ;; Zbs: Single-bit instructions + (Bclr) + (Bext) + (Binv) + (Bset) + + ;; Zbc: Carry-less multiplication + (Clmul) + (Clmulh) + (Clmulr) + + ;; Zbkb: Bit-manipulation for Cryptography + (Pack) + (Packw) + (Packh) +)) + + +(type FpuOPRRR (enum + ;; RV32F Standard Extension + (FaddS) + (FsubS) + (FmulS) + (FdivS) + + (FsgnjS) + (FsgnjnS) + (FsgnjxS) + (FminS) + (FmaxS) + (FeqS) + (FltS) + (FleS) + + ;; RV32D Standard Extension + (FaddD) + (FsubD) + (FmulD) + (FdivD) + (FsgnjD) + (FsgnjnD) + (FsgnjxD) + (FminD) + (FmaxD) + (FeqD) + (FltD) + (FleD) +)) + + + +(type AluOPRRI (enum + ;; Base ISA + (Addi) + (Slti) + (SltiU) + (Xori) + (Ori) + (Andi) + (Slli) + (Srli) + (Srai) + (Addiw) + (Slliw) + (SrliW) + (Sraiw) + + ;; Zba: Address Generation Instructions + (SlliUw) + + ;; Zbb: Bit Manipulation Instructions + (Clz) + (Clzw) + (Ctz) + (Ctzw) + (Cpop) + (Cpopw) + (Sextb) + (Sexth) + (Zexth) + (Rori) + (Roriw) + (Rev8) + (Brev8) + (Orcb) + + ;; Zbs: Single-bit instructions + (Bclri) + (Bexti) + (Binvi) + 
(Bseti) +)) + + +(type FRM (enum + ;; Round to Nearest, ties to Even + (RNE) + ;; Round towards Zero + (RTZ) + ;; Round Down (towards −∞) + (RDN) + ;; Round Up (towards +∞) + (RUP) + ;; Round to Nearest, ties to Max Magnitude + (RMM) + ;; In instruction’s rm field, selects dynamic rounding mode; + ;;In Rounding Mode register, Invalid. + (Fcsr) +)) + +(type FFlagsException (enum + ;; Invalid Operation + (NV) + ;; Divide by Zero + (DZ) + ;; Overflow + (OF) + ;; Underflow + (UF) + ;; Inexact + (NX) +)) + +;;;; input output read write +;;;; SI SO SR SW +;;;; PI PO PR PW +;;;; lowest four bit are used. +(type FenceReq (primitive u8)) + +(type VecBranchTarget (primitive VecBranchTarget)) +(type BoxCallInfo (primitive BoxCallInfo)) +(type BoxCallIndInfo (primitive BoxCallIndInfo)) +(type BoxReturnCallInfo (primitive BoxReturnCallInfo)) +(type IntegerCompare (primitive IntegerCompare)) +(type AMode (primitive AMode)) +(type OptionReg (primitive OptionReg)) +(type OptionImm12 (primitive OptionImm12)) +(type OptionUimm5 (primitive OptionUimm5)) +(type Imm12 (primitive Imm12)) +(type Imm32 (primitive Imm32)) +(type UImm5 (primitive UImm5)) +(type Imm5 (primitive Imm5)) +(type Imm20 (primitive Imm20)) +(type Imm3 (primitive Imm3)) +(type BranchTarget (primitive BranchTarget)) +(type OptionFloatRoundingMode (primitive OptionFloatRoundingMode)) +(type VecU8 (primitive VecU8)) +(type AMO (primitive AMO)) +(type VecMachLabel extern (enum)) + + +;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type XReg (primitive XReg)) +(type WritableXReg (primitive WritableXReg)) +(type FReg (primitive FReg)) +(type WritableFReg (primitive WritableFReg)) +(type VReg (primitive VReg)) +(type WritableVReg (primitive WritableVReg)) + +;; Construct a new `XReg` from a `Reg`. +;; +;; Asserts that the register has a Integer RegClass. +(decl xreg_new (Reg) XReg) +(extern constructor xreg_new xreg_new) +(convert Reg XReg xreg_new) + +;; Construct a new `WritableXReg` from a `WritableReg`. +;; +;; Asserts that the register has a Integer RegClass. +(decl writable_xreg_new (WritableReg) WritableXReg) +(extern constructor writable_xreg_new writable_xreg_new) +(convert WritableReg WritableXReg writable_xreg_new) + +;; Put a value into a XReg. +;; +;; Asserts that the value goes into a XReg. +(decl put_in_xreg (Value) XReg) +(rule (put_in_xreg val) (xreg_new (put_in_reg val))) +(convert Value XReg put_in_xreg) + +;; Construct an `InstOutput` out of a single XReg register. +(decl output_xreg (XReg) InstOutput) +(rule (output_xreg x) (output_reg x)) +(convert XReg InstOutput output_xreg) + +;; Convert a `WritableXReg` to an `XReg`. +(decl pure writable_xreg_to_xreg (WritableXReg) XReg) +(extern constructor writable_xreg_to_xreg writable_xreg_to_xreg) +(convert WritableXReg XReg writable_xreg_to_xreg) + +;; Convert a `WritableXReg` to an `WritableReg`. +(decl pure writable_xreg_to_writable_reg (WritableXReg) WritableReg) +(extern constructor writable_xreg_to_writable_reg writable_xreg_to_writable_reg) +(convert WritableXReg WritableReg writable_xreg_to_writable_reg) + +;; Convert a `WritableXReg` to an `Reg`. +(decl pure writable_xreg_to_reg (WritableXReg) Reg) +(rule (writable_xreg_to_reg x) (writable_xreg_to_writable_reg x)) +(convert WritableXReg Reg writable_xreg_to_reg) + +;; Convert an `XReg` to a `Reg`. +(decl pure xreg_to_reg (XReg) Reg) +(extern constructor xreg_to_reg xreg_to_reg) +(convert XReg Reg xreg_to_reg) + +;; Convert a `XReg` to a `ValueRegs`. 
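+;; (a `convert` declaration lets ISLE apply the named constructor implicitly
+;; wherever a term of the source type appears where the target type is expected)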
+(decl xreg_to_value_regs (XReg) ValueRegs) +(rule (xreg_to_value_regs x) (value_reg x)) +(convert XReg ValueRegs xreg_to_reg) + +;; Convert a `WritableXReg` to a `ValueRegs`. +(decl writable_xreg_to_value_regs (WritableXReg) ValueRegs) +(rule (writable_xreg_to_value_regs x) (value_reg x)) +(convert WritableXReg ValueRegs writable_xreg_to_value_regs) + +;; Allocates a new `WritableXReg`. +(decl temp_writable_xreg () WritableXReg) +(rule (temp_writable_xreg) (temp_writable_reg $I64)) + + +;; Construct a new `FReg` from a `Reg`. +;; +;; Asserts that the register has a Float RegClass. +(decl freg_new (Reg) FReg) +(extern constructor freg_new freg_new) +(convert Reg FReg freg_new) + +;; Construct a new `WritableFReg` from a `WritableReg`. +;; +;; Asserts that the register has a Float RegClass. +(decl writable_freg_new (WritableReg) WritableFReg) +(extern constructor writable_freg_new writable_freg_new) +(convert WritableReg WritableFReg writable_freg_new) + +;; Put a value into a FReg. +;; +;; Asserts that the value goes into a FReg. +(decl put_in_freg (Value) FReg) +(rule (put_in_freg val) (freg_new (put_in_reg val))) +(convert Value FReg put_in_freg) + +;; Construct an `InstOutput` out of a single FReg register. +(decl output_freg (FReg) InstOutput) +(rule (output_freg x) (output_reg x)) +(convert FReg InstOutput output_freg) + +;; Convert a `WritableFReg` to an `FReg`. +(decl pure writable_freg_to_freg (WritableFReg) FReg) +(extern constructor writable_freg_to_freg writable_freg_to_freg) +(convert WritableFReg FReg writable_freg_to_freg) + +;; Convert a `WritableFReg` to an `WritableReg`. +(decl pure writable_freg_to_writable_reg (WritableFReg) WritableReg) +(extern constructor writable_freg_to_writable_reg writable_freg_to_writable_reg) +(convert WritableFReg WritableReg writable_freg_to_writable_reg) + +;; Convert a `WritableFReg` to an `Reg`. +(decl pure writable_freg_to_reg (WritableFReg) Reg) +(rule (writable_freg_to_reg x) (writable_freg_to_writable_reg x)) +(convert WritableFReg Reg writable_freg_to_reg) + +;; Convert an `FReg` to a `Reg`. +(decl pure freg_to_reg (FReg) Reg) +(extern constructor freg_to_reg freg_to_reg) +(convert FReg Reg freg_to_reg) + +;; Convert a `FReg` to a `ValueRegs`. +(decl freg_to_value_regs (FReg) ValueRegs) +(rule (freg_to_value_regs x) (value_reg x)) +(convert FReg ValueRegs xreg_to_reg) + +;; Convert a `WritableFReg` to a `ValueRegs`. +(decl writable_freg_to_value_regs (WritableFReg) ValueRegs) +(rule (writable_freg_to_value_regs x) (value_reg x)) +(convert WritableFReg ValueRegs writable_freg_to_value_regs) + +;; Allocates a new `WritableFReg`. +(decl temp_writable_freg () WritableFReg) +(rule (temp_writable_freg) (temp_writable_reg $F64)) + + + +;; Construct a new `VReg` from a `Reg`. +;; +;; Asserts that the register has a Vector RegClass. +(decl vreg_new (Reg) VReg) +(extern constructor vreg_new vreg_new) +(convert Reg VReg vreg_new) + +;; Construct a new `WritableVReg` from a `WritableReg`. +;; +;; Asserts that the register has a Vector RegClass. +(decl writable_vreg_new (WritableReg) WritableVReg) +(extern constructor writable_vreg_new writable_vreg_new) +(convert WritableReg WritableVReg writable_vreg_new) + +;; Put a value into a VReg. +;; +;; Asserts that the value goes into a VReg. +(decl put_in_vreg (Value) VReg) +(rule (put_in_vreg val) (vreg_new (put_in_reg val))) +(convert Value VReg put_in_vreg) + +;; Construct an `InstOutput` out of a single VReg register. 
+(decl output_vreg (VReg) InstOutput) +(rule (output_vreg x) (output_reg x)) +(convert VReg InstOutput output_vreg) + +;; Convert a `WritableVReg` to an `VReg`. +(decl pure writable_vreg_to_vreg (WritableVReg) VReg) +(extern constructor writable_vreg_to_vreg writable_vreg_to_vreg) +(convert WritableVReg VReg writable_vreg_to_vreg) + +;; Convert a `WritableVReg` to an `WritableReg`. +(decl pure writable_vreg_to_writable_reg (WritableVReg) WritableReg) +(extern constructor writable_vreg_to_writable_reg writable_vreg_to_writable_reg) +(convert WritableVReg WritableReg writable_vreg_to_writable_reg) + +;; Convert a `WritableVReg` to an `Reg`. +(decl pure writable_vreg_to_reg (WritableVReg) Reg) +(rule (writable_vreg_to_reg x) (writable_vreg_to_writable_reg x)) +(convert WritableVReg Reg writable_vreg_to_reg) + +;; Convert an `VReg` to a `Reg`. +(decl pure vreg_to_reg (VReg) Reg) +(extern constructor vreg_to_reg vreg_to_reg) +(convert VReg Reg vreg_to_reg) + +;; Convert a `VReg` to a `ValueRegs`. +(decl vreg_to_value_regs (VReg) ValueRegs) +(rule (vreg_to_value_regs x) (value_reg x)) +(convert VReg ValueRegs xreg_to_reg) + +;; Convert a `WritableVReg` to a `ValueRegs`. +(decl writable_vreg_to_value_regs (WritableVReg) ValueRegs) +(rule (writable_vreg_to_value_regs x) (value_reg x)) +(convert WritableVReg ValueRegs writable_vreg_to_value_regs) + +;; Allocates a new `WritableVReg`. +(decl temp_writable_vreg () WritableVReg) +(rule (temp_writable_vreg) (temp_writable_reg $I8X16)) + + +;; Converters + +(convert u8 i32 u8_as_i32) +(decl u8_as_i32 (u8) i32) +(extern constructor u8_as_i32 u8_as_i32) + +;; ISA Extension helpers + +(decl pure has_v () bool) +(extern constructor has_v has_v) + +(decl pure has_zbkb () bool) +(extern constructor has_zbkb has_zbkb) + +(decl pure has_zba () bool) +(extern constructor has_zba has_zba) + +(decl pure has_zbb () bool) +(extern constructor has_zbb has_zbb) + +(decl pure has_zbc () bool) +(extern constructor has_zbc has_zbc) + +(decl pure has_zbs () bool) +(extern constructor has_zbs has_zbs) + +(decl gen_float_round (FloatRoundOP Reg Type) Reg) +(rule + (gen_float_round op rs ty) + (let + ((rd WritableReg (temp_writable_reg ty)) + (tmp WritableXReg (temp_writable_xreg)) + (tmp2 WritableFReg (temp_writable_freg)) + (_ Unit (emit (MInst.FloatRound op rd tmp tmp2 rs ty)))) + (writable_reg_to_reg rd))) + +(decl gen_float_select (FloatSelectOP Reg Reg Type) Reg) +(rule + (gen_float_select op x y ty) + (let + ((rd WritableReg (temp_writable_reg ty)) + (tmp WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.FloatSelect op rd tmp x y ty)))) + (writable_reg_to_reg rd))) + + +;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; RV32I Base Integer Instruction Set + +(decl zk_add (Imm32 Imm32) XReg) +(rule (zk_add imm1 imm2) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AddImm32 dst imm1 imm2)))) + dst)) + +(decl zk_mul (Imm32 Imm32) XReg) +(rule (zk_mul imm1 imm2) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.MulImm32 dst imm1 imm2)))) + dst)) + +;; Helper for emitting the `add` instruction. +;; rd ← rs1 + rs2 +(decl rv_add (XReg XReg) XReg) +(rule (rv_add rs1 rs2) + (alu_rrr (AluOPRRR.Add) rs1 rs2)) + +;; Helper for emitting the `addi` ("Add Immediate") instruction. +;; rd ← rs1 + sext(imm) +(decl rv_addi (XReg Imm12) XReg) +(rule (rv_addi rs1 imm) + (alu_rr_imm12 (AluOPRRI.Addi) rs1 imm)) + +;; Helper for emitting the `sub` instruction. 
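+;; (also used by rv_neg below, which is encoded as `sub rd, zero, rs1`)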
+;; rd ← rs1 - rs2 +(decl rv_sub (XReg XReg) XReg) +(rule (rv_sub rs1 rs2) + (alu_rrr (AluOPRRR.Sub) rs1 rs2)) + +;; Helper for emitting the `neg` instruction. +;; This instruction is a mnemonic for `sub rd, zero, rs1`. +(decl rv_neg (XReg) XReg) +(rule (rv_neg rs1) + (alu_rrr (AluOPRRR.Sub) (zero_reg) rs1)) + +;; Helper for emitting the `sll` ("Shift Left Logical") instruction. +;; rd ← rs1 << rs2 +(decl rv_sll (XReg XReg) XReg) +(rule (rv_sll rs1 rs2) + (alu_rrr (AluOPRRR.Sll) rs1 rs2)) + +;; Helper for emitting the `slli` ("Shift Left Logical Immediate") instruction. +;; rd ← rs1 << uext(imm) +(decl rv_slli (XReg Imm12) XReg) +(rule (rv_slli rs1 imm) + (alu_rr_imm12 (AluOPRRI.Slli) rs1 imm)) + +;; Helper for emitting the `srl` ("Shift Right Logical") instruction. +;; rd ← rs1 >> rs2 +(decl rv_srl (XReg XReg) XReg) +(rule (rv_srl rs1 rs2) + (alu_rrr (AluOPRRR.Srl) rs1 rs2)) + +;; Helper for emitting the `srli` ("Shift Right Logical Immediate") instruction. +;; rd ← rs1 >> uext(imm) +(decl rv_srli (XReg Imm12) XReg) +(rule (rv_srli rs1 imm) + (alu_rr_imm12 (AluOPRRI.Srli) rs1 imm)) + +;; Helper for emitting the `sra` ("Shift Right Arithmetic") instruction. +;; rd ← rs1 >> rs2 +(decl rv_sra (XReg XReg) XReg) +(rule (rv_sra rs1 rs2) + (alu_rrr (AluOPRRR.Sra) rs1 rs2)) + +;; Helper for emitting the `srai` ("Shift Right Arithmetic Immediate") instruction. +;; rd ← rs1 >> uext(imm) +(decl rv_srai (XReg Imm12) XReg) +(rule (rv_srai rs1 imm) + (alu_rr_imm12 (AluOPRRI.Srai) rs1 imm)) + +;; Helper for emitting the `or` instruction. +;; rd ← rs1 ∨ rs2 +(decl rv_or (XReg XReg) XReg) +(rule (rv_or rs1 rs2) + (alu_rrr (AluOPRRR.Or) rs1 rs2)) + +;; Helper for emitting the `ori` ("Or Immediate") instruction. +;; rd ← rs1 ∨ uext(imm) +(decl rv_ori (XReg Imm12) XReg) +(rule (rv_ori rs1 imm) + (alu_rr_imm12 (AluOPRRI.Ori) rs1 imm)) + +;; Helper for emitting the `xor` instruction. +;; rd ← rs1 ⊕ rs2 +(decl rv_xor (XReg XReg) XReg) +(rule (rv_xor rs1 rs2) + (alu_rrr (AluOPRRR.Xor) rs1 rs2)) + +;; Helper for emitting the `xori` ("Exlusive Or Immediate") instruction. +;; rd ← rs1 ⊕ uext(imm) +(decl rv_xori (XReg Imm12) XReg) +(rule (rv_xori rs1 imm) + (alu_rr_imm12 (AluOPRRI.Xori) rs1 imm)) + +;; Helper for emitting the `not` instruction. +;; This instruction is a mnemonic for `xori rd, rs1, -1`. +(decl rv_not (XReg) XReg) +(rule (rv_not rs1) + (rv_xori rs1 (imm12_const -1))) + +;; Helper for emitting the `and` instruction. +;; rd ← rs1 ∧ rs2 +(decl rv_and (XReg XReg) XReg) +(rule (rv_and rs1 rs2) + (alu_rrr (AluOPRRR.And) rs1 rs2)) + +;; Helper for emitting the `andi` ("And Immediate") instruction. +;; rd ← rs1 ∧ uext(imm) +(decl rv_andi (XReg Imm12) XReg) +(rule (rv_andi rs1 imm) + (alu_rr_imm12 (AluOPRRI.Andi) rs1 imm)) + +;; Helper for emitting the `sltu` ("Set Less Than Unsigned") instruction. +;; rd ← rs1 < rs2 +(decl rv_sltu (XReg XReg) XReg) +(rule (rv_sltu rs1 rs2) + (alu_rrr (AluOPRRR.SltU) rs1 rs2)) + +;; Helper for emitting the `snez` instruction. +;; This instruction is a mnemonic for `sltu rd, zero, rs`. +(decl rv_snez (XReg) XReg) +(rule (rv_snez rs1) + (rv_sltu (zero_reg) rs1)) + +;; Helper for emiting the `sltiu` ("Set Less Than Immediate Unsigned") instruction. +;; rd ← rs1 < imm +(decl rv_sltiu (XReg Imm12) XReg) +(rule (rv_sltiu rs1 imm) + (alu_rr_imm12 (AluOPRRI.SltiU) rs1 imm)) + +;; Helper for emitting the `seqz` instruction. +;; This instruction is a mnemonic for `sltiu rd, rs, 1`. 
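+;; i.e. rd ← 1 when rs1 = 0, and rd ← 0 otherwise.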
+(decl rv_seqz (XReg) XReg) +(rule (rv_seqz rs1) + (rv_sltiu rs1 (imm12_const 1))) + + +;; RV64I Base Integer Instruction Set +;; Unlike RV32I instructions these are only present in the 64bit ISA + +;; Helper for emitting the `addw` ("Add Word") instruction. +;; rd ← sext32(rs1) + sext32(rs2) +(decl rv_addw (XReg XReg) XReg) +(rule (rv_addw rs1 rs2) + (alu_rrr (AluOPRRR.Addw) rs1 rs2)) + +;; Helper for emitting the `addiw` ("Add Word Immediate") instruction. +;; rd ← sext32(rs1) + imm +(decl rv_addiw (XReg Imm12) XReg) +(rule (rv_addiw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Addiw) rs1 imm)) + +;; Helper for emitting the `sext.w` ("Sign Extend Word") instruction. +;; This instruction is a mnemonic for `addiw rd, rs, zero`. +(decl rv_sextw (XReg) XReg) +(rule (rv_sextw rs1) + (rv_addiw rs1 (imm12_const 0))) + +;; Helper for emitting the `subw` ("Subtract Word") instruction. +;; rd ← sext32(rs1) - sext32(rs2) +(decl rv_subw (XReg XReg) XReg) +(rule (rv_subw rs1 rs2) + (alu_rrr (AluOPRRR.Subw) rs1 rs2)) + +;; Helper for emitting the `sllw` ("Shift Left Logical Word") instruction. +;; rd ← sext32(uext32(rs1) << rs2) +(decl rv_sllw (XReg XReg) XReg) +(rule (rv_sllw rs1 rs2) + (alu_rrr (AluOPRRR.Sllw) rs1 rs2)) + +;; Helper for emitting the `slliw` ("Shift Left Logical Immediate Word") instruction. +;; rd ← sext32(uext32(rs1) << imm) +(decl rv_slliw (XReg Imm12) XReg) +(rule (rv_slliw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Slliw) rs1 imm)) + +;; Helper for emitting the `srlw` ("Shift Right Logical Word") instruction. +;; rd ← sext32(uext32(rs1) >> rs2) +(decl rv_srlw (XReg XReg) XReg) +(rule (rv_srlw rs1 rs2) + (alu_rrr (AluOPRRR.Srlw) rs1 rs2)) + +;; Helper for emitting the `srliw` ("Shift Right Logical Immediate Word") instruction. +;; rd ← sext32(uext32(rs1) >> imm) +(decl rv_srliw (XReg Imm12) XReg) +(rule (rv_srliw rs1 imm) + (alu_rr_imm12 (AluOPRRI.SrliW) rs1 imm)) + +;; Helper for emitting the `sraw` ("Shift Right Arithmetic Word") instruction. +;; rd ← sext32(rs1 >> rs2) +(decl rv_sraw (XReg XReg) XReg) +(rule (rv_sraw rs1 rs2) + (alu_rrr (AluOPRRR.Sraw) rs1 rs2)) + +;; Helper for emitting the `sraiw` ("Shift Right Arithmetic Immediate Word") instruction. +;; rd ← sext32(rs1 >> imm) +(decl rv_sraiw (XReg Imm12) XReg) +(rule (rv_sraiw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Sraiw) rs1 imm)) + + +;; RV32M Extension +;; TODO: Enable these instructions only when we have the M extension + +;; Helper for emitting the `mul` instruction. +;; rd ← rs1 × rs2 +(decl rv_mul (XReg XReg) XReg) +(rule (rv_mul rs1 rs2) + (alu_rrr (AluOPRRR.Mul) rs1 rs2)) + +;; Helper for emitting the `mulh` ("Multiply High Signed Signed") instruction. +;; rd ← (sext(rs1) × sext(rs2)) » xlen +(decl rv_mulh (XReg XReg) XReg) +(rule (rv_mulh rs1 rs2) + (alu_rrr (AluOPRRR.Mulh) rs1 rs2)) + +;; Helper for emitting the `mulhu` ("Multiply High Unsigned Unsigned") instruction. +;; rd ← (uext(rs1) × uext(rs2)) » xlen +(decl rv_mulhu (XReg XReg) XReg) +(rule (rv_mulhu rs1 rs2) + (alu_rrr (AluOPRRR.Mulhu) rs1 rs2)) + +;; Helper for emitting the `div` instruction. +;; rd ← rs1 ÷ rs2 +(decl rv_div (XReg XReg) XReg) +(rule (rv_div rs1 rs2) + (alu_rrr (AluOPRRR.Div) rs1 rs2)) + +;; Helper for emitting the `divu` ("Divide Unsigned") instruction. +;; rd ← rs1 ÷ rs2 +(decl rv_divu (XReg XReg) XReg) +(rule (rv_divu rs1 rs2) + (alu_rrr (AluOPRRR.DivU) rs1 rs2)) + +;; Helper for emitting the `rem` instruction. 
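+;; Following RISC-V `rem` semantics, the result takes the sign of the dividend
+;; (rs1): for example, -7 rem 2 = -1.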
+;; rd ← rs1 mod rs2 +(decl rv_rem (XReg XReg) XReg) +(rule (rv_rem rs1 rs2) + (alu_rrr (AluOPRRR.Rem) rs1 rs2)) + +;; Helper for emitting the `remu` ("Remainder Unsigned") instruction. +;; rd ← rs1 mod rs2 +(decl rv_remu (XReg XReg) XReg) +(rule (rv_remu rs1 rs2) + (alu_rrr (AluOPRRR.RemU) rs1 rs2)) + + + +;; RV64M Extension +;; TODO: Enable these instructions only when we have the M extension + +;; Helper for emitting the `mulw` ("Multiply Word") instruction. +;; rd ← uext32(rs1) × uext32(rs2) +(decl rv_mulw (XReg XReg) XReg) +(rule (rv_mulw rs1 rs2) + (alu_rrr (AluOPRRR.Mulw) rs1 rs2)) + +;; Helper for emitting the `divw` ("Divide Word") instruction. +;; rd ← sext32(rs1) ÷ sext32(rs2) +(decl rv_divw (XReg XReg) XReg) +(rule (rv_divw rs1 rs2) + (alu_rrr (AluOPRRR.Divw) rs1 rs2)) + +;; Helper for emitting the `divuw` ("Divide Unsigned Word") instruction. +;; rd ← uext32(rs1) ÷ uext32(rs2) +(decl rv_divuw (XReg XReg) XReg) +(rule (rv_divuw rs1 rs2) + (alu_rrr (AluOPRRR.Divuw) rs1 rs2)) + +;; Helper for emitting the `remw` ("Remainder Word") instruction. +;; rd ← sext32(rs1) mod sext32(rs2) +(decl rv_remw (XReg XReg) XReg) +(rule (rv_remw rs1 rs2) + (alu_rrr (AluOPRRR.Remw) rs1 rs2)) + +;; Helper for emitting the `remuw` ("Remainder Unsigned Word") instruction. +;; rd ← uext32(rs1) mod uext32(rs2) +(decl rv_remuw (XReg XReg) XReg) +(rule (rv_remuw rs1 rs2) + (alu_rrr (AluOPRRR.Remuw) rs1 rs2)) + + +;; F and D Extensions +;; TODO: Enable these instructions only when we have the F or D extensions + +;; Helper for emitting the `fadd` instruction. +(decl rv_fadd (Type FReg FReg) FReg) +(rule (rv_fadd $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FaddS) $F32 rs1 rs2)) +(rule (rv_fadd $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FaddD) $F64 rs1 rs2)) + +;; Helper for emitting the `fsub` instruction. +(decl rv_fsub (Type FReg FReg) FReg) +(rule (rv_fsub $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsubS) $F32 rs1 rs2)) +(rule (rv_fsub $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsubD) $F64 rs1 rs2)) + +;; Helper for emitting the `fmul` instruction. +(decl rv_fmul (Type FReg FReg) FReg) +(rule (rv_fmul $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FmulS) $F32 rs1 rs2)) +(rule (rv_fmul $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FmulD) $F64 rs1 rs2)) + +;; Helper for emitting the `fdiv` instruction. +(decl rv_fdiv (Type FReg FReg) FReg) +(rule (rv_fdiv $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FdivS) $F32 rs1 rs2)) +(rule (rv_fdiv $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FdivD) $F64 rs1 rs2)) + +;; Helper for emitting the `fsqrt` instruction. +(decl rv_fsqrt (Type FReg) FReg) +(rule (rv_fsqrt $F32 rs1) (fpu_rr (FpuOPRR.FsqrtS) $F32 rs1)) +(rule (rv_fsqrt $F64 rs1) (fpu_rr (FpuOPRR.FsqrtD) $F64 rs1)) + +;; Helper for emitting the `fmadd` instruction. +(decl rv_fmadd (Type FReg FReg FReg) FReg) +(rule (rv_fmadd $F32 rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddS) $F32 rs1 rs2 rs3)) +(rule (rv_fmadd $F64 rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddD) $F64 rs1 rs2 rs3)) + +;; Helper for emitting the `fmv.x.w` instruction. +(decl rv_fmvxw (FReg) XReg) +(rule (rv_fmvxw r) (fpu_rr (FpuOPRR.FmvXW) $I32 r)) + +;; Helper for emitting the `fmv.x.d` instruction. +(decl rv_fmvxd (FReg) XReg) +(rule (rv_fmvxd r) (fpu_rr (FpuOPRR.FmvXD) $I64 r)) + +;; Helper for emitting the `fmv.w.x` instruction. +(decl rv_fmvwx (XReg) FReg) +(rule (rv_fmvwx r) (fpu_rr (FpuOPRR.FmvWX) $F32 r)) + +;; Helper for emitting the `fmv.d.x` instruction. +(decl rv_fmvdx (XReg) FReg) +(rule (rv_fmvdx r) (fpu_rr (FpuOPRR.FmvDX) $F64 r)) + +;; Helper for emitting the `fcvt.d.s` ("Float Convert Double to Single") instruction. 
+(decl rv_fcvtds (FReg) FReg) +(rule (rv_fcvtds rs1) (fpu_rr (FpuOPRR.FcvtDS) $F32 rs1)) + +;; Helper for emitting the `fcvt.s.d` ("Float Convert Single to Double") instruction. +(decl rv_fcvtsd (FReg) FReg) +(rule (rv_fcvtsd rs1) (fpu_rr (FpuOPRR.FcvtSD) $F64 rs1)) + +;; Helper for emitting the `fsgnj` ("Floating Point Sign Injection") instruction. +;; The output of this instruction is `rs1` with the sign bit from `rs2` +;; This implements the `copysign` operation +(decl rv_fsgnj (Type FReg FReg) FReg) +(rule (rv_fsgnj $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjS) $F32 rs1 rs2)) +(rule (rv_fsgnj $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjD) $F64 rs1 rs2)) + +;; Helper for emitting the `fsgnjn` ("Floating Point Sign Injection Negated") instruction. +;; The output of this instruction is `rs1` with the negated sign bit from `rs2` +;; When `rs1 == rs2` this implements the `neg` operation +(decl rv_fsgnjn (Type FReg FReg) FReg) +(rule (rv_fsgnjn $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjnS) $F32 rs1 rs2)) +(rule (rv_fsgnjn $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjnD) $F64 rs1 rs2)) + +;; Helper for emitting the `fneg` ("Floating Point Negate") instruction. +;; This instruction is a mnemonic for `fsgnjn rd, rs1, rs1` +(decl rv_fneg (Type FReg) FReg) +(rule (rv_fneg ty rs1) (rv_fsgnjn ty rs1 rs1)) + +;; Helper for emitting the `fsgnjx` ("Floating Point Sign Injection Exclusive") instruction. +;; The output of this instruction is `rs1` with the XOR of the sign bits from `rs1` and `rs2`. +;; When `rs1 == rs2` this implements `fabs` +(decl rv_fsgnjx (Type FReg FReg) FReg) +(rule (rv_fsgnjx $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjxS) $F32 rs1 rs2)) +(rule (rv_fsgnjx $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjxD) $F64 rs1 rs2)) + +;; Helper for emitting the `fabs` ("Floating Point Absolute") instruction. +;; This instruction is a mnemonic for `fsgnjx rd, rs1, rs1` +(decl rv_fabs (Type FReg) FReg) +(rule (rv_fabs ty rs1) (rv_fsgnjx ty rs1 rs1)) + +;; Helper for emitting the `feq` ("Float Equal") instruction. +(decl rv_feq (Type FReg FReg) XReg) +(rule (rv_feq $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FeqS) $I64 rs1 rs2)) +(rule (rv_feq $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FeqD) $I64 rs1 rs2)) + +;; Helper for emitting the `flt` ("Float Less Than") instruction. +(decl rv_flt (Type FReg FReg) XReg) +(rule (rv_flt $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FltS) $I64 rs1 rs2)) +(rule (rv_flt $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FltD) $I64 rs1 rs2)) + +;; Helper for emitting the `fle` ("Float Less Than or Equal") instruction. +(decl rv_fle (Type FReg FReg) XReg) +(rule (rv_fle $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FleS) $I64 rs1 rs2)) +(rule (rv_fle $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FleD) $I64 rs1 rs2)) + +;; Helper for emitting the `fgt` ("Float Greater Than") instruction. +;; Note: The arguments are reversed +(decl rv_fgt (Type FReg FReg) XReg) +(rule (rv_fgt ty rs1 rs2) (rv_flt ty rs2 rs1)) + +;; Helper for emitting the `fge` ("Float Greater Than or Equal") instruction. +;; Note: The arguments are reversed +(decl rv_fge (Type FReg FReg) XReg) +(rule (rv_fge ty rs1 rs2) (rv_fle ty rs2 rs1)) + + +;; `Zba` Extension Instructions + +;; Helper for emitting the `adduw` ("Add Unsigned Word") instruction. +;; rd ← uext32(rs1) + uext32(rs2) +(decl rv_adduw (XReg XReg) XReg) +(rule (rv_adduw rs1 rs2) + (alu_rrr (AluOPRRR.Adduw) rs1 rs2)) + +;; Helper for emitting the `zext.w` ("Zero Extend Word") instruction. +;; This instruction is a mnemonic for `adduw rd, rs1, zero`. 
+;; rd ← uext32(rs1) +(decl rv_zextw (XReg) XReg) +(rule (rv_zextw rs1) + (rv_adduw rs1 (zero_reg))) + +;; Helper for emitting the `slli.uw` ("Shift Left Logical Immediate Unsigned Word") instruction. +;; rd ← uext32(rs1) << imm +(decl rv_slliuw (XReg Imm12) XReg) +(rule (rv_slliuw rs1 imm) + (alu_rr_imm12 (AluOPRRI.SlliUw) rs1 imm)) + + +;; `Zbb` Extension Instructions + +;; Helper for emitting the `andn` ("And Negated") instruction. +;; rd ← rs1 ∧ ~(rs2) +(decl rv_andn (XReg XReg) XReg) +(rule (rv_andn rs1 rs2) + (alu_rrr (AluOPRRR.Andn) rs1 rs2)) + +;; Helper for emitting the `orn` ("Or Negated") instruction. +;; rd ← rs1 ∨ ~(rs2) +(decl rv_orn (XReg XReg) XReg) +(rule (rv_orn rs1 rs2) + (alu_rrr (AluOPRRR.Orn) rs1 rs2)) + +;; Helper for emitting the `clz` ("Count Leading Zero Bits") instruction. +(decl rv_clz (XReg) XReg) +(rule (rv_clz rs1) + (alu_rr_funct12 (AluOPRRI.Clz) rs1)) + +;; Helper for emitting the `clzw` ("Count Leading Zero Bits in Word") instruction. +(decl rv_clzw (XReg) XReg) +(rule (rv_clzw rs1) + (alu_rr_funct12 (AluOPRRI.Clzw) rs1)) + +;; Helper for emitting the `ctz` ("Count Trailing Zero Bits") instruction. +(decl rv_ctz (XReg) XReg) +(rule (rv_ctz rs1) + (alu_rr_funct12 (AluOPRRI.Ctz) rs1)) + +;; Helper for emitting the `ctzw` ("Count Trailing Zero Bits in Word") instruction. +(decl rv_ctzw (XReg) XReg) +(rule (rv_ctzw rs1) + (alu_rr_funct12 (AluOPRRI.Ctzw) rs1)) + +;; Helper for emitting the `cpop` ("Count Population") instruction. +(decl rv_cpop (XReg) XReg) +(rule (rv_cpop rs1) + (alu_rr_funct12 (AluOPRRI.Cpop) rs1)) + +;; Helper for emitting the `max` instruction. +(decl rv_max (XReg XReg) XReg) +(rule (rv_max rs1 rs2) + (alu_rrr (AluOPRRR.Max) rs1 rs2)) + +;; Helper for emitting the `sext.b` instruction. +(decl rv_sextb (XReg) XReg) +(rule (rv_sextb rs1) + (alu_rr_imm12 (AluOPRRI.Sextb) rs1 (imm12_const 0))) + +;; Helper for emitting the `sext.h` instruction. +(decl rv_sexth (XReg) XReg) +(rule (rv_sexth rs1) + (alu_rr_imm12 (AluOPRRI.Sexth) rs1 (imm12_const 0))) + +;; Helper for emitting the `zext.h` instruction. +(decl rv_zexth (XReg) XReg) +(rule (rv_zexth rs1) + (alu_rr_imm12 (AluOPRRI.Zexth) rs1 (imm12_const 0))) + +;; Helper for emitting the `rol` ("Rotate Left") instruction. +(decl rv_rol (XReg XReg) XReg) +(rule (rv_rol rs1 rs2) + (alu_rrr (AluOPRRR.Rol) rs1 rs2)) + +;; Helper for emitting the `rolw` ("Rotate Left Word") instruction. +(decl rv_rolw (XReg XReg) XReg) +(rule (rv_rolw rs1 rs2) + (alu_rrr (AluOPRRR.Rolw) rs1 rs2)) + +;; Helper for emitting the `ror` ("Rotate Right") instruction. +(decl rv_ror (XReg XReg) XReg) +(rule (rv_ror rs1 rs2) + (alu_rrr (AluOPRRR.Ror) rs1 rs2)) + +;; Helper for emitting the `rorw` ("Rotate Right Word") instruction. +(decl rv_rorw (XReg XReg) XReg) +(rule (rv_rorw rs1 rs2) + (alu_rrr (AluOPRRR.Rorw) rs1 rs2)) + +;; Helper for emitting the `rev8` ("Byte Reverse") instruction. +(decl rv_rev8 (XReg) XReg) +(rule (rv_rev8 rs1) + (alu_rr_funct12 (AluOPRRI.Rev8) rs1)) + +;; Helper for emitting the `brev8` ("Bit Reverse Inside Bytes") instruction. +;; TODO: This instruction is mentioned in some older versions of the +;; spec, but has since disappeared, we should follow up on this. +;; It probably was renamed to `rev.b` which seems to be the closest match. +(decl rv_brev8 (XReg) XReg) +(rule (rv_brev8 rs1) + (alu_rr_funct12 (AluOPRRI.Brev8) rs1)) + +;; Helper for emitting the `bseti` ("Single-Bit Set Immediate") instruction. 
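+;; rd ← rs1 | (1 << imm)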
+(decl rv_bseti (XReg Imm12) XReg) +(rule (rv_bseti rs1 imm) + (alu_rr_imm12 (AluOPRRI.Bseti) rs1 imm)) + + +;; `Zbkb` Extension Instructions + +;; Helper for emitting the `pack` ("Pack low halves of registers") instruction. +(decl rv_pack (XReg XReg) XReg) +(rule (rv_pack rs1 rs2) + (alu_rrr (AluOPRRR.Pack) rs1 rs2)) + +;; Helper for emitting the `packw` ("Pack low 16-bits of registers") instruction. +(decl rv_packw (XReg XReg) XReg) +(rule (rv_packw rs1 rs2) + (alu_rrr (AluOPRRR.Packw) rs1 rs2)) + + + + +;; Generate a mask for the bit-width of the given type +(decl pure shift_mask (Type) u64) +(rule (shift_mask ty) (u64_sub (ty_bits (lane_type ty)) 1)) + +;; for load immediate +(decl imm (Type u64) Reg) +(extern constructor imm imm) + +;; Imm12 Rules + +(decl pure imm12_zero () Imm12) +(rule + (imm12_zero) + (imm12_const 0)) + +(decl pure imm12_const (i32) Imm12) +(extern constructor imm12_const imm12_const) + +(decl load_imm12 (i32) Reg) +(rule + (load_imm12 x) + (rv_addi (zero_reg) (imm12_const x))) + +;; for load immediate +(decl imm_from_bits (u64) Imm12) +(extern constructor imm_from_bits imm_from_bits) + +(decl imm_from_neg_bits (i64) Imm12) +(extern constructor imm_from_neg_bits imm_from_neg_bits) + +(decl imm12_const_add (i32 i32) Imm12) +(extern constructor imm12_const_add imm12_const_add) + +(decl imm12_and (Imm12 u64) Imm12) +(extern constructor imm12_and imm12_and) + +;; Helper for get negative of Imm12 +(decl neg_imm12 (Imm12) Imm12) +(extern constructor neg_imm12 neg_imm12) + +;; Imm12 Extractors + +;; Helper to go directly from a `Value`, when it's an `iconst`, to an `Imm12`. +(decl imm12_from_value (Imm12) Value) +(extractor + (imm12_from_value n) + (def_inst (iconst (u64_from_imm64 (imm12_from_u64 n))))) + +(decl imm32_from_value (Imm32) Value) +(extractor + (imm32_from_value n) + (def_inst (iconst (u64_from_imm64 (imm32_from_u64 n))))) + +(decl imm12_from_u64 (Imm12) u64) +(extern extractor imm12_from_u64 imm12_from_u64) + +(decl imm32_from_u64 (Imm32) u64) +(extern extractor imm32_from_u64 imm32_from_u64) + +(decl pure partial u64_to_imm12 (u64) Imm12) +(rule (u64_to_imm12 (imm12_from_u64 n)) n) + + +;; Imm5 Extractors + +(decl imm5_from_u64 (Imm5) u64) +(extern extractor imm5_from_u64 imm5_from_u64) + +;; Construct a Imm5 from an i8 +(decl pure partial imm5_from_i8 (i8) Imm5) +(extern constructor imm5_from_i8 imm5_from_i8) + +;; Extractor that matches a `Value` equivalent to a replicated Imm5 on all lanes. +;; TODO(#6527): Try matching vconst here as well +(decl replicated_imm5 (Imm5) Value) +(extractor (replicated_imm5 n) + (def_inst (splat (iconst (u64_from_imm64 (imm5_from_u64 n)))))) + +;; UImm5 Helpers + +;; Extractor that matches a `Value` equivalent to a replicated UImm5 on all lanes. +;; TODO(#6527): Try matching vconst here as well +(decl replicated_uimm5 (UImm5) Value) +(extractor (replicated_uimm5 n) + (def_inst (splat (uimm5_from_value n)))) + +;; Helper to go directly from a `Value`, when it's an `iconst`, to an `UImm5`. +(decl uimm5_from_value (UImm5) Value) +(extractor (uimm5_from_value n) + (iconst (u64_from_imm64 (uimm5_from_u64 n)))) + +;; Extract a `UImm5` from an `u8`. +(decl pure partial uimm5_from_u8 (UImm5) u8) +(extern extractor uimm5_from_u8 uimm5_from_u8) + +;; Extract a `UImm5` from an `u64`. 
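+;; The extractor only matches when the value fits in 5 bits (i.e. is in 0..=31).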
+(decl pure partial uimm5_from_u64 (UImm5) u64) +(extern extractor uimm5_from_u64 uimm5_from_u64) + +;; Convert a `u64` into an `UImm5` +(decl pure partial u64_to_uimm5 (u64) UImm5) +(rule (u64_to_uimm5 (uimm5_from_u64 n)) n) + +(decl uimm5_bitcast_to_imm5 (UImm5) Imm5) +(extern constructor uimm5_bitcast_to_imm5 uimm5_bitcast_to_imm5) + +;; Float Helpers + +;; Returns the bitpattern of the Canonical NaN for the given type. +(decl pure canonical_nan_u64 (Type) u64) +(rule (canonical_nan_u64 $F32) 0x7fc00000) +(rule (canonical_nan_u64 $F64) 0x7ff8000000000000) + +(decl gen_default_frm () OptionFloatRoundingMode) +(extern constructor gen_default_frm gen_default_frm) + +;; Helper for emitting `MInst.FpuRR` instructions. +(decl fpu_rr (FpuOPRR Type Reg) Reg) +(rule (fpu_rr op ty src) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.FpuRR op (gen_default_frm) dst src)))) + dst)) + +;; Helper for emitting `MInst.AluRRR` instructions. +(decl alu_rrr (AluOPRRR Reg Reg) Reg) +(rule (alu_rrr op src1 src2) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRR op dst src1 src2)))) + dst)) + + +(decl pack_float_rounding_mode (FRM) OptionFloatRoundingMode) +(extern constructor pack_float_rounding_mode pack_float_rounding_mode) + +;; Helper for emitting `MInst.AluRRR` instructions. +(decl fpu_rrr (FpuOPRRR Type Reg Reg) Reg) +(rule (fpu_rrr op ty src1 src2) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.FpuRRR op (gen_default_frm) dst src1 src2)))) + dst)) + + +;; Helper for emitting `MInst.FpuRRRR` instructions. +(decl fpu_rrrr (FpuOPRRRR Type Reg Reg Reg) Reg) +(rule (fpu_rrrr op ty src1 src2 src3) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.FpuRRRR op (gen_default_frm) dst src1 src2 src3)))) + dst)) + + +;; Helper for emitting `MInst.AluRRImm12` instructions. +(decl alu_rr_imm12 (AluOPRRI Reg Imm12) Reg) +(rule (alu_rr_imm12 op src imm) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRImm12 op dst src imm)))) + dst)) + +;; some instruction use imm12 as funct12. +;; so we don't need the imm12 paramter. 
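+;; (These are unary operations such as `clz`, `ctz` and `rev8`: the function
+;; code is implied by the opcode, so the helper passes a zero immediate.)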
+(decl alu_rr_funct12 (AluOPRRI Reg) Reg) +(rule (alu_rr_funct12 op src) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero))))) + dst)) + +(decl select_addi (Type) AluOPRRI) +(rule 1 (select_addi (fits_in_32 ty)) (AluOPRRI.Addiw)) +(rule (select_addi (fits_in_64 ty)) (AluOPRRI.Addi)) + + +(decl gen_bnot (Type ValueRegs) ValueRegs) +(rule 2 (gen_bnot (ty_scalar_float ty) x) + (let ((val FReg (value_regs_get x 0)) + (x_val XReg (move_f_to_x val ty)) + (inverted XReg (rv_not x_val)) + (res FReg (move_x_to_f inverted (float_int_of_same_size ty)))) + (value_reg res))) + +(rule 1 (gen_bnot $I128 x) + (let ((lo XReg (rv_not (value_regs_get x 0))) + (hi XReg (rv_not (value_regs_get x 1)))) + (value_regs lo hi))) + +(rule 0 (gen_bnot (ty_int_ref_scalar_64 _) x) + (rv_not (value_regs_get x 0))) + + +(decl gen_and (Type ValueRegs ValueRegs) ValueRegs) +(rule 1 (gen_and $I128 x y) + (value_regs + (rv_and (value_regs_get x 0) (value_regs_get y 0)) + (rv_and (value_regs_get x 1) (value_regs_get y 1)))) + +(rule 0 (gen_and (fits_in_64 _) x y) + (rv_and (value_regs_get x 0) (value_regs_get y 0))) + + +(decl gen_andi (XReg u64) XReg) +(rule 1 (gen_andi x (imm12_from_u64 y)) + (rv_andi x y)) + +(rule 0 (gen_andi x y) + (rv_and x (imm $I64 y))) + + +(decl gen_or (Type ValueRegs ValueRegs) ValueRegs) +(rule 1 (gen_or $I128 x y) + (value_regs + (rv_or (value_regs_get x 0) (value_regs_get y 0)) + (rv_or (value_regs_get x 1) (value_regs_get y 1)))) + +(rule 0 (gen_or (fits_in_64 _) x y) + (rv_or (value_regs_get x 0) (value_regs_get y 0))) + + + +(decl gen_bswap (Type XReg) XReg) + +;; This is only here to make the rule below work. bswap.i8 isn't valid +(rule 0 (gen_bswap $I8 x) x) + +(rule 1 (gen_bswap (ty_int_ref_16_to_64 ty) x) + (if-let half_ty (ty_half_width ty)) + (if-let half_size (u64_to_imm12 (ty_bits half_ty))) + (let (;; This swaps the top bytes and zeroes the bottom bytes, so that + ;; we can or it with the bottom bytes later. + (swap_top XReg (gen_bswap half_ty x)) + (top XReg (rv_slli swap_top half_size)) + + ;; Get the top half, swap it, and zero extend it so we can `or` it + ;; with the bottom half. 
+ (shifted XReg (rv_srli x half_size)) + (swap_bot XReg (gen_bswap half_ty shifted)) + (bot XReg (zext swap_bot half_ty $I64))) + (rv_or top bot))) + +;; With `zbb` we can use `rev8` and shift the result +(rule 2 (gen_bswap (int_fits_in_32 ty) x) + (if-let $true (has_zbb)) + (if-let shift_amt (u64_to_imm12 (u64_sub 64 (ty_bits ty)))) + (rv_srli (rv_rev8 x) shift_amt)) + +;; With `zbb` we can use `rev8` that does this +(rule 3 (gen_bswap $I64 x) + (if-let $true (has_zbb)) + (rv_rev8 x)) + + + +(decl lower_bit_reverse (Reg Type) Reg) + +(rule + (lower_bit_reverse r $I8) + (gen_brev8 r $I8)) + +(rule + (lower_bit_reverse r $I16) + (let + ((tmp XReg (gen_brev8 r $I16)) + (tmp2 XReg (gen_rev8 tmp)) + (result XReg (rv_srli tmp2 (imm12_const 48)))) + result)) + +(rule + (lower_bit_reverse r $I32) + (let + ((tmp XReg (gen_brev8 r $I32)) + (tmp2 XReg (gen_rev8 tmp)) + (result XReg (rv_srli tmp2 (imm12_const 32)))) + result)) + +(rule + (lower_bit_reverse r $I64) + (let + ((tmp XReg (gen_rev8 r))) + (gen_brev8 tmp $I64))) + + +(decl lower_ctz (Type Reg) Reg) +(rule (lower_ctz ty x) + (gen_cltz $false x ty)) + +(rule 1 (lower_ctz (fits_in_16 ty) x) + (if-let $true (has_zbb)) + (let ((tmp Reg (gen_bseti x (ty_bits ty)))) + (rv_ctzw tmp))) + +(rule 2 (lower_ctz $I32 x) + (if-let $true (has_zbb)) + (rv_ctzw x)) + +(rule 2 (lower_ctz $I64 x) + (if-let $true (has_zbb)) + (rv_ctz x)) + +;; Count trailing zeros from a i128 bit value. +;; We count both halves separately and conditionally add them if it makes sense. +(decl lower_ctz_128 (ValueRegs) ValueRegs) +(rule (lower_ctz_128 x) + (let ((x_lo XReg (value_regs_get x 0)) + (x_hi XReg (value_regs_get x 1)) + ;; Count both halves + (high XReg (lower_ctz $I64 x_hi)) + (low XReg (lower_ctz $I64 x_lo)) + ;; Only add the top half if the bottom is zero + (high XReg (gen_select_reg (IntCC.Equal) x_lo (zero_reg) high (zero_reg))) + (result XReg (rv_add low high))) + (extend result (ExtendOp.Zero) $I64 $I128))) + +(decl lower_clz (Type XReg) XReg) +(rule (lower_clz ty rs) + (gen_cltz $true rs ty)) + +(rule 1 (lower_clz (fits_in_16 ty) r) + (if-let $true (has_zbb)) + (let ((tmp XReg (zext r ty $I64)) + (count XReg (rv_clz tmp)) + ;; We always do the operation on the full 64-bit register, so subtract 64 from the result. + (result XReg (rv_addi count (imm12_const_add (ty_bits ty) -64)))) + result)) + +(rule 2 (lower_clz $I32 r) + (if-let $true (has_zbb)) + (rv_clzw r)) + +(rule 2 (lower_clz $I64 r) + (if-let $true (has_zbb)) + (rv_clz r)) + + +;; Count leading zeros from a i128 bit value. +;; We count both halves separately and conditionally add them if it makes sense. +(decl lower_clz_i128 (ValueRegs) ValueRegs) +(rule (lower_clz_i128 x) + (let ((x_lo XReg (value_regs_get x 0)) + (x_hi XReg (value_regs_get x 1)) + ;; Count both halves + (high XReg (lower_clz $I64 x_hi)) + (low XReg (lower_clz $I64 x_lo)) + ;; Only add the bottom zeros if the top half is zero + (low XReg (gen_select_reg (IntCC.Equal) x_hi (zero_reg) low (zero_reg))) + (result XReg (rv_add high low))) + (extend result (ExtendOp.Zero) $I64 $I128))) + + +(decl lower_cls (Type XReg) XReg) +(rule (lower_cls ty r) + (let ((tmp XReg (sext r ty $I64)) + (tmp2 XReg (gen_select_reg (IntCC.SignedLessThan) tmp (zero_reg) (rv_not tmp) tmp)) + (tmp3 XReg (lower_clz ty tmp2))) + (rv_addi tmp3 (imm12_const -1)))) + +;; If the sign bit is set, we count the leading zeros of the inverted value. +;; Otherwise we can just count the leading zeros of the original value. +;; Subtract 1 since the sign bit does not count. 
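+;; For example, for $I32 the value 0xFFFF_FFF0 (-16) has 28 leading bits equal
+;; to the sign bit, so cls returns 27 once the sign bit itself is excluded.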
+(decl lower_cls_i128 (ValueRegs) ValueRegs) +(rule (lower_cls_i128 x) + (let ((low XReg (value_regs_get x 0)) + (high XReg (value_regs_get x 1)) + (low XReg (gen_select_reg (IntCC.SignedLessThan) high (zero_reg) (rv_not low) low)) + (high XReg (gen_select_reg (IntCC.SignedLessThan) high (zero_reg) (rv_not high) high)) + (tmp ValueRegs (lower_clz_i128 (value_regs low high))) + (count XReg (value_regs_get tmp 0)) + (result XReg (rv_addi count (imm12_const -1)))) + (extend result (ExtendOp.Zero) $I64 $I128))) + + +(decl gen_cltz (bool XReg Type) XReg) +(rule (gen_cltz leading rs ty) + (let ((tmp WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (sum WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Cltz leading sum step tmp rs ty)))) + sum)) + + +;; Extends an integer if it is smaller than 64 bits. +(decl ext_int_if_need (bool ValueRegs Type) ValueRegs) +;;; For values smaller than 64 bits, we need to extend them to 64 bits +(rule 0 (ext_int_if_need $true val (fits_in_32 (ty_int ty))) + (extend val (ExtendOp.Signed) ty $I64)) +(rule 0 (ext_int_if_need $false val (fits_in_32 (ty_int ty))) + (extend val (ExtendOp.Zero) ty $I64)) +;; If the value is larger than one machine register, we don't need to do anything +(rule 1 (ext_int_if_need _ r $I64) r) +(rule 2 (ext_int_if_need _ r $I128) r) + + +;; Performs a zero extension of the given value +(decl zext (XReg Type Type) XReg) +(rule (zext val from_ty (fits_in_64 to_ty)) (value_regs_get (extend val (ExtendOp.Zero) from_ty to_ty) 0)) + +;; Performs a signed extension of the given value +(decl sext (XReg Type Type) XReg) +(rule (sext val from_ty (fits_in_64 to_ty)) (value_regs_get (extend val (ExtendOp.Signed) from_ty to_ty) 0)) + +(type ExtendOp + (enum + (Zero) + (Signed))) + +;; Performs either a sign or zero extension of the given value +(decl extend (ValueRegs ExtendOp Type Type) ValueRegs) + +;;; Generic Rules Extending to I64 +(decl pure extend_shift_op (ExtendOp) AluOPRRI) +(rule (extend_shift_op (ExtendOp.Zero)) (AluOPRRI.Srli)) +(rule (extend_shift_op (ExtendOp.Signed)) (AluOPRRI.Srai)) + +;; In the most generic case, we shift left and then shift right. +;; The type of right shift is determined by the extend op. +; (rule 0 (extend val extend_op (fits_in_32 from_ty) (fits_in_64 to_ty)) +; (let ((val XReg (value_regs_get val 0)) +; (shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits from_ty)))) +; (left XReg (rv_slli val shift)) +; (shift_op AluOPRRI (extend_shift_op extend_op)) +; (right XReg (alu_rr_imm12 shift_op left shift))) +; right)) + +;; Hacky no-op version. +(rule 0 (extend val extend_op (fits_in_32 from_ty) (fits_in_64 to_ty)) + (let ((right XReg (value_regs_get val 0))) + right)) + +;; If we are zero extending a U8 we can use a `andi` instruction. +(rule 1 (extend val (ExtendOp.Zero) $I8 (fits_in_64 to_ty)) + (let ((val XReg (value_regs_get val 0))) + (rv_andi val (imm12_const 255)))) + +;; When signed extending from 32 to 64 bits we can use a +;; `addiw val 0`. Also known as a `sext.w` +(rule 1 (extend val (ExtendOp.Signed) $I32 $I64) + (let ((val XReg (value_regs_get val 0))) + (rv_sextw val))) + + +;; No point in trying to use `packh` here to zero extend 8 bit values +;; since we can just use `andi` instead which is part of the base ISA. 
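+
+;; For illustration: with `zbkb` a 16-bit zero extension becomes a single
+;; `packw rd, rs, zero`, and with `zbb` it becomes `zext.h rd, rs`; the rules
+;; that follow pick between these based on which extensions are enabled.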
+ +;; If we have the `zbkb` extension `packw` can be used to zero extend 16 bit values +(rule 1 (extend val (ExtendOp.Zero) $I16 (fits_in_64 _)) + (if-let $true (has_zbkb)) + (let ((val XReg (value_regs_get val 0))) + (rv_packw val (zero_reg)))) + +;; If we have the `zbkb` extension `pack` can be used to zero extend 32 bit registers +(rule 1 (extend val (ExtendOp.Zero) $I32 $I64) + (if-let $true (has_zbkb)) + (let ((val XReg (value_regs_get val 0))) + (rv_pack val (zero_reg)))) + + +;; If we have the `zbb` extension we can use the dedicated `sext.b` instruction. +(rule 1 (extend val (ExtendOp.Signed) $I8 (fits_in_64 _)) + (if-let $true (has_zbb)) + (let ((val XReg (value_regs_get val 0))) + (rv_sextb val))) + +;; If we have the `zbb` extension we can use the dedicated `sext.h` instruction. +(rule 1 (extend val (ExtendOp.Signed) $I16 (fits_in_64 _)) + (if-let $true (has_zbb)) + (let ((val XReg (value_regs_get val 0))) + (rv_sexth val))) + +;; If we have the `zbb` extension we can use the dedicated `zext.h` instruction. +(rule 2 (extend val (ExtendOp.Zero) $I16 (fits_in_64 _)) + (if-let $true (has_zbb)) + (let ((val XReg (value_regs_get val 0))) + (rv_zexth val))) + +;; With `zba` we have a `zext.w` instruction +(rule 2 (extend val (ExtendOp.Zero) $I32 $I64) + (if-let $true (has_zba)) + (let ((val XReg (value_regs_get val 0))) + (rv_zextw val))) + +;;; Signed rules extending to I128 +;; Extend the bottom part, and extract the sign bit from the bottom as the top +(rule 3 (extend val (ExtendOp.Signed) (fits_in_64 from_ty) $I128) + (let ((val XReg (value_regs_get val 0)) + (low XReg (sext val from_ty $I64)) + (high XReg (rv_srai low (imm12_const 63)))) + (value_regs low high))) + +;;; Unsigned rules extending to I128 +;; Extend the bottom register to I64 and then just zero out the top half. +(rule 3 (extend val (ExtendOp.Zero) (fits_in_64 from_ty) $I128) + (let ((val XReg (value_regs_get val 0)) + (low XReg (zext val from_ty $I64)) + (high XReg (load_u64_constant 0))) + (value_regs low high))) + +;; Catch all rule for ignoring extensions of the same type. +(rule 4 (extend val _ ty ty) val) + + + +(decl lower_b128_binary (AluOPRRR ValueRegs ValueRegs) ValueRegs) +(rule + (lower_b128_binary op a b) + (let + ( ;; low part. + (low XReg (alu_rrr op (value_regs_get a 0) (value_regs_get b 0))) + ;; high part. + (high XReg (alu_rrr op (value_regs_get a 1) (value_regs_get b 1)))) + (value_regs low high))) + +(decl lower_umlhi (Type XReg XReg) XReg) +(rule 1 (lower_umlhi $I64 rs1 rs2) + (rv_mulhu rs1 rs2)) + +(rule (lower_umlhi ty rs1 rs2) + (let + ((tmp XReg (rv_mul (zext rs1 ty $I64) (zext rs2 ty $I64)))) + (rv_srli tmp (imm12_const (ty_bits ty))))) + +(decl lower_smlhi (Type XReg XReg) XReg) +(rule 1 + (lower_smlhi $I64 rs1 rs2) + (rv_mulh rs1 rs2)) + +(rule + (lower_smlhi ty rs1 rs2) + (let + ((tmp XReg (rv_mul rs1 rs2))) + (rv_srli tmp (imm12_const (ty_bits ty))))) + + +(decl lower_rotl (Type XReg XReg) XReg) + +(rule 1 + (lower_rotl $I64 rs amount) + (if-let $true (has_zbb)) + (rv_rol rs amount)) + +(rule + (lower_rotl $I64 rs amount) + (if-let $false (has_zbb)) + (lower_rotl_shift $I64 rs amount)) + +(rule 1 + (lower_rotl $I32 rs amount) + (if-let $true (has_zbb)) + (rv_rolw rs amount)) + +(rule + (lower_rotl $I32 rs amount) + (if-let $false (has_zbb)) + (lower_rotl_shift $I32 rs amount)) + +(rule -1 + (lower_rotl ty rs amount) + (lower_rotl_shift ty rs amount)) + +;;; using shift to implement rotl. +(decl lower_rotl_shift (Type XReg XReg) XReg) + +;;; for I8 and I16 ... 
+(rule + (lower_rotl_shift ty rs amount) + (let + ((x ValueRegs (gen_shamt ty amount)) + (shamt Reg (value_regs_get x 0)) + (len_sub_shamt Reg (value_regs_get x 1)) + ;; + (part1 Reg (rv_sll rs shamt)) + ;; + (part2 Reg (rv_srl rs len_sub_shamt)) + (part3 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) part2))) + (rv_or part1 part3))) + + +;;;; construct shift amount.rotl on i128 will use shift to implement. So can call this function. +;;;; this will return shift amount and (ty_bits - "shift amount") +;;;; if ty_bits is greater than 64 like i128, then shmat will fallback to 64.because We are 64 bit platform. +(decl gen_shamt (Type XReg) ValueRegs) +(extern constructor gen_shamt gen_shamt) + +(decl lower_rotr (Type XReg XReg) XReg) + +(rule 1 + (lower_rotr $I64 rs amount) + (if-let $true (has_zbb)) + (rv_ror rs amount)) +(rule + (lower_rotr $I64 rs amount) + (if-let $false (has_zbb)) + (lower_rotr_shift $I64 rs amount)) + +(rule 1 + (lower_rotr $I32 rs amount) + (if-let $true (has_zbb)) + (rv_rorw rs amount)) + +(rule + (lower_rotr $I32 rs amount) + (if-let $false (has_zbb)) + (lower_rotr_shift $I32 rs amount)) + +(rule -1 + (lower_rotr ty rs amount) + (lower_rotr_shift ty rs amount)) + +(decl lower_rotr_shift (Type XReg XReg) XReg) + +;;; +(rule + (lower_rotr_shift ty rs amount) + (let + ((x ValueRegs (gen_shamt ty amount)) + (shamt XReg (value_regs_get x 0)) + (len_sub_shamt XReg (value_regs_get x 1)) + ;; + (part1 XReg (rv_srl rs shamt)) + ;; + (part2 XReg (rv_sll rs len_sub_shamt)) + ;; + (part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) part2))) + (rv_or part1 part3))) + + + +;; bseti: Set a single bit in a register, indexed by a constant. +(decl gen_bseti (Reg u64) Reg) +(rule (gen_bseti val bit) + (if-let $false (has_zbs)) + (if-let $false (u64_le bit 12)) + (let ((const XReg (load_u64_constant (u64_shl 1 bit)))) + (rv_or val const))) + +(rule (gen_bseti val bit) + (if-let $false (has_zbs)) + (if-let $true (u64_le bit 12)) + (rv_ori val (imm12_const (u64_as_i32 (u64_shl 1 bit))))) + +(rule (gen_bseti val bit) + (if-let $true (has_zbs)) + (rv_bseti val (imm12_const (u64_as_i32 bit)))) + + +(decl gen_popcnt (Reg Type) Reg) +(rule + (gen_popcnt rs ty) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (sum WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Popcnt sum step tmp rs ty)))) + (writable_reg_to_reg sum))) + +(decl lower_popcnt (XReg Type) XReg) +(rule 1 (lower_popcnt rs ty) + (if-let $true (has_zbb)) + (rv_cpop (zext rs ty $I64))) + +(rule (lower_popcnt rs ty) + (if-let $false (has_zbb)) + (gen_popcnt rs ty)) + +(decl lower_popcnt_i128 (ValueRegs) ValueRegs) +(rule + (lower_popcnt_i128 a) + (let + ( ;; low part. + (low XReg (lower_popcnt (value_regs_get a 0) $I64)) + ;; high part. + (high XReg (lower_popcnt (value_regs_get a 1) $I64)) + ;; add toghter. + (result XReg (rv_add low high))) + (value_regs result (load_u64_constant 0)))) + +(decl lower_i128_rotl (ValueRegs ValueRegs) ValueRegs) +(rule + (lower_i128_rotl x y) + (let + ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; + (low_part1 XReg (rv_sll (value_regs_get x 0) shamt)) + (low_part2 XReg (rv_srl (value_regs_get x 1) len_sub_shamt)) + ;;; if shamt == 0 low_part2 will overflow we should zero instead. 
+ (low_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part2)) + (low XReg (rv_or low_part1 low_part3)) + ;; + (high_part1 XReg (rv_sll (value_regs_get x 1) shamt)) + (high_part2 XReg (rv_srl (value_regs_get x 0) len_sub_shamt)) + (high_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part2)) + (high XReg (rv_or high_part1 high_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + ;; right now we only rotate less than 64 bits. + ;; if shamt is greater than or equal 64 , we should switch low and high. + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high) + ))) + + +(decl lower_i128_rotr (ValueRegs ValueRegs) ValueRegs) +(rule + (lower_i128_rotr x y) + (let + ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; + (low_part1 XReg (rv_srl (value_regs_get x 0) shamt)) + (low_part2 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + ;;; if shamt == 0 low_part2 will overflow we should zero instead. + (low_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part2)) + (low XReg (rv_or low_part1 low_part3)) + ;; + (high_part1 XReg (rv_srl (value_regs_get x 1) shamt)) + (high_part2 XReg (rv_sll (value_regs_get x 0) len_sub_shamt)) + (high_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part2)) + (high XReg (rv_or high_part1 high_part3)) + + ;; + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + ;; right now we only rotate less than 64 bits. + ;; if shamt is greater than or equal 64 , we should switch low and high. + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high) + ))) + +(decl gen_amode (Reg Offset32 Type) AMode) +(extern constructor gen_amode gen_amode) + +;; Generates a AMode that points to a constant in the constant pool. +(decl gen_const_amode (VCodeConstant) AMode) +(extern constructor gen_const_amode gen_const_amode) + +(decl offset32_imm (i32) Offset32) +(extern constructor offset32_imm offset32_imm) + +;; helper function to load from memory. +(decl gen_load (Reg Offset32 LoadOP MemFlags Type) Reg) +(rule + (gen_load p offset op flags ty) + (let + ((tmp WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.Load tmp op flags (gen_amode p offset $I64))))) + tmp)) + +(decl gen_load_128 (Reg Offset32 MemFlags) ValueRegs) +(rule + (gen_load_128 p offset flags) + (let + ((low Reg (gen_load p offset (LoadOP.Ld) flags $I64)) + (high Reg (gen_load p (offset32_add offset 8) (LoadOP.Ld) flags $I64))) + (value_regs low high))) + +(decl default_memflags () MemFlags) +(extern constructor default_memflags default_memflags) + +(decl offset32_add (Offset32 i64) Offset32) +(extern constructor offset32_add offset32_add) + +;; helper function to store to memory. 
+(decl gen_store (Reg Offset32 StoreOP MemFlags Reg) InstOutput) +(rule + (gen_store base offset op flags src) + (side_effect (SideEffectNoResult.Inst (MInst.Store (gen_amode base offset $I64) op flags src))) +) + +(decl gen_store_128 (Reg Offset32 MemFlags ValueRegs) InstOutput) +(rule + (gen_store_128 p offset flags src) + (side_effect + (SideEffectNoResult.Inst2 + (MInst.Store (gen_amode p offset $I64) (StoreOP.Sd) flags (value_regs_get src 0)) + (MInst.Store (gen_amode p (offset32_add offset 8) $I64) (StoreOP.Sd) flags (value_regs_get src 1))))) + +(decl valid_atomic_transaction (Type) Type) +(extern extractor valid_atomic_transaction valid_atomic_transaction) + +;;helper function. +;;construct an atomic instruction. +(decl gen_atomic (AtomicOP Reg Reg AMO) Reg) +(rule + (gen_atomic op addr src amo) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Atomic op tmp addr src amo)))) + tmp)) + +;; helper function +(decl get_atomic_rmw_op (Type AtomicRmwOp) AtomicOP) +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Add)) + (AtomicOP.AmoaddW)) +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Add)) + (AtomicOP.AmoaddD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.And)) + (AtomicOP.AmoandW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.And)) + (AtomicOP.AmoandD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Or)) + (AtomicOP.AmoorW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Or)) + (AtomicOP.AmoorD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Smax)) + (AtomicOP.AmomaxW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Smax)) + (AtomicOP.AmomaxD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Smin)) + (AtomicOP.AmominW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Smin)) + (AtomicOP.AmominD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Umax)) + (AtomicOP.AmomaxuW) +) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Umax)) + (AtomicOP.AmomaxuD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Umin)) + (AtomicOP.AmominuW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Umin)) + (AtomicOP.AmominuD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Xchg)) + (AtomicOP.AmoswapW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Xchg)) + (AtomicOP.AmoswapD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Xor)) + (AtomicOP.AmoxorW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Xor)) + (AtomicOP.AmoxorD)) + +(decl atomic_amo () AMO) +(extern constructor atomic_amo atomic_amo) + + +(decl gen_atomic_load (Reg Type) Reg) +(rule + (gen_atomic_load p ty) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AtomicLoad tmp ty p)))) + (writable_reg_to_reg tmp))) + +;;; +(decl gen_atomic_store (Reg Type Reg) InstOutput) +(rule + (gen_atomic_store p ty src) + (side_effect (SideEffectNoResult.Inst (MInst.AtomicStore src ty p))) +) + + +(decl gen_stack_addr (StackSlot Offset32) Reg) +(extern constructor gen_stack_addr gen_stack_addr) + +;; +(decl gen_select (Type Reg ValueRegs ValueRegs) ValueRegs) +(rule + (gen_select ty c x y) + (let + ((dst VecWritableReg (alloc_vec_writable ty)) + ;; + (reuslt VecWritableReg (vec_writable_clone dst)) + (_ Unit (emit (MInst.Select dst ty c x y)))) + (vec_writable_to_regs reuslt))) + +;; Parameters are "intcc compare_a compare_b rs1 rs2". +(decl gen_select_reg (IntCC XReg XReg Reg Reg) Reg) +(extern constructor gen_select_reg gen_select_reg) + +;; load a constant into reg. 
+(decl load_u64_constant (u64) Reg) +(extern constructor load_u64_constant load_u64_constant) + +;;; clone WritableReg +;;; if not rust compiler will complain about use moved value. +(decl vec_writable_clone (VecWritableReg) VecWritableReg) +(extern constructor vec_writable_clone vec_writable_clone) + +(decl vec_writable_to_regs (VecWritableReg) ValueRegs) +(extern constructor vec_writable_to_regs vec_writable_to_regs) + +(decl alloc_vec_writable (Type) VecWritableReg) +(extern constructor alloc_vec_writable alloc_vec_writable) + +(decl gen_int_select (Type IntSelectOP ValueRegs ValueRegs) ValueRegs) +(rule + (gen_int_select ty op x y) + (let + ( ;;; + (dst VecWritableReg (alloc_vec_writable ty)) + ;;; + (_ Unit (emit (MInst.IntSelect op (vec_writable_clone dst) x y ty)))) + (vec_writable_to_regs dst))) + +(decl udf (TrapCode) InstOutput) +(rule + (udf code) + (side_effect (SideEffectNoResult.Inst (MInst.Udf code)))) + +(decl load_op (Type) LoadOP) +(extern constructor load_op load_op) + +(decl store_op (Type) StoreOP) +(extern constructor store_op store_op) + +;; bool is "is_signed" +(decl int_load_op (bool u8) LoadOP) +(rule + (int_load_op $false 8) + (LoadOP.Lbu)) + +(rule + (int_load_op $true 8) + (LoadOP.Lb)) + +(rule + (int_load_op $false 16) + (LoadOP.Lhu)) +(rule + (int_load_op $true 16) + (LoadOP.Lh)) +(rule + (int_load_op $false 32) + (LoadOP.Lwu)) +(rule + (int_load_op $true 32) + (LoadOP.Lw)) + +(rule + (int_load_op _ 64) + (LoadOP.Ld)) + +;;;; load extern name +(decl load_ext_name (ExternalName i64) Reg) +(extern constructor load_ext_name load_ext_name) + +(decl int_convert_2_float_op (Type bool Type) FpuOPRR) +(extern constructor int_convert_2_float_op int_convert_2_float_op) + +;;;; +(decl gen_fcvt_int (bool FReg bool Type Type) XReg) +(rule + (gen_fcvt_int is_sat rs is_signed in_type out_type) + (let + ((result WritableReg (temp_writable_reg out_type)) + (tmp WritableFReg (temp_writable_freg)) + (_ Unit (emit (MInst.FcvtToInt is_sat result tmp rs is_signed in_type out_type)))) + (writable_reg_to_reg result))) + +;;; some float binary operation +;;; 1. need move into x reister. +;;; 2. do the operation. +;;; 3. move back. +(decl lower_float_binary (AluOPRRR FReg FReg Type) FReg) +(rule + (lower_float_binary op rs1 rs2 ty) + (let ((x_rs1 XReg (move_f_to_x rs1 ty)) + (x_rs2 XReg (move_f_to_x rs2 ty)) + (tmp XReg (alu_rrr op x_rs1 x_rs2))) + (move_x_to_f tmp (float_int_of_same_size ty)))) + + +;;; lower icmp +(decl lower_icmp (IntCC ValueRegs ValueRegs Type) Reg) +(rule 1 (lower_icmp cc x y ty) + (if (signed_cond_code cc)) + (gen_icmp cc (ext_int_if_need $true x ty) (ext_int_if_need $true y ty) ty)) +(rule (lower_icmp cc x y ty) + (gen_icmp cc (ext_int_if_need $false x ty) (ext_int_if_need $false y ty) ty)) + + +(decl i128_sub (ValueRegs ValueRegs) ValueRegs) +(rule + (i128_sub x y ) + (let + (;; low part. + (low XReg (rv_sub (value_regs_get x 0) (value_regs_get y 0))) + ;; compute borrow. + (borrow XReg (rv_sltu (value_regs_get x 0) low)) + ;; + (high_tmp XReg (rv_sub (value_regs_get x 1) (value_regs_get y 1))) + ;; + (high XReg (rv_sub high_tmp borrow))) + (value_regs low high))) + + +;;; Returns the sum in the first register, and the overflow test in the second. 
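+;; For a type narrower than 64 bits this works by zero-extending both operands
+;; to 64 bits, adding them, and shifting the carry down: e.g. for $I32 the
+;; overflow flag is simply bit 32 of the 64-bit sum.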
+(decl lower_uadd_overflow (XReg XReg Type) ValueRegs) + +(rule 1 + (lower_uadd_overflow x y $I64) + (let ((tmp XReg (rv_add x y)) + (test XReg (gen_icmp (IntCC.UnsignedLessThan) tmp x $I64))) + (value_regs tmp test))) + +(rule + (lower_uadd_overflow x y (fits_in_32 ty)) + (let ((tmp_x XReg (zext x ty $I64)) + (tmp_y XReg (zext y ty $I64)) + (sum XReg (rv_add tmp_x tmp_y)) + (test XReg (rv_srli sum (imm12_const (ty_bits ty))))) + (value_regs sum test))) + +(decl label_to_br_target (MachLabel) BranchTarget) +(extern constructor label_to_br_target label_to_br_target) + +(decl gen_jump (MachLabel) MInst) +(rule + (gen_jump v) + (MInst.Jal (label_to_br_target v))) + +(decl vec_label_get (VecMachLabel u8) MachLabel ) +(extern constructor vec_label_get vec_label_get) + +(decl partial lower_branch (Inst VecMachLabel) Unit) +(rule (lower_branch (jump _) targets ) + (emit_side_effect (SideEffectNoResult.Inst (gen_jump (vec_label_get targets 0))))) + +;;; cc a b targets Type +(decl lower_br_icmp (IntCC ValueRegs ValueRegs VecMachLabel Type) Unit) +(extern constructor lower_br_icmp lower_br_icmp) + +;; int scalar zero regs. +(decl int_zero_reg (Type) ValueRegs) +(extern constructor int_zero_reg int_zero_reg) + +(decl lower_cond_br (IntCC ValueRegs VecMachLabel Type) Unit) +(extern constructor lower_cond_br lower_cond_br) + +(decl intcc_to_extend_op (IntCC) ExtendOp) +(extern constructor intcc_to_extend_op intcc_to_extend_op) + +;; Normalize a value for comparision. +;; +;; This ensures that types smaller than a register don't accidentally +;; pass undefined high bits when being compared as a full register. +(decl normalize_cmp_value (Type ValueRegs ExtendOp) ValueRegs) + +(rule 1 (normalize_cmp_value (fits_in_32 ity) r op) + (extend r op ity $I64)) + +(rule (normalize_cmp_value $I64 r _) r) +(rule (normalize_cmp_value $I128 r _) r) + +(decl normalize_fcvt_from_int (XReg Type ExtendOp) XReg) +(rule 2 (normalize_fcvt_from_int r (fits_in_16 ty) op) + (value_regs_get (extend r op ty $I64) 0)) +(rule 1 (normalize_fcvt_from_int r _ _) + r) + +;; Convert a truthy value, possibly of more than one register (an +;; I128), to one register. If narrower than 64 bits, must have already +;; been masked (e.g. by `normalize_cmp_value`). +(decl truthy_to_reg (Type ValueRegs) XReg) +(rule 1 (truthy_to_reg (fits_in_64 _) regs) + (value_regs_get regs 0)) +(rule 0 (truthy_to_reg $I128 regs) + (let ((lo XReg (value_regs_get regs 0)) + (hi XReg (value_regs_get regs 1))) + (rv_or lo hi))) + +;; Default behavior for branching based on an input value. +(rule + (lower_branch (brif v @ (value_type ty) _ _) targets) + (lower_cond_br (IntCC.NotEqual) (normalize_cmp_value ty v (ExtendOp.Zero)) targets ty)) + +;; Special case for SI128 to reify the comparison value and branch on it. 
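+;; `gen_icmp` collapses the two 128-bit halves into a single truthy XReg, which
+;; is then branched on as a 64-bit value.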
+(rule 2 + (lower_branch (brif v @ (value_type $I128) _ _) targets) + (let ((zero ValueRegs (value_regs (zero_reg) (zero_reg))) + (cmp XReg (gen_icmp (IntCC.NotEqual) v zero $I128))) + (lower_cond_br (IntCC.NotEqual) cmp targets $I64))) + +;; Branching on the result of an icmp +(rule 1 + (lower_branch (brif (maybe_uextend (icmp cc a @ (value_type ty) b)) _ _) targets) + (lower_br_icmp cc a b targets ty)) + +;; Branching on the result of an fcmp +(rule 1 + (lower_branch (brif (maybe_uextend (fcmp cc a @ (value_type ty) b)) _ _) targets) + (if-let $true (floatcc_unordered cc)) + (let ((then BranchTarget (label_to_br_target (vec_label_get targets 0))) + (else BranchTarget (label_to_br_target (vec_label_get targets 1)))) + (emit_side_effect (cond_br (emit_fcmp (floatcc_complement cc) ty a b) else then)))) + +(rule 1 + (lower_branch (brif (maybe_uextend (fcmp cc a @ (value_type ty) b)) _ _) targets) + (if-let $false (floatcc_unordered cc)) + (let ((then BranchTarget (label_to_br_target (vec_label_get targets 0))) + (else BranchTarget (label_to_br_target (vec_label_get targets 1)))) + (emit_side_effect (cond_br (emit_fcmp cc ty a b) then else)))) + +;;; +(decl lower_br_table (Reg VecMachLabel) Unit) +(extern constructor lower_br_table lower_br_table) + +(rule + (lower_branch (br_table index _) targets) + (lower_br_table index targets)) + +(decl load_ra () Reg) +(extern constructor load_ra load_ra) + + +;; Generates a bitcast instruction. +;; Args are: src, src_ty, dst_ty +(decl gen_bitcast (Reg Type Type) Reg) +(rule 1 (gen_bitcast r $F32 $I32) (rv_fmvxw r)) +(rule 1 (gen_bitcast r $F64 $I64) (rv_fmvxd r)) +(rule 1 (gen_bitcast r $I32 $F32) (rv_fmvwx r)) +(rule 1 (gen_bitcast r $I64 $F64) (rv_fmvdx r)) +(rule (gen_bitcast r _ _) r) + +(decl move_f_to_x (FReg Type) XReg) +(rule (move_f_to_x r $F32) (gen_bitcast r $F32 $I32)) +(rule (move_f_to_x r $F64) (gen_bitcast r $F64 $I64)) + +(decl move_x_to_f (XReg Type) FReg) +(rule (move_x_to_f r $I32) (gen_bitcast r $I32 $F32)) +(rule (move_x_to_f r $I64) (gen_bitcast r $I64 $F64)) + +(decl float_int_of_same_size (Type) Type) +(rule (float_int_of_same_size $F32) $I32) +(rule (float_int_of_same_size $F64) $I64) + + +(decl gen_rev8 (XReg) XReg) +(rule 1 + (gen_rev8 rs) + (if-let $true (has_zbb)) + (rv_rev8 rs)) + +(rule + (gen_rev8 rs) + (if-let $false (has_zbb)) + (let + ((rd WritableXReg (temp_writable_xreg)) + (tmp WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Rev8 rs step tmp rd)))) + (writable_reg_to_reg rd))) + + +(decl gen_brev8 (Reg Type) Reg) +(rule 1 + (gen_brev8 rs _) + (if-let $true (has_zbkb)) + (rv_brev8 rs)) +(rule + (gen_brev8 rs ty) + (if-let $false (has_zbkb)) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (tmp2 WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (rd WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Brev8 rs ty step tmp tmp2 rd)))) + (writable_reg_to_reg rd))) + +;; Negates x +;; Equivalent to 0 - x +(decl neg (Type ValueRegs) ValueRegs) +(rule 1 (neg (fits_in_64 (ty_int ty)) val) + (value_reg + (rv_neg (value_regs_get val 0)))) + +(rule 2 (neg $I128 val) + (i128_sub (value_regs_zero) val)) + + +;; Selects the greatest of two registers as signed values. 
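+;; With `zbb` this is a single `max` instruction; without it we fall back to a
+;; conditional select on `SignedGreaterThan`.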
+(decl max (Type XReg XReg) XReg) +(rule (max (fits_in_64 (ty_int ty)) x y) + (if-let $true (has_zbb)) + (rv_max x y)) + +(rule (max (fits_in_64 (ty_int ty)) x y) + (if-let $false (has_zbb)) + (gen_select_reg (IntCC.SignedGreaterThan) x y x y)) + + +(decl gen_trapif (XReg TrapCode) InstOutput) +(rule + (gen_trapif test trap_code) + (side_effect (SideEffectNoResult.Inst (MInst.TrapIf test trap_code)))) + +(decl gen_trapifc (IntCC XReg XReg TrapCode) InstOutput) +(rule + (gen_trapifc cc a b trap_code) + (side_effect (SideEffectNoResult.Inst (MInst.TrapIfC a b cc trap_code)))) + +(decl shift_int_to_most_significant (XReg Type) XReg) +(extern constructor shift_int_to_most_significant shift_int_to_most_significant) + +;;; generate div overflow. +(decl gen_div_overflow (XReg XReg Type) InstOutput) +(rule + (gen_div_overflow rs1 rs2 ty) + (let + ((r_const_neg_1 XReg (load_imm12 -1)) + (r_const_min XReg (rv_slli (load_imm12 1) (imm12_const 63))) + (tmp_rs1 XReg (shift_int_to_most_significant rs1 ty)) + (t1 XReg (gen_icmp (IntCC.Equal) r_const_neg_1 rs2 ty)) + (t2 XReg (gen_icmp (IntCC.Equal) r_const_min tmp_rs1 ty)) + (test XReg (rv_and t1 t2))) + (gen_trapif test (TrapCode.IntegerOverflow)))) + +(decl gen_div_by_zero (XReg) InstOutput) +(rule + (gen_div_by_zero r) + (gen_trapifc (IntCC.Equal) (zero_reg) r (TrapCode.IntegerDivisionByZero))) + +;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput) +(extern constructor gen_call gen_call) + +(decl gen_call_indirect (SigRef Value ValueSlice) InstOutput) +(extern constructor gen_call_indirect gen_call_indirect) + +;;; this is trying to imitate aarch64 `madd` instruction. +(decl madd (XReg XReg XReg) XReg) +(rule + (madd n m a) + (let + ((t XReg (rv_mul n m))) + (rv_add t a))) + +;;;; Helpers for bmask ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl lower_bmask (Type Type ValueRegs) ValueRegs) + +;; Produces -1 if the 64-bit value is non-zero, and 0 otherwise. +;; If the type is smaller than 64 bits, we need to mask off the +;; high bits. +(rule + 0 + (lower_bmask (fits_in_64 _) (fits_in_64 in_ty) val) + (let ((input XReg (truthy_to_reg in_ty (normalize_cmp_value in_ty val (ExtendOp.Zero)))) + (non_zero XReg (rv_snez input))) + (value_reg (rv_neg non_zero)))) + +;; Bitwise-or the two registers that make up the 128-bit value, then recurse as +;; though it was a 64-bit value. +(rule + 1 + (lower_bmask (fits_in_64 ty) $I128 val) + (let ((lo XReg (value_regs_get val 0)) + (hi XReg (value_regs_get val 1)) + (combined XReg (rv_or lo hi))) + (lower_bmask ty $I64 (value_reg combined)))) + +;; Conversion of one 64-bit value to a 128-bit one. Duplicate the result of the +;; bmask of the 64-bit value into both result registers of the i128. +(rule + 2 + (lower_bmask $I128 (fits_in_64 in_ty) val) + (let ((res ValueRegs (lower_bmask $I64 in_ty val))) + (value_regs (value_regs_get res 0) (value_regs_get res 0)))) + +;; Conversion of one 64-bit value to a 128-bit one. Duplicate the result of +;; bmasking the 128-bit value to a 64-bit value into both registers of the +;; 128-bit result. 
+(rule + 3 + (lower_bmask $I128 $I128 val) + (let ((res ValueRegs (lower_bmask $I64 $I128 val))) + (value_regs (value_regs_get res 0) (value_regs_get res 0)))) + + +;;;; Helpers for physical registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_mov_from_preg (PReg) Reg) + +(rule + (gen_mov_from_preg rm) + (let ((rd WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.MovFromPReg rd rm)))) + rd)) + +(decl fp_reg () PReg) +(extern constructor fp_reg fp_reg) + +(decl sp_reg () PReg) +(extern constructor sp_reg sp_reg) + +;; Helper for creating the zero register. +(decl zero_reg () Reg) +(extern constructor zero_reg zero_reg) + +(decl value_regs_zero () ValueRegs) +(rule (value_regs_zero) + (value_regs (imm $I64 0) (imm $I64 0))) + +(decl writable_zero_reg () WritableReg) +(extern constructor writable_zero_reg writable_zero_reg) + + +;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl not (XReg) XReg) +(rule (not x) (rv_xori x (imm_from_bits 1))) + +(decl is_not_nan (Type FReg) XReg) +(rule (is_not_nan ty a) (rv_feq ty a a)) + +(decl ordered (Type FReg FReg) XReg) +(rule (ordered ty a b) (rv_and (is_not_nan ty a) (is_not_nan ty b))) + +(type CmpResult (enum + (Result + (result XReg) + (invert bool)))) + +;; Wrapper for the common case when constructing comparison results. It assumes +;; that the result isn't negated. +(decl cmp_result (XReg) CmpResult) +(rule (cmp_result result) (CmpResult.Result result $false)) + +;; Wrapper for the case where it's more convenient to construct the negated +;; version of the comparison. +(decl cmp_result_invert (XReg) CmpResult) +(rule (cmp_result_invert result) (CmpResult.Result result $true)) + +;; Consume a CmpResult, producing a branch on its result. +(decl cond_br (CmpResult BranchTarget BranchTarget) SideEffectNoResult) +(rule (cond_br cmp then else) + (SideEffectNoResult.Inst + (MInst.CondBr then else (cmp_integer_compare cmp)))) + +;; Construct an IntegerCompare value. +(decl int_compare (IntCC XReg XReg) IntegerCompare) +(extern constructor int_compare int_compare) + +;; Convert a comparison into a branch test. +(decl cmp_integer_compare (CmpResult) IntegerCompare) + +(rule + (cmp_integer_compare (CmpResult.Result res $false)) + (int_compare (IntCC.NotEqual) res (zero_reg))) + +(rule + (cmp_integer_compare (CmpResult.Result res $true)) + (int_compare (IntCC.Equal) res (zero_reg))) + +;; Convert a comparison into a boolean value. +(decl cmp_value (CmpResult) XReg) +(rule (cmp_value (CmpResult.Result res $false)) res) +(rule (cmp_value (CmpResult.Result res $true)) (not res)) + +;; Compare two floating point numbers and return a zero/non-zero result. 
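+;; Conditions that are cheaper to compute in negated form (e.g. `Unordered` as
+;; the complement of `Ordered`) are built with `cmp_result_invert`; the
+;; inversion is resolved later by `cmp_integer_compare` / `cmp_value`.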
+(decl emit_fcmp (FloatCC Type FReg FReg) CmpResult) + +;; a is not nan && b is not nan +(rule + (emit_fcmp (FloatCC.Ordered) ty a b) + (cmp_result (ordered ty a b))) + +;; a is nan || b is nan +;; == !(a is not nan && b is not nan) +(rule + (emit_fcmp (FloatCC.Unordered) ty a b) + (cmp_result_invert (ordered ty a b))) + +;; a == b +(rule + (emit_fcmp (FloatCC.Equal) ty a b) + (cmp_result (rv_feq ty a b))) + +;; a != b +;; == !(a == b) +(rule + (emit_fcmp (FloatCC.NotEqual) ty a b) + (cmp_result_invert (rv_feq ty a b))) + +;; a < b || a > b +(rule + (emit_fcmp (FloatCC.OrderedNotEqual) ty a b) + (cmp_result (rv_or (rv_flt ty a b) (rv_fgt ty a b)))) + +;; !(ordered a b) || a == b +(rule + (emit_fcmp (FloatCC.UnorderedOrEqual) ty a b) + (cmp_result (rv_or (not (ordered ty a b)) (rv_feq ty a b)))) + +;; a < b +(rule + (emit_fcmp (FloatCC.LessThan) ty a b) + (cmp_result (rv_flt ty a b))) + +;; a <= b +(rule + (emit_fcmp (FloatCC.LessThanOrEqual) ty a b) + (cmp_result (rv_fle ty a b))) + +;; a > b +(rule + (emit_fcmp (FloatCC.GreaterThan) ty a b) + (cmp_result (rv_fgt ty a b))) + +;; a >= b +(rule + (emit_fcmp (FloatCC.GreaterThanOrEqual) ty a b) + (cmp_result (rv_fge ty a b))) + +;; !(ordered a b) || a < b +;; == !(ordered a b && a >= b) +(rule + (emit_fcmp (FloatCC.UnorderedOrLessThan) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_fge ty a b)))) + +;; !(ordered a b) || a <= b +;; == !(ordered a b && a > b) +(rule + (emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_fgt ty a b)))) + +;; !(ordered a b) || a > b +;; == !(ordered a b && a <= b) +(rule + (emit_fcmp (FloatCC.UnorderedOrGreaterThan) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_fle ty a b)))) + +;; !(ordered a b) || a >= b +;; == !(ordered a b && a < b) +(rule + (emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_flt ty a b)))) diff --git a/cranelift/codegen/src/isa/zkasm/inst/args.rs b/cranelift/codegen/src/isa/zkasm/inst/args.rs new file mode 100644 index 000000000000..d71cef8a32c4 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/args.rs @@ -0,0 +1,1812 @@ +//! Riscv64 ISA definitions: instruction arguments. + +// Some variants are never constructed, but we still want them as options in the future. +#![allow(dead_code)] +use super::*; +use crate::ir::condcodes::CondCode; + +use crate::isa::zkasm::inst::{reg_name, reg_to_gpr_num}; +use crate::machinst::isle::WritableReg; + +use std::fmt::{Display, Formatter, Result}; + +/// A macro for defining a newtype of `Reg` that enforces some invariant about +/// the wrapped `Reg` (such as that it is of a particular register class). +macro_rules! newtype_of_reg { + ( + $newtype_reg:ident, + $newtype_writable_reg:ident, + |$check_reg:ident| $check:expr + ) => { + /// A newtype wrapper around `Reg`. + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct $newtype_reg(Reg); + + impl PartialEq for $newtype_reg { + fn eq(&self, other: &Reg) -> bool { + self.0 == *other + } + } + + impl From<$newtype_reg> for Reg { + fn from(r: $newtype_reg) -> Self { + r.0 + } + } + + impl $newtype_reg { + /// Create this newtype from the given register, or return `None` if the register + /// is not a valid instance of this newtype. + pub fn new($check_reg: Reg) -> Option { + if $check { + Some(Self($check_reg)) + } else { + None + } + } + + /// Get this newtype's underlying `Reg`. 
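+            ///
+            /// A rough round-trip sketch, using the `XReg` instantiation
+            /// below and assuming `reg` is some integer-class `Reg`:
+            ///
+            /// ```ignore
+            /// let x = XReg::new(reg).expect("not an integer register");
+            /// let raw: Reg = x.to_reg();
+            /// ```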
+ pub fn to_reg(self) -> Reg { + self.0 + } + } + + // Convenience impl so that people working with this newtype can use it + // "just like" a plain `Reg`. + // + // NB: We cannot implement `DerefMut` because that would let people do + // nasty stuff like `*my_xreg.deref_mut() = some_freg`, breaking the + // invariants that `XReg` provides. + impl std::ops::Deref for $newtype_reg { + type Target = Reg; + + fn deref(&self) -> &Reg { + &self.0 + } + } + + /// Writable Reg. + pub type $newtype_writable_reg = Writable<$newtype_reg>; + }; +} + +// Newtypes for registers classes. +newtype_of_reg!(XReg, WritableXReg, |reg| reg.class() == RegClass::Int); +newtype_of_reg!(FReg, WritableFReg, |reg| reg.class() == RegClass::Float); +newtype_of_reg!(VReg, WritableVReg, |reg| reg.class() == RegClass::Vector); + +/// An addressing mode specified for a load/store operation. +#[derive(Clone, Debug, Copy)] +pub enum AMode { + /// Arbitrary offset from a register. Converted to generation of large + /// offsets with multiple instructions as necessary during code emission. + RegOffset(Reg, i64, Type), + /// Offset from the stack pointer. + SPOffset(i64, Type), + + /// Offset from the frame pointer. + FPOffset(i64, Type), + + /// Offset from the "nominal stack pointer", which is where the real SP is + /// just after stack and spill slots are allocated in the function prologue. + /// At emission time, this is converted to `SPOffset` with a fixup added to + /// the offset constant. The fixup is a running value that is tracked as + /// emission iterates through instructions in linear order, and can be + /// adjusted up and down with [Inst::VirtualSPOffsetAdj]. + /// + /// The standard ABI is in charge of handling this (by emitting the + /// adjustment meta-instructions). It maintains the invariant that "nominal + /// SP" is where the actual SP is after the function prologue and before + /// clobber pushes. See the diagram in the documentation for + /// [crate::isa::zkasm::abi](the ABI module) for more details. + NominalSPOffset(i64, Type), + + /// A reference to a constant which is placed outside of the function's + /// body, typically at the end. + Const(VCodeConstant), + + /// A reference to a label. + Label(MachLabel), +} + +impl AMode { + pub(crate) fn with_allocs(self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + AMode::RegOffset(reg, offset, ty) => AMode::RegOffset(allocs.next(reg), offset, ty), + AMode::SPOffset(..) + | AMode::FPOffset(..) + | AMode::NominalSPOffset(..) + | AMode::Const(..) + | AMode::Label(..) => self, + } + } + + /// Returns the registers that known to the register allocator. + /// Keep this in sync with `with_allocs`. + pub(crate) fn get_allocatable_register(&self) -> Option { + match self { + AMode::RegOffset(reg, ..) => Some(*reg), + AMode::SPOffset(..) + | AMode::FPOffset(..) + | AMode::NominalSPOffset(..) + | AMode::Const(..) + | AMode::Label(..) => None, + } + } + + pub(crate) fn get_base_register(&self) -> Option { + match self { + &AMode::RegOffset(reg, ..) => Some(reg), + &AMode::SPOffset(..) => Some(stack_reg()), + &AMode::FPOffset(..) => Some(fp_reg()), + &AMode::NominalSPOffset(..) => Some(stack_reg()), + &AMode::Const(..) | AMode::Label(..) => None, + } + } + + pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 { + match self { + &AMode::NominalSPOffset(offset, _) => offset + state.virtual_sp_offset, + _ => self.get_offset(), + } + } + + fn get_offset(&self) -> i64 { + match self { + &AMode::RegOffset(_, offset, ..) 
=> offset, + &AMode::SPOffset(offset, _) => offset, + &AMode::FPOffset(offset, _) => offset, + &AMode::NominalSPOffset(offset, _) => offset, + &AMode::Const(_) | &AMode::Label(_) => 0, + } + } + + pub(crate) fn to_string_with_alloc(&self, allocs: &mut AllocationConsumer<'_>) -> String { + format!("{}", self.clone().with_allocs(allocs)) + } +} + +impl Display for AMode { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + match self { + &AMode::RegOffset(r, offset, ..) => { + write!(f, "{}({})", offset, reg_name(r)) + } + &AMode::SPOffset(offset, ..) => { + write!(f, "{}(sp)", offset) + } + &AMode::NominalSPOffset(offset, ..) => { + write!(f, "{}(nominal_sp)", offset) + } + &AMode::FPOffset(offset, ..) => { + write!(f, "{}(fp)", offset) + } + &AMode::Const(addr, ..) => { + write!(f, "[const({})]", addr.as_u32()) + } + &AMode::Label(label) => { + write!(f, "[label{}]", label.as_u32()) + } + } + } +} + +impl Into for StackAMode { + fn into(self) -> AMode { + match self { + StackAMode::FPOffset(offset, ty) => AMode::FPOffset(offset, ty), + StackAMode::SPOffset(offset, ty) => AMode::SPOffset(offset, ty), + StackAMode::NominalSPOffset(offset, ty) => AMode::NominalSPOffset(offset, ty), + } + } +} + +/// risc-v always take two register to compare +#[derive(Clone, Copy, Debug)] +pub struct IntegerCompare { + pub(crate) kind: IntCC, + pub(crate) rs1: Reg, + pub(crate) rs2: Reg, +} + +pub(crate) enum BranchFunct3 { + // == + Eq, + // != + Ne, + // signed < + Lt, + // signed >= + Ge, + // unsigned < + Ltu, + // unsigned >= + Geu, +} + +impl BranchFunct3 { + pub(crate) fn funct3(self) -> u32 { + match self { + BranchFunct3::Eq => 0b000, + BranchFunct3::Ne => 0b001, + BranchFunct3::Lt => 0b100, + BranchFunct3::Ge => 0b101, + BranchFunct3::Ltu => 0b110, + BranchFunct3::Geu => 0b111, + } + } + pub(crate) fn op_name(self) -> &'static str { + match self { + BranchFunct3::Eq => "eq", + BranchFunct3::Ne => "ne", + BranchFunct3::Lt => "lt", + BranchFunct3::Ge => "ge", + BranchFunct3::Ltu => "ltu", + BranchFunct3::Geu => "geu", + } + } +} +impl IntegerCompare { + pub(crate) fn op_code(self) -> u32 { + 0b1100011 + } + + // funct3 and if need inverse the register + pub(crate) fn funct3(&self) -> (BranchFunct3, bool) { + match self.kind { + IntCC::Equal => (BranchFunct3::Eq, false), + IntCC::NotEqual => (BranchFunct3::Ne, false), + IntCC::SignedLessThan => (BranchFunct3::Lt, false), + IntCC::SignedGreaterThanOrEqual => (BranchFunct3::Ge, false), + + IntCC::SignedGreaterThan => (BranchFunct3::Lt, true), + IntCC::SignedLessThanOrEqual => (BranchFunct3::Ge, true), + + IntCC::UnsignedLessThan => (BranchFunct3::Ltu, false), + IntCC::UnsignedGreaterThanOrEqual => (BranchFunct3::Geu, false), + + IntCC::UnsignedGreaterThan => (BranchFunct3::Ltu, true), + IntCC::UnsignedLessThanOrEqual => (BranchFunct3::Geu, true), + } + } + + #[inline] + pub(crate) fn op_name(&self) -> &'static str { + match self.kind { + IntCC::Equal => "EQ", + IntCC::NotEqual => "bne", + IntCC::SignedLessThan => "SLT", + IntCC::SignedGreaterThanOrEqual => "bge", + IntCC::SignedGreaterThan => "bgt", + IntCC::SignedLessThanOrEqual => "ble", + IntCC::UnsignedLessThan => "LT", + IntCC::UnsignedGreaterThanOrEqual => "bgeu", + IntCC::UnsignedGreaterThan => "bgtu", + IntCC::UnsignedLessThanOrEqual => "bleu", + } + } + + pub(crate) fn emit(self) -> u32 { + let (funct3, reverse) = self.funct3(); + let (rs1, rs2) = if reverse { + (self.rs2, self.rs1) + } else { + (self.rs1, self.rs2) + }; + + self.op_code() + | funct3.funct3() << 12 + | reg_to_gpr_num(rs1) << 15 
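+            // rs1 goes in bits 15..20 and rs2 in bits 20..25, matching the
+            // standard RISC-V B-type layout; the branch offset itself is not
+            // packed in here.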
+ | reg_to_gpr_num(rs2) << 20 + } + + pub(crate) fn inverse(self) -> Self { + Self { + kind: self.kind.complement(), + ..self + } + } +} + +impl FpuOPRRRR { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::FmaddS => "fmadd.s", + Self::FmsubS => "fmsub.s", + Self::FnmsubS => "fnmsub.s", + Self::FnmaddS => "fnmadd.s", + Self::FmaddD => "fmadd.d", + Self::FmsubD => "fmsub.d", + Self::FnmsubD => "fnmsub.d", + Self::FnmaddD => "fnmadd.d", + } + } + + pub(crate) fn funct2(self) -> u32 { + match self { + FpuOPRRRR::FmaddS | FpuOPRRRR::FmsubS | FpuOPRRRR::FnmsubS | FpuOPRRRR::FnmaddS => 0, + FpuOPRRRR::FmaddD | FpuOPRRRR::FmsubD | FpuOPRRRR::FnmsubD | FpuOPRRRR::FnmaddD => 1, + } + } + + pub(crate) fn funct3(self, rounding_mode: Option) -> u32 { + rounding_mode.unwrap_or_default().as_u32() + } + + pub(crate) fn op_code(self) -> u32 { + match self { + FpuOPRRRR::FmaddS => 0b1000011, + FpuOPRRRR::FmsubS => 0b1000111, + FpuOPRRRR::FnmsubS => 0b1001011, + FpuOPRRRR::FnmaddS => 0b1001111, + FpuOPRRRR::FmaddD => 0b1000011, + FpuOPRRRR::FmsubD => 0b1000111, + FpuOPRRRR::FnmsubD => 0b1001011, + FpuOPRRRR::FnmaddD => 0b1001111, + } + } +} + +impl FpuOPRR { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::FsqrtS => "fsqrt.s", + Self::FcvtWS => "fcvt.w.s", + Self::FcvtWuS => "fcvt.wu.s", + Self::FmvXW => "fmv.x.w", + Self::FclassS => "fclass.s", + Self::FcvtSw => "fcvt.s.w", + Self::FcvtSwU => "fcvt.s.wu", + Self::FmvWX => "fmv.w.x", + Self::FcvtLS => "fcvt.l.s", + Self::FcvtLuS => "fcvt.lu.s", + Self::FcvtSL => "fcvt.s.l", + Self::FcvtSLU => "fcvt.s.lu", + Self::FcvtLD => "fcvt.l.d", + Self::FcvtLuD => "fcvt.lu.d", + Self::FmvXD => "fmv.x.d", + Self::FcvtDL => "fcvt.d.l", + Self::FcvtDLu => "fcvt.d.lu", + Self::FmvDX => "fmv.d.x", + Self::FsqrtD => "fsqrt.d", + Self::FcvtSD => "fcvt.s.d", + Self::FcvtDS => "fcvt.d.s", + Self::FclassD => "fclass.d", + Self::FcvtWD => "fcvt.w.d", + Self::FcvtWuD => "fcvt.wu.d", + Self::FcvtDW => "fcvt.d.w", + Self::FcvtDWU => "fcvt.d.wu", + } + } + + pub(crate) fn is_convert_to_int(self) -> bool { + match self { + Self::FcvtWS + | Self::FcvtWuS + | Self::FcvtLS + | Self::FcvtLuS + | Self::FcvtWD + | Self::FcvtWuD + | Self::FcvtLD + | Self::FcvtLuD => true, + _ => false, + } + } + // move from x register to float register. + pub(crate) fn move_x_to_f_op(ty: Type) -> Self { + match ty { + F32 => Self::FmvWX, + F64 => Self::FmvDX, + _ => unreachable!("ty:{:?}", ty), + } + } + + // move from f register to x register. 
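+    // For example, `move_x_to_f_op(F64)` above is `fmv.d.x` and, symmetrically,
+    // `move_f_to_x_op(F64)` below is `fmv.x.d`; both panic on non-float types.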
+ pub(crate) fn move_f_to_x_op(ty: Type) -> Self { + match ty { + F32 => Self::FmvXW, + F64 => Self::FmvXD, + _ => unreachable!("ty:{:?}", ty), + } + } + + pub(crate) fn float_convert_2_int_op(from: Type, is_type_signed: bool, to: Type) -> Self { + let type_32 = to.bits() <= 32; + match from { + F32 => { + if is_type_signed { + if type_32 { + Self::FcvtWS + } else { + Self::FcvtLS + } + } else { + if type_32 { + Self::FcvtWuS + } else { + Self::FcvtLuS + } + } + } + F64 => { + if is_type_signed { + if type_32 { + Self::FcvtWD + } else { + Self::FcvtLD + } + } else { + if type_32 { + Self::FcvtWuD + } else { + Self::FcvtLuD + } + } + } + _ => unreachable!("from type:{}", from), + } + } + + pub(crate) fn int_convert_2_float_op(from: Type, is_type_signed: bool, to: Type) -> Self { + let type_32 = from.bits() == 32; + match to { + F32 => { + if is_type_signed { + if type_32 { + Self::FcvtSw + } else { + Self::FcvtSL + } + } else { + if type_32 { + Self::FcvtSwU + } else { + Self::FcvtSLU + } + } + } + F64 => { + if is_type_signed { + if type_32 { + Self::FcvtDW + } else { + Self::FcvtDL + } + } else { + if type_32 { + Self::FcvtDWU + } else { + Self::FcvtDLu + } + } + } + _ => unreachable!("to type:{}", to), + } + } + + pub(crate) fn op_code(self) -> u32 { + match self { + FpuOPRR::FsqrtS + | FpuOPRR::FcvtWS + | FpuOPRR::FcvtWuS + | FpuOPRR::FmvXW + | FpuOPRR::FclassS + | FpuOPRR::FcvtSw + | FpuOPRR::FcvtSwU + | FpuOPRR::FmvWX => 0b1010011, + + FpuOPRR::FcvtLS | FpuOPRR::FcvtLuS | FpuOPRR::FcvtSL | FpuOPRR::FcvtSLU => 0b1010011, + + FpuOPRR::FcvtLD + | FpuOPRR::FcvtLuD + | FpuOPRR::FmvXD + | FpuOPRR::FcvtDL + | FpuOPRR::FcvtDLu + | FpuOPRR::FmvDX => 0b1010011, + + FpuOPRR::FsqrtD + | FpuOPRR::FcvtSD + | FpuOPRR::FcvtDS + | FpuOPRR::FclassD + | FpuOPRR::FcvtWD + | FpuOPRR::FcvtWuD + | FpuOPRR::FcvtDW + | FpuOPRR::FcvtDWU => 0b1010011, + } + } + + pub(crate) fn rs2_funct5(self) -> u32 { + match self { + FpuOPRR::FsqrtS => 0b00000, + FpuOPRR::FcvtWS => 0b00000, + FpuOPRR::FcvtWuS => 0b00001, + FpuOPRR::FmvXW => 0b00000, + FpuOPRR::FclassS => 0b00000, + FpuOPRR::FcvtSw => 0b00000, + FpuOPRR::FcvtSwU => 0b00001, + FpuOPRR::FmvWX => 0b00000, + FpuOPRR::FcvtLS => 0b00010, + FpuOPRR::FcvtLuS => 0b00011, + FpuOPRR::FcvtSL => 0b00010, + FpuOPRR::FcvtSLU => 0b00011, + FpuOPRR::FcvtLD => 0b00010, + FpuOPRR::FcvtLuD => 0b00011, + FpuOPRR::FmvXD => 0b00000, + FpuOPRR::FcvtDL => 0b00010, + FpuOPRR::FcvtDLu => 0b00011, + FpuOPRR::FmvDX => 0b00000, + FpuOPRR::FcvtSD => 0b00001, + FpuOPRR::FcvtDS => 0b00000, + FpuOPRR::FclassD => 0b00000, + FpuOPRR::FcvtWD => 0b00000, + FpuOPRR::FcvtWuD => 0b00001, + FpuOPRR::FcvtDW => 0b00000, + FpuOPRR::FcvtDWU => 0b00001, + FpuOPRR::FsqrtD => 0b00000, + } + } + pub(crate) fn funct7(self) -> u32 { + match self { + FpuOPRR::FsqrtS => 0b0101100, + FpuOPRR::FcvtWS => 0b1100000, + FpuOPRR::FcvtWuS => 0b1100000, + FpuOPRR::FmvXW => 0b1110000, + FpuOPRR::FclassS => 0b1110000, + FpuOPRR::FcvtSw => 0b1101000, + FpuOPRR::FcvtSwU => 0b1101000, + FpuOPRR::FmvWX => 0b1111000, + FpuOPRR::FcvtLS => 0b1100000, + FpuOPRR::FcvtLuS => 0b1100000, + FpuOPRR::FcvtSL => 0b1101000, + FpuOPRR::FcvtSLU => 0b1101000, + FpuOPRR::FcvtLD => 0b1100001, + FpuOPRR::FcvtLuD => 0b1100001, + FpuOPRR::FmvXD => 0b1110001, + FpuOPRR::FcvtDL => 0b1101001, + FpuOPRR::FcvtDLu => 0b1101001, + FpuOPRR::FmvDX => 0b1111001, + FpuOPRR::FcvtSD => 0b0100000, + FpuOPRR::FcvtDS => 0b0100001, + FpuOPRR::FclassD => 0b1110001, + FpuOPRR::FcvtWD => 0b1100001, + FpuOPRR::FcvtWuD => 0b1100001, + FpuOPRR::FcvtDW => 0b1101001, + 
FpuOPRR::FcvtDWU => 0b1101001, + FpuOPRR::FsqrtD => 0b0101101, + } + } + + pub(crate) fn funct3(self, rounding_mode: Option) -> u32 { + let rounding_mode = rounding_mode.unwrap_or_default().as_u32(); + match self { + FpuOPRR::FsqrtS => rounding_mode, + FpuOPRR::FcvtWS => rounding_mode, + FpuOPRR::FcvtWuS => rounding_mode, + FpuOPRR::FmvXW => 0b000, + FpuOPRR::FclassS => 0b001, + FpuOPRR::FcvtSw => rounding_mode, + FpuOPRR::FcvtSwU => rounding_mode, + FpuOPRR::FmvWX => 0b000, + FpuOPRR::FcvtLS => rounding_mode, + FpuOPRR::FcvtLuS => rounding_mode, + FpuOPRR::FcvtSL => rounding_mode, + FpuOPRR::FcvtSLU => rounding_mode, + FpuOPRR::FcvtLD => rounding_mode, + FpuOPRR::FcvtLuD => rounding_mode, + FpuOPRR::FmvXD => 0b000, + FpuOPRR::FcvtDL => rounding_mode, + FpuOPRR::FcvtDLu => rounding_mode, + FpuOPRR::FmvDX => 0b000, + FpuOPRR::FcvtSD => rounding_mode, + FpuOPRR::FcvtDS => rounding_mode, + FpuOPRR::FclassD => 0b001, + FpuOPRR::FcvtWD => rounding_mode, + FpuOPRR::FcvtWuD => rounding_mode, + FpuOPRR::FcvtDW => rounding_mode, + FpuOPRR::FcvtDWU => 0b000, + FpuOPRR::FsqrtD => rounding_mode, + } + } +} + +impl FpuOPRRR { + pub(crate) const fn op_name(self) -> &'static str { + match self { + Self::FaddS => "fadd.s", + Self::FsubS => "fsub.s", + Self::FmulS => "fmul.s", + Self::FdivS => "fdiv.s", + Self::FsgnjS => "fsgnj.s", + Self::FsgnjnS => "fsgnjn.s", + Self::FsgnjxS => "fsgnjx.s", + Self::FminS => "fmin.s", + Self::FmaxS => "fmax.s", + Self::FeqS => "feq.s", + Self::FltS => "flt.s", + Self::FleS => "fle.s", + Self::FaddD => "fadd.d", + Self::FsubD => "fsub.d", + Self::FmulD => "fmul.d", + Self::FdivD => "fdiv.d", + Self::FsgnjD => "fsgnj.d", + Self::FsgnjnD => "fsgnjn.d", + Self::FsgnjxD => "fsgnjx.d", + Self::FminD => "fmin.d", + Self::FmaxD => "fmax.d", + Self::FeqD => "feq.d", + Self::FltD => "flt.d", + Self::FleD => "fle.d", + } + } + + pub fn funct3(self, rounding_mode: Option) -> u32 { + let rounding_mode = rounding_mode.unwrap_or_default(); + let rounding_mode = rounding_mode.as_u32(); + match self { + Self::FaddS => rounding_mode, + Self::FsubS => rounding_mode, + Self::FmulS => rounding_mode, + Self::FdivS => rounding_mode, + + Self::FsgnjS => 0b000, + Self::FsgnjnS => 0b001, + Self::FsgnjxS => 0b010, + Self::FminS => 0b000, + Self::FmaxS => 0b001, + + Self::FeqS => 0b010, + Self::FltS => 0b001, + Self::FleS => 0b000, + + Self::FaddD => rounding_mode, + Self::FsubD => rounding_mode, + Self::FmulD => rounding_mode, + Self::FdivD => rounding_mode, + + Self::FsgnjD => 0b000, + Self::FsgnjnD => 0b001, + Self::FsgnjxD => 0b010, + Self::FminD => 0b000, + Self::FmaxD => 0b001, + Self::FeqD => 0b010, + Self::FltD => 0b001, + Self::FleD => 0b000, + } + } + + pub fn op_code(self) -> u32 { + match self { + Self::FaddS + | Self::FsubS + | Self::FmulS + | Self::FdivS + | Self::FsgnjS + | Self::FsgnjnS + | Self::FsgnjxS + | Self::FminS + | Self::FmaxS + | Self::FeqS + | Self::FltS + | Self::FleS => 0b1010011, + + Self::FaddD + | Self::FsubD + | Self::FmulD + | Self::FdivD + | Self::FsgnjD + | Self::FsgnjnD + | Self::FsgnjxD + | Self::FminD + | Self::FmaxD + | Self::FeqD + | Self::FltD + | Self::FleD => 0b1010011, + } + } + + pub const fn funct7(self) -> u32 { + match self { + Self::FaddS => 0b0000000, + Self::FsubS => 0b0000100, + Self::FmulS => 0b0001000, + Self::FdivS => 0b0001100, + + Self::FsgnjS => 0b0010000, + Self::FsgnjnS => 0b0010000, + Self::FsgnjxS => 0b0010000, + Self::FminS => 0b0010100, + Self::FmaxS => 0b0010100, + Self::FeqS => 0b1010000, + Self::FltS => 0b1010000, + Self::FleS => 
0b1010000, + + Self::FaddD => 0b0000001, + Self::FsubD => 0b0000101, + Self::FmulD => 0b0001001, + Self::FdivD => 0b0001101, + Self::FsgnjD => 0b0010001, + Self::FsgnjnD => 0b0010001, + Self::FsgnjxD => 0b0010001, + Self::FminD => 0b0010101, + Self::FmaxD => 0b0010101, + Self::FeqD => 0b1010001, + Self::FltD => 0b1010001, + Self::FleD => 0b1010001, + } + } + pub fn is_32(self) -> bool { + match self { + Self::FaddS + | Self::FsubS + | Self::FmulS + | Self::FdivS + | Self::FsgnjS + | Self::FsgnjnS + | Self::FsgnjxS + | Self::FminS + | Self::FmaxS + | Self::FeqS + | Self::FltS + | Self::FleS => true, + _ => false, + } + } + + pub fn is_copy_sign(self) -> bool { + match self { + Self::FsgnjD | Self::FsgnjS => true, + _ => false, + } + } + + pub fn is_copy_neg_sign(self) -> bool { + match self { + Self::FsgnjnD | Self::FsgnjnS => true, + _ => false, + } + } + pub fn is_copy_xor_sign(self) -> bool { + match self { + Self::FsgnjxS | Self::FsgnjxD => true, + _ => false, + } + } +} +impl AluOPRRR { + pub(crate) const fn op_name(self) -> &'static str { + match self { + Self::Add => "ADD", + Self::Sub => "SUB", + Self::Sll => "sll", + Self::Slt => "slt", + Self::Sgt => "sgt", + Self::SltU => "sltu", + Self::Sgtu => "sgtu", + Self::Xor => "xor", + Self::Srl => "srl", + Self::Sra => "sra", + Self::Or => "or", + Self::And => "and", + Self::Addw => "ADD", + Self::Subw => "SUB", + Self::Sllw => "sllw", + Self::Srlw => "srlw", + Self::Sraw => "sraw", + Self::Mul => "mul", + Self::Mulh => "mulh", + Self::Mulhsu => "mulhsu", + Self::Mulhu => "mulhu", + Self::Div => "div", + Self::DivU => "divu", + Self::Rem => "rem", + Self::RemU => "remu", + Self::Mulw => "mulw", + Self::Divw => "divw", + Self::Divuw => "divuw", + Self::Remw => "remw", + Self::Remuw => "remuw", + Self::Adduw => "add.uw", + Self::Andn => "andn", + Self::Bclr => "bclr", + Self::Bext => "bext", + Self::Binv => "binv", + Self::Bset => "bset", + Self::Clmul => "clmul", + Self::Clmulh => "clmulh", + Self::Clmulr => "clmulr", + Self::Max => "max", + Self::Maxu => "maxu", + Self::Min => "min", + Self::Minu => "minu", + Self::Orn => "orn", + Self::Rol => "rol", + Self::Rolw => "rolw", + Self::Ror => "ror", + Self::Rorw => "rorw", + Self::Sh1add => "sh1add", + Self::Sh1adduw => "sh1add.uw", + Self::Sh2add => "sh2add", + Self::Sh2adduw => "sh2add.uw", + Self::Sh3add => "sh3add", + Self::Sh3adduw => "sh3add.uw", + Self::Xnor => "xnor", + Self::Pack => "pack", + Self::Packw => "packw", + Self::Packh => "packh", + } + } + + pub fn funct3(self) -> u32 { + match self { + AluOPRRR::Add => 0b000, + AluOPRRR::Sll => 0b001, + AluOPRRR::Slt => 0b010, + AluOPRRR::Sgt => 0b010, + AluOPRRR::SltU => 0b011, + AluOPRRR::Sgtu => 0b011, + AluOPRRR::Xor => 0b100, + AluOPRRR::Srl => 0b101, + AluOPRRR::Sra => 0b101, + AluOPRRR::Or => 0b110, + AluOPRRR::And => 0b111, + AluOPRRR::Sub => 0b000, + + AluOPRRR::Addw => 0b000, + AluOPRRR::Subw => 0b000, + AluOPRRR::Sllw => 0b001, + AluOPRRR::Srlw => 0b101, + AluOPRRR::Sraw => 0b101, + + AluOPRRR::Mul => 0b000, + AluOPRRR::Mulh => 0b001, + AluOPRRR::Mulhsu => 0b010, + AluOPRRR::Mulhu => 0b011, + AluOPRRR::Div => 0b100, + AluOPRRR::DivU => 0b101, + AluOPRRR::Rem => 0b110, + AluOPRRR::RemU => 0b111, + + AluOPRRR::Mulw => 0b000, + AluOPRRR::Divw => 0b100, + AluOPRRR::Divuw => 0b101, + AluOPRRR::Remw => 0b110, + AluOPRRR::Remuw => 0b111, + + // Zbb + AluOPRRR::Adduw => 0b000, + AluOPRRR::Andn => 0b111, + AluOPRRR::Bclr => 0b001, + AluOPRRR::Bext => 0b101, + AluOPRRR::Binv => 0b001, + AluOPRRR::Bset => 0b001, + AluOPRRR::Clmul => 
0b001, + AluOPRRR::Clmulh => 0b011, + AluOPRRR::Clmulr => 0b010, + AluOPRRR::Max => 0b110, + AluOPRRR::Maxu => 0b111, + AluOPRRR::Min => 0b100, + AluOPRRR::Minu => 0b101, + AluOPRRR::Orn => 0b110, + AluOPRRR::Rol => 0b001, + AluOPRRR::Rolw => 0b001, + AluOPRRR::Ror => 0b101, + AluOPRRR::Rorw => 0b101, + AluOPRRR::Sh1add => 0b010, + AluOPRRR::Sh1adduw => 0b010, + AluOPRRR::Sh2add => 0b100, + AluOPRRR::Sh2adduw => 0b100, + AluOPRRR::Sh3add => 0b110, + AluOPRRR::Sh3adduw => 0b110, + AluOPRRR::Xnor => 0b100, + + // Zbkb + AluOPRRR::Pack => 0b100, + AluOPRRR::Packw => 0b100, + AluOPRRR::Packh => 0b111, + } + } + + pub fn op_code(self) -> u32 { + match self { + AluOPRRR::Add + | AluOPRRR::Sub + | AluOPRRR::Sll + | AluOPRRR::Slt + | AluOPRRR::Sgt + | AluOPRRR::SltU + | AluOPRRR::Sgtu + | AluOPRRR::Xor + | AluOPRRR::Srl + | AluOPRRR::Sra + | AluOPRRR::Or + | AluOPRRR::And + | AluOPRRR::Pack + | AluOPRRR::Packh => 0b0110011, + + AluOPRRR::Addw + | AluOPRRR::Subw + | AluOPRRR::Sllw + | AluOPRRR::Srlw + | AluOPRRR::Sraw + | AluOPRRR::Packw => 0b0111011, + + AluOPRRR::Mul + | AluOPRRR::Mulh + | AluOPRRR::Mulhsu + | AluOPRRR::Mulhu + | AluOPRRR::Div + | AluOPRRR::DivU + | AluOPRRR::Rem + | AluOPRRR::RemU => 0b0110011, + + AluOPRRR::Mulw + | AluOPRRR::Divw + | AluOPRRR::Divuw + | AluOPRRR::Remw + | AluOPRRR::Remuw => 0b0111011, + + AluOPRRR::Adduw => 0b0111011, + AluOPRRR::Andn + | AluOPRRR::Bclr + | AluOPRRR::Bext + | AluOPRRR::Binv + | AluOPRRR::Bset + | AluOPRRR::Clmul + | AluOPRRR::Clmulh + | AluOPRRR::Clmulr + | AluOPRRR::Max + | AluOPRRR::Maxu + | AluOPRRR::Min + | AluOPRRR::Minu + | AluOPRRR::Orn + | AluOPRRR::Rol + | AluOPRRR::Ror + | AluOPRRR::Sh1add + | AluOPRRR::Sh2add + | AluOPRRR::Sh3add + | AluOPRRR::Xnor => 0b0110011, + + AluOPRRR::Rolw + | AluOPRRR::Rorw + | AluOPRRR::Sh2adduw + | AluOPRRR::Sh3adduw + | AluOPRRR::Sh1adduw => 0b0111011, + } + } + + pub const fn funct7(self) -> u32 { + match self { + AluOPRRR::Add => 0b0000000, + AluOPRRR::Sub => 0b0100000, + AluOPRRR::Sll => 0b0000000, + AluOPRRR::Slt => 0b0000000, + AluOPRRR::Sgt => 0b0000000, + AluOPRRR::SltU => 0b0000000, + AluOPRRR::Sgtu => 0b0000000, + + AluOPRRR::Xor => 0b0000000, + AluOPRRR::Srl => 0b0000000, + AluOPRRR::Sra => 0b0100000, + AluOPRRR::Or => 0b0000000, + AluOPRRR::And => 0b0000000, + + AluOPRRR::Addw => 0b0000000, + AluOPRRR::Subw => 0b0100000, + AluOPRRR::Sllw => 0b0000000, + AluOPRRR::Srlw => 0b0000000, + AluOPRRR::Sraw => 0b0100000, + + AluOPRRR::Mul => 0b0000001, + AluOPRRR::Mulh => 0b0000001, + AluOPRRR::Mulhsu => 0b0000001, + AluOPRRR::Mulhu => 0b0000001, + AluOPRRR::Div => 0b0000001, + AluOPRRR::DivU => 0b0000001, + AluOPRRR::Rem => 0b0000001, + AluOPRRR::RemU => 0b0000001, + + AluOPRRR::Mulw => 0b0000001, + AluOPRRR::Divw => 0b0000001, + AluOPRRR::Divuw => 0b0000001, + AluOPRRR::Remw => 0b0000001, + AluOPRRR::Remuw => 0b0000001, + AluOPRRR::Adduw => 0b0000100, + AluOPRRR::Andn => 0b0100000, + AluOPRRR::Bclr => 0b0100100, + AluOPRRR::Bext => 0b0100100, + AluOPRRR::Binv => 0b0110100, + AluOPRRR::Bset => 0b0010100, + AluOPRRR::Clmul => 0b0000101, + AluOPRRR::Clmulh => 0b0000101, + AluOPRRR::Clmulr => 0b0000101, + AluOPRRR::Max => 0b0000101, + AluOPRRR::Maxu => 0b0000101, + AluOPRRR::Min => 0b0000101, + AluOPRRR::Minu => 0b0000101, + AluOPRRR::Orn => 0b0100000, + AluOPRRR::Rol => 0b0110000, + AluOPRRR::Rolw => 0b0110000, + AluOPRRR::Ror => 0b0110000, + AluOPRRR::Rorw => 0b0110000, + AluOPRRR::Sh1add => 0b0010000, + AluOPRRR::Sh1adduw => 0b0010000, + AluOPRRR::Sh2add => 0b0010000, + AluOPRRR::Sh2adduw => 
0b0010000, + AluOPRRR::Sh3add => 0b0010000, + AluOPRRR::Sh3adduw => 0b0010000, + AluOPRRR::Xnor => 0b0100000, + + // Zbkb + AluOPRRR::Pack => 0b0000100, + AluOPRRR::Packw => 0b0000100, + AluOPRRR::Packh => 0b0000100, + } + } + + pub(crate) fn reverse_rs(self) -> bool { + // special case. + // sgt and sgtu is not defined in isa. + // emit should reverse rs1 and rs2. + self == AluOPRRR::Sgt || self == AluOPRRR::Sgtu + } +} + +impl AluOPRRI { + pub(crate) fn option_funct6(self) -> Option { + let x: Option = match self { + Self::Slli => Some(0b00_0000), + Self::Srli => Some(0b00_0000), + Self::Srai => Some(0b01_0000), + Self::Bclri => Some(0b010010), + Self::Bexti => Some(0b010010), + Self::Binvi => Some(0b011010), + Self::Bseti => Some(0b001010), + Self::Rori => Some(0b011000), + Self::SlliUw => Some(0b000010), + _ => None, + }; + x + } + + pub(crate) fn option_funct7(self) -> Option { + let x = match self { + Self::Slliw => Some(0b000_0000), + Self::SrliW => Some(0b000_0000), + Self::Sraiw => Some(0b010_0000), + Self::Roriw => Some(0b0110000), + _ => None, + }; + x + } + + pub(crate) fn imm12(self, imm12: Imm12) -> u32 { + let x = imm12.as_u32(); + if let Some(func) = self.option_funct6() { + func << 6 | (x & 0b11_1111) + } else if let Some(func) = self.option_funct7() { + func << 5 | (x & 0b1_1111) + } else if let Some(func) = self.option_funct12() { + func + } else { + x + } + } + + pub(crate) fn option_funct12(self) -> Option { + match self { + Self::Clz => Some(0b011000000000), + Self::Clzw => Some(0b011000000000), + Self::Cpop => Some(0b011000000010), + Self::Cpopw => Some(0b011000000010), + Self::Ctz => Some(0b011000000001), + Self::Ctzw => Some(0b011000000001), + Self::Rev8 => Some(0b011010111000), + Self::Sextb => Some(0b011000000100), + Self::Sexth => Some(0b011000000101), + Self::Zexth => Some(0b000010000000), + Self::Orcb => Some(0b001010000111), + Self::Brev8 => Some(0b0110_1000_0111), + _ => None, + } + } + + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Addi => "ADD", + Self::Slti => "slti", + Self::SltiU => "sltiu", + Self::Xori => "xori", + Self::Ori => "ori", + Self::Andi => "andi", + Self::Slli => "slli", + Self::Srli => "srli", + Self::Srai => "srai", + Self::Addiw => "addiw", + Self::Slliw => "slliw", + Self::SrliW => "srliw", + Self::Sraiw => "sraiw", + Self::Bclri => "bclri", + Self::Bexti => "bexti", + Self::Binvi => "binvi", + Self::Bseti => "bseti", + Self::Rori => "rori", + Self::Roriw => "roriw", + Self::SlliUw => "slli.uw", + Self::Clz => "clz", + Self::Clzw => "clzw", + Self::Cpop => "cpop", + Self::Cpopw => "cpopw", + Self::Ctz => "ctz", + Self::Ctzw => "ctzw", + Self::Rev8 => "rev8", + Self::Sextb => "sext.b", + Self::Sexth => "sext.h", + Self::Zexth => "zext.h", + Self::Orcb => "orc.b", + Self::Brev8 => "brev8", + } + } + + pub fn funct3(self) -> u32 { + match self { + AluOPRRI::Addi => 0b000, + AluOPRRI::Slti => 0b010, + AluOPRRI::SltiU => 0b011, + AluOPRRI::Xori => 0b100, + AluOPRRI::Ori => 0b110, + AluOPRRI::Andi => 0b111, + AluOPRRI::Slli => 0b001, + AluOPRRI::Srli => 0b101, + AluOPRRI::Srai => 0b101, + AluOPRRI::Addiw => 0b000, + AluOPRRI::Slliw => 0b001, + AluOPRRI::SrliW => 0b101, + AluOPRRI::Sraiw => 0b101, + AluOPRRI::Bclri => 0b001, + AluOPRRI::Bexti => 0b101, + AluOPRRI::Binvi => 0b001, + AluOPRRI::Bseti => 0b001, + AluOPRRI::Rori => 0b101, + AluOPRRI::Roriw => 0b101, + AluOPRRI::SlliUw => 0b001, + AluOPRRI::Clz => 0b001, + AluOPRRI::Clzw => 0b001, + AluOPRRI::Cpop => 0b001, + AluOPRRI::Cpopw => 0b001, + AluOPRRI::Ctz => 
0b001, + AluOPRRI::Ctzw => 0b001, + AluOPRRI::Rev8 => 0b101, + AluOPRRI::Sextb => 0b001, + AluOPRRI::Sexth => 0b001, + AluOPRRI::Zexth => 0b100, + AluOPRRI::Orcb => 0b101, + AluOPRRI::Brev8 => 0b101, + } + } + + pub fn op_code(self) -> u32 { + match self { + AluOPRRI::Addi + | AluOPRRI::Slti + | AluOPRRI::SltiU + | AluOPRRI::Xori + | AluOPRRI::Ori + | AluOPRRI::Andi + | AluOPRRI::Slli + | AluOPRRI::Srli + | AluOPRRI::Srai + | AluOPRRI::Bclri + | AluOPRRI::Bexti + | AluOPRRI::Binvi + | AluOPRRI::Bseti + | AluOPRRI::Rori + | AluOPRRI::Clz + | AluOPRRI::Cpop + | AluOPRRI::Ctz + | AluOPRRI::Rev8 + | AluOPRRI::Sextb + | AluOPRRI::Sexth + | AluOPRRI::Orcb + | AluOPRRI::Brev8 => 0b0010011, + + AluOPRRI::Addiw + | AluOPRRI::Slliw + | AluOPRRI::SrliW + | AluOPRRI::Sraiw + | AluOPRRI::Roriw + | AluOPRRI::SlliUw + | AluOPRRI::Clzw + | AluOPRRI::Cpopw + | AluOPRRI::Ctzw => 0b0011011, + AluOPRRI::Zexth => 0b0111011, + } + } +} + +impl Default for FRM { + fn default() -> Self { + Self::Fcsr + } +} + +/// float rounding mode. +impl FRM { + pub(crate) fn to_static_str(self) -> &'static str { + match self { + FRM::RNE => "rne", + FRM::RTZ => "rtz", + FRM::RDN => "rdn", + FRM::RUP => "rup", + FRM::RMM => "rmm", + FRM::Fcsr => "fcsr", + } + } + + #[inline] + pub(crate) fn bits(self) -> u8 { + match self { + FRM::RNE => 0b000, + FRM::RTZ => 0b001, + FRM::RDN => 0b010, + FRM::RUP => 0b011, + FRM::RMM => 0b100, + FRM::Fcsr => 0b111, + } + } + pub(crate) fn as_u32(self) -> u32 { + self.bits() as u32 + } +} + +impl FFlagsException { + #[inline] + pub(crate) fn mask(self) -> u32 { + match self { + FFlagsException::NV => 1 << 4, + FFlagsException::DZ => 1 << 3, + FFlagsException::OF => 1 << 2, + FFlagsException::UF => 1 << 1, + FFlagsException::NX => 1 << 0, + } + } +} + +impl LoadOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Lb => "lb", + Self::Lh => "lh", + Self::Lw => "lw", + Self::Lbu => "lbu", + Self::Lhu => "lhu", + Self::Lwu => "lwu", + Self::Ld => "ld", + Self::Flw => "flw", + Self::Fld => "fld", + } + } + + pub(crate) fn from_type(t: Type) -> Self { + if t.is_float() { + return if t == F32 { Self::Flw } else { Self::Fld }; + } + match t { + R32 => Self::Lwu, + R64 | I64 => Self::Ld, + + I8 => Self::Lb, + I16 => Self::Lh, + I32 => Self::Lw, + _ => unreachable!(), + } + } + + pub(crate) fn op_code(self) -> u32 { + match self { + Self::Lb | Self::Lh | Self::Lw | Self::Lbu | Self::Lhu | Self::Lwu | Self::Ld => { + 0b0000011 + } + Self::Flw | Self::Fld => 0b0000111, + } + } + pub(crate) fn funct3(self) -> u32 { + match self { + Self::Lb => 0b000, + Self::Lh => 0b001, + Self::Lw => 0b010, + Self::Lwu => 0b110, + Self::Lbu => 0b100, + Self::Lhu => 0b101, + Self::Ld => 0b011, + Self::Flw => 0b010, + Self::Fld => 0b011, + } + } +} + +impl StoreOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Sb => "sb", + Self::Sh => "sh", + Self::Sw => "sw", + Self::Sd => "sd", + Self::Fsw => "fsw", + Self::Fsd => "fsd", + } + } + pub(crate) fn from_type(t: Type) -> Self { + if t.is_float() { + return if t == F32 { Self::Fsw } else { Self::Fsd }; + } + match t.bits() { + 1 | 8 => Self::Sb, + 16 => Self::Sh, + 32 => Self::Sw, + 64 => Self::Sd, + _ => unreachable!(), + } + } + pub(crate) fn op_code(self) -> u32 { + match self { + Self::Sb | Self::Sh | Self::Sw | Self::Sd => 0b0100011, + Self::Fsw | Self::Fsd => 0b0100111, + } + } + pub(crate) fn funct3(self) -> u32 { + match self { + Self::Sb => 0b000, + Self::Sh => 0b001, + Self::Sw => 0b010, + Self::Sd => 0b011, + Self::Fsw 
=> 0b010, + Self::Fsd => 0b011, + } + } +} + +impl FClassResult { + pub(crate) const fn bit(self) -> u32 { + match self { + FClassResult::NegInfinite => 1 << 0, + FClassResult::NegNormal => 1 << 1, + FClassResult::NegSubNormal => 1 << 2, + FClassResult::NegZero => 1 << 3, + FClassResult::PosZero => 1 << 4, + FClassResult::PosSubNormal => 1 << 5, + FClassResult::PosNormal => 1 << 6, + FClassResult::PosInfinite => 1 << 7, + FClassResult::SNaN => 1 << 8, + FClassResult::QNaN => 1 << 9, + } + } + + #[inline] + pub(crate) const fn is_nan_bits() -> u32 { + Self::SNaN.bit() | Self::QNaN.bit() + } + #[inline] + pub(crate) fn is_zero_bits() -> u32 { + Self::NegZero.bit() | Self::PosZero.bit() + } + + #[inline] + pub(crate) fn is_infinite_bits() -> u32 { + Self::PosInfinite.bit() | Self::NegInfinite.bit() + } +} + +impl AtomicOP { + #[inline] + pub(crate) fn is_load(self) -> bool { + match self { + Self::LrW | Self::LrD => true, + _ => false, + } + } + + #[inline] + pub(crate) fn op_name(self, amo: AMO) -> String { + let s = match self { + Self::LrW => "lr.w", + Self::ScW => "sc.w", + + Self::AmoswapW => "amoswap.w", + Self::AmoaddW => "amoadd.w", + Self::AmoxorW => "amoxor.w", + Self::AmoandW => "amoand.w", + Self::AmoorW => "amoor.w", + Self::AmominW => "amomin.w", + Self::AmomaxW => "amomax.w", + Self::AmominuW => "amominu.w", + Self::AmomaxuW => "amomaxu.w", + Self::LrD => "lr.d", + Self::ScD => "sc.d", + Self::AmoswapD => "amoswap.d", + Self::AmoaddD => "amoadd.d", + Self::AmoxorD => "amoxor.d", + Self::AmoandD => "amoand.d", + Self::AmoorD => "amoor.d", + Self::AmominD => "amomin.d", + Self::AmomaxD => "amomax.d", + Self::AmominuD => "amominu.d", + Self::AmomaxuD => "amomaxu.d", + }; + format!("{}{}", s, amo.to_static_str()) + } + #[inline] + pub(crate) fn op_code(self) -> u32 { + 0b0101111 + } + + #[inline] + pub(crate) fn funct7(self, amo: AMO) -> u32 { + self.funct5() << 2 | amo.as_u32() & 0b11 + } + + pub(crate) fn funct3(self) -> u32 { + match self { + AtomicOP::LrW + | AtomicOP::ScW + | AtomicOP::AmoswapW + | AtomicOP::AmoaddW + | AtomicOP::AmoxorW + | AtomicOP::AmoandW + | AtomicOP::AmoorW + | AtomicOP::AmominW + | AtomicOP::AmomaxW + | AtomicOP::AmominuW + | AtomicOP::AmomaxuW => 0b010, + AtomicOP::LrD + | AtomicOP::ScD + | AtomicOP::AmoswapD + | AtomicOP::AmoaddD + | AtomicOP::AmoxorD + | AtomicOP::AmoandD + | AtomicOP::AmoorD + | AtomicOP::AmominD + | AtomicOP::AmomaxD + | AtomicOP::AmominuD + | AtomicOP::AmomaxuD => 0b011, + } + } + pub(crate) fn funct5(self) -> u32 { + match self { + AtomicOP::LrW => 0b00010, + AtomicOP::ScW => 0b00011, + AtomicOP::AmoswapW => 0b00001, + AtomicOP::AmoaddW => 0b00000, + AtomicOP::AmoxorW => 0b00100, + AtomicOP::AmoandW => 0b01100, + AtomicOP::AmoorW => 0b01000, + AtomicOP::AmominW => 0b10000, + AtomicOP::AmomaxW => 0b10100, + AtomicOP::AmominuW => 0b11000, + AtomicOP::AmomaxuW => 0b11100, + AtomicOP::LrD => 0b00010, + AtomicOP::ScD => 0b00011, + AtomicOP::AmoswapD => 0b00001, + AtomicOP::AmoaddD => 0b00000, + AtomicOP::AmoxorD => 0b00100, + AtomicOP::AmoandD => 0b01100, + AtomicOP::AmoorD => 0b01000, + AtomicOP::AmominD => 0b10000, + AtomicOP::AmomaxD => 0b10100, + AtomicOP::AmominuD => 0b11000, + AtomicOP::AmomaxuD => 0b11100, + } + } + + pub(crate) fn load_op(t: Type) -> Self { + if t.bits() <= 32 { + Self::LrW + } else { + Self::LrD + } + } + pub(crate) fn store_op(t: Type) -> Self { + if t.bits() <= 32 { + Self::ScW + } else { + Self::ScD + } + } + + /// extract + pub(crate) fn extract(rd: WritableReg, offset: Reg, rs: Reg, ty: Type) -> 
SmallInstVec { + let mut insts = SmallInstVec::new(); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: rd, + rs1: rs, + rs2: offset, + }); + // + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts + } + + /// like extract but sign extend the value. + /// suitable for smax,etc. + pub(crate) fn extract_sext( + rd: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: rd, + rs1: rs, + rs2: offset, + }); + // + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: true, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts + } + + pub(crate) fn unset( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + ty: Type, + ) -> SmallInstVec { + assert!(rd != tmp); + let mut insts = SmallInstVec::new(); + insts.extend(Inst::load_int_mask(tmp, ty)); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: tmp, + rs1: tmp.to_reg(), + rs2: offset, + }); + insts.push(Inst::construct_bit_not(tmp, tmp.to_reg())); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp.to_reg(), + }); + insts + } + + pub(crate) fn set( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + assert!(rd != tmp); + let mut insts = SmallInstVec::new(); + // make rs into tmp. + insts.push(Inst::Extend { + rd: tmp, + rn: rs, + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: tmp, + rs1: tmp.to_reg(), + rs2: offset, + }); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp.to_reg(), + }); + insts + } + + /// Merge reset part of rs into rd. + /// Call this function must make sure that other part of value is already in rd. + pub(crate) fn merge( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + let mut insts = Self::unset(rd, tmp, offset, ty); + insts.extend(Self::set(rd, tmp, offset, rs, ty)); + insts + } +} + +impl IntSelectOP { + #[inline] + pub(crate) fn from_ir_op(op: crate::ir::Opcode) -> Self { + match op { + crate::ir::Opcode::Smax => Self::Smax, + crate::ir::Opcode::Umax => Self::Umax, + crate::ir::Opcode::Smin => Self::Smin, + crate::ir::Opcode::Umin => Self::Umin, + _ => unreachable!(), + } + } + #[inline] + pub(crate) fn op_name(self) -> &'static str { + match self { + IntSelectOP::Smax => "smax", + IntSelectOP::Umax => "umax", + IntSelectOP::Smin => "smin", + IntSelectOP::Umin => "umin", + } + } + #[inline] + pub(crate) fn to_int_cc(self) -> IntCC { + match self { + IntSelectOP::Smax => IntCC::SignedGreaterThan, + IntSelectOP::Umax => IntCC::UnsignedGreaterThan, + IntSelectOP::Smin => IntCC::SignedLessThan, + IntSelectOP::Umin => IntCC::UnsignedLessThan, + } + } +} + +///Atomic Memory ordering. +#[derive(Copy, Clone, Debug)] +pub enum AMO { + Relax = 0b00, + Release = 0b01, + Aquire = 0b10, + SeqCst = 0b11, +} + +impl AMO { + pub(crate) fn to_static_str(self) -> &'static str { + match self { + AMO::Relax => "", + AMO::Release => ".rl", + AMO::Aquire => ".aq", + AMO::SeqCst => ".aqrl", + } + } + pub(crate) fn as_u32(self) -> u32 { + self as u32 + } +} + +impl Inst { + /// fence request bits. 
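+    /// These follow the RISC-V `fence` encoding: the predecessor and
+    /// successor sets are each a 4-bit i/o/r/w mask, so e.g. `fence rw, rw`
+    /// uses `FENCE_REQ_R | FENCE_REQ_W` (0b0011) for both halves.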
+ pub(crate) const FENCE_REQ_I: u8 = 1 << 3; + pub(crate) const FENCE_REQ_O: u8 = 1 << 2; + pub(crate) const FENCE_REQ_R: u8 = 1 << 1; + pub(crate) const FENCE_REQ_W: u8 = 1 << 0; + pub(crate) fn fence_req_to_string(x: u8) -> String { + let mut s = String::default(); + if x & Self::FENCE_REQ_I != 0 { + s.push_str("i"); + } + if x & Self::FENCE_REQ_O != 0 { + s.push_str("o"); + } + if x & Self::FENCE_REQ_R != 0 { + s.push_str("r"); + } + if x & Self::FENCE_REQ_W != 0 { + s.push_str("w"); + } + s + } +} + +impl FloatRoundOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + FloatRoundOP::Nearest => "nearest", + FloatRoundOP::Ceil => "ceil", + FloatRoundOP::Floor => "floor", + FloatRoundOP::Trunc => "trunc", + } + } + + pub(crate) fn to_frm(self) -> FRM { + match self { + FloatRoundOP::Nearest => FRM::RNE, + FloatRoundOP::Ceil => FRM::RUP, + FloatRoundOP::Floor => FRM::RDN, + FloatRoundOP::Trunc => FRM::RTZ, + } + } +} + +impl FloatSelectOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + FloatSelectOP::Max => "max", + FloatSelectOP::Min => "min", + } + } + + pub(crate) fn to_fpuoprrr(self, ty: Type) -> FpuOPRRR { + match self { + FloatSelectOP::Max => { + if ty == F32 { + FpuOPRRR::FmaxS + } else { + FpuOPRRR::FmaxD + } + } + FloatSelectOP::Min => { + if ty == F32 { + FpuOPRRR::FminS + } else { + FpuOPRRR::FminD + } + } + } + } + // move qnan bits into int register. + // pub(crate) fn snan_bits(self, rd: Writable, ty: Type) -> SmallInstVec { + // let mut insts = SmallInstVec::new(); + // insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1))); + // let x = if ty == F32 { 22 } else { 51 }; + // insts.push(Inst::AluRRImm12 { + // alu_op: AluOPRRI::Srli, + // rd: rd, + // rs: rd.to_reg(), + // imm12: Imm12::from_bits(x), + // }); + // insts.push(Inst::AluRRImm12 { + // alu_op: AluOPRRI::Slli, + // rd: rd, + // rs: rd.to_reg(), + // imm12: Imm12::from_bits(x), + // }); + // insts + // } +} + +pub(crate) fn f32_bits(f: f32) -> u32 { + u32::from_le_bytes(f.to_le_bytes()) +} +pub(crate) fn f64_bits(f: f64) -> u64 { + u64::from_le_bytes(f.to_le_bytes()) +} + +/// +pub(crate) fn f32_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f32, f32) { + match (signed, out_bits) { + (true, 8) => (i8::min_value() as f32 - 1., i8::max_value() as f32 + 1.), + (true, 16) => (i16::min_value() as f32 - 1., i16::max_value() as f32 + 1.), + (true, 32) => (-2147483904.0, 2147483648.0), + (true, 64) => (-9223373136366403584.0, 9223372036854775808.0), + (false, 8) => (-1., u8::max_value() as f32 + 1.), + (false, 16) => (-1., u16::max_value() as f32 + 1.), + (false, 32) => (-1., 4294967296.0), + (false, 64) => (-1., 18446744073709551616.0), + _ => unreachable!(), + } +} + +pub(crate) fn f64_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f64, f64) { + match (signed, out_bits) { + (true, 8) => (i8::min_value() as f64 - 1., i8::max_value() as f64 + 1.), + (true, 16) => (i16::min_value() as f64 - 1., i16::max_value() as f64 + 1.), + (true, 32) => (-2147483649.0, 2147483648.0), + (true, 64) => (-9223372036854777856.0, 9223372036854775808.0), + (false, 8) => (-1., u8::max_value() as f64 + 1.), + (false, 16) => (-1., u16::max_value() as f64 + 1.), + (false, 32) => (-1., 4294967296.0), + (false, 64) => (-1., 18446744073709551616.0), + _ => unreachable!(), + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs new file mode 100644 index 000000000000..122e7bc97c1c --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -0,0 
+1,3426 @@ +//! Riscv64 ISA: binary code emission. + +use crate::binemit::StackMap; +use crate::ir::{self, RelSourceLoc, TrapCode}; +use crate::isa::zkasm::inst::*; +use crate::machinst::{AllocationConsumer, Reg, Writable}; +use crate::trace; +use cranelift_control::ControlPlane; +use cranelift_entity::EntityRef; +use regalloc2::Allocation; + +pub struct EmitInfo { + shared_flag: settings::Flags, + isa_flags: super::super::riscv_settings::Flags, +} + +impl EmitInfo { + pub(crate) fn new( + shared_flag: settings::Flags, + isa_flags: super::super::riscv_settings::Flags, + ) -> Self { + Self { + shared_flag, + isa_flags, + } + } +} + +/// load constant by put the constant in the code stream. +/// calculate the pc and using load instruction. +/// This is only allow used in the emit stage. +/// Because of those instruction must execute together. +/// see https://github.com/bytecodealliance/wasmtime/pull/5612 +#[derive(Clone, Copy)] +pub(crate) enum LoadConstant { + U32(u32), + U64(u64), +} + +#[allow(unused)] +impl LoadConstant { + fn to_le_bytes(self) -> Vec { + match self { + LoadConstant::U32(x) => Vec::from_iter(x.to_le_bytes().into_iter()), + LoadConstant::U64(x) => Vec::from_iter(x.to_le_bytes().into_iter()), + } + } + fn load_op(self) -> LoadOP { + match self { + LoadConstant::U32(_) => LoadOP::Lwu, + LoadConstant::U64(_) => LoadOP::Ld, + } + } + fn load_ty(self) -> Type { + match self { + LoadConstant::U32(_) => R32, + LoadConstant::U64(_) => R64, + } + } + + pub(crate) fn load_constant Writable>( + self, + rd: Writable, + alloc_tmp: &mut F, + ) -> SmallInstVec { + todo!() + /* + let mut insts = SmallInstVec::new(); + // get current pc. + let pc = alloc_tmp(I64); + insts.push(Inst::Auipc { + rd: pc, + imm: Imm20 { bits: 0 }, + }); + // load + insts.push(Inst::Load { + rd, + op: self.load_op(), + flags: MemFlags::new(), + from: AMode::RegOffset(pc.to_reg(), 12, self.load_ty()), + }); + let data = self.to_le_bytes(); + // jump over. + insts.push(Inst::Jal { + dest: BranchTarget::ResolvedOffset(Inst::INSTRUCTION_SIZE + data.len() as i32), + }); + insts.push(Inst::RawData { data }); + insts + */ + } + + // load and perform an extra add. + pub(crate) fn load_constant_and_add(self, rd: Writable, rs: Reg) -> SmallInstVec { + todo!() + /* + let mut insts = self.load_constant(rd, &mut |_| rd); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd, + rs1: rd.to_reg(), + rs2: rs, + }); + insts + */ + } +} + +pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 { + u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() +} + +#[derive(Clone, Debug, PartialEq, Default)] +pub enum EmitVState { + #[default] + Unknown, + Known(VState), +} + +/// State carried between emissions of a sequence of instructions. +#[derive(Default, Clone, Debug)] +pub struct EmitState { + pub(crate) virtual_sp_offset: i64, + pub(crate) nominal_sp_to_fp: i64, + /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + stack_map: Option, + /// Current source-code location corresponding to instruction to be emitted. + cur_srcloc: RelSourceLoc, + /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and + /// optimized away at compiletime. See [cranelift_control]. + ctrl_plane: ControlPlane, + /// Vector State + /// Controls the current state of the vector unit at the emission point. 
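+    /// It starts out as `Unknown` and is reset to `Unknown` at each block
+    /// boundary (see `on_new_block`), which forces a fresh `VecSetState` to
+    /// be emitted before the next instruction that needs a known `VState`.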
+ vstate: EmitVState, +} + +impl EmitState { + fn take_stack_map(&mut self) -> Option { + self.stack_map.take() + } + + fn clear_post_insn(&mut self) { + self.stack_map = None; + } + + fn cur_srcloc(&self) -> RelSourceLoc { + self.cur_srcloc + } +} + +impl MachInstEmitState for EmitState { + fn new( + abi: &Callee, + ctrl_plane: ControlPlane, + ) -> Self { + EmitState { + virtual_sp_offset: 0, + nominal_sp_to_fp: abi.frame_size() as i64, + stack_map: None, + cur_srcloc: RelSourceLoc::default(), + ctrl_plane, + vstate: EmitVState::Unknown, + } + } + + fn pre_safepoint(&mut self, stack_map: StackMap) { + self.stack_map = Some(stack_map); + } + + fn pre_sourceloc(&mut self, srcloc: RelSourceLoc) { + self.cur_srcloc = srcloc; + } + + fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { + &mut self.ctrl_plane + } + + fn take_ctrl_plane(self) -> ControlPlane { + self.ctrl_plane + } + + fn on_new_block(&mut self) { + // Reset the vector state. + self.vstate = EmitVState::Unknown; + } +} + +#[allow(unused)] +impl Inst { + /// construct a "imm - rs". + pub(crate) fn construct_imm_sub_rs(rd: Writable, imm: u64, rs: Reg) -> SmallInstVec { + todo!() + /* let mut insts = Inst::load_constant_u64(rd, imm, &mut |_| rd); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd, + rs1: rd.to_reg(), + rs2: rs, + }); + insts */ + } + + /// Load int mask. + /// If ty is int then 0xff in rd. + pub(crate) fn load_int_mask(rd: Writable, ty: Type) -> SmallInstVec { + todo!() + /* let mut insts = SmallInstVec::new(); + assert!(ty.is_int() && ty.bits() <= 64); + match ty { + I64 => { + insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1))); + } + I32 | I16 => { + insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1))); + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + } + I8 => { + insts.push(Inst::load_imm12(rd, Imm12::from_bits(255))); + } + _ => unreachable!("ty:{:?}", ty), + } + insts */ + } + /// inverse all bit + pub(crate) fn construct_bit_not(rd: Writable, rs: Reg) -> Inst { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Xori, + rd, + rs, + imm12: Imm12::from_bits(-1), + } + } + + // emit a float is not a nan. + pub(crate) fn emit_not_nan(rd: Writable, rs: Reg, ty: Type) -> Inst { + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FeqS + } else { + FpuOPRRR::FeqD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + } + + pub(crate) fn emit_fabs(rd: Writable, rs: Reg, ty: Type) -> Inst { + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjxS + } else { + FpuOPRRR::FsgnjxD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + } + /// If a float is zero. 
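+    /// Branches to `taken` when `rs` is *not* zero: the (currently
+    /// commented-out) lowering classifies `rs` with `fclass`, masks the
+    /// +0/-0 bits, and takes the branch when that mask comes back zero.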
+ pub(crate) fn emit_if_float_not_zero( + tmp: Writable, + rs: Reg, + ty: Type, + taken: BranchTarget, + not_taken: BranchTarget, + ) -> SmallInstVec { + todo!() + /* let mut insts = SmallInstVec::new(); + let class_op = if ty == F32 { + FpuOPRR::FclassS + } else { + FpuOPRR::FclassD + }; + insts.push(Inst::FpuRR { + alu_op: class_op, + frm: None, + rd: tmp, + rs: rs, + }); + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(FClassResult::is_zero_bits() as i16), + }); + insts.push(Inst::CondBr { + taken, + not_taken, + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + }); + insts */ + } + pub(crate) fn emit_fneg(rd: Writable, rs: Reg, ty: Type) -> Inst { + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjnS + } else { + FpuOPRRR::FsgnjnD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + } + + pub(crate) fn lower_br_icmp( + cc: IntCC, + a: ValueRegs, + b: ValueRegs, + taken: BranchTarget, + not_taken: BranchTarget, + ty: Type, + ) -> SmallInstVec { + todo!() + /* let mut insts = SmallInstVec::new(); + if ty.bits() <= 64 { + let rs1 = a.only_reg().unwrap(); + let rs2 = b.only_reg().unwrap(); + let inst = Inst::CondBr { + taken, + not_taken, + kind: IntegerCompare { kind: cc, rs1, rs2 }, + }; + insts.push(inst); + return insts; + } + // compare i128 + let low = |cc: IntCC| -> IntegerCompare { + IntegerCompare { + rs1: a.regs()[0], + rs2: b.regs()[0], + kind: cc, + } + }; + let high = |cc: IntCC| -> IntegerCompare { + IntegerCompare { + rs1: a.regs()[1], + rs2: b.regs()[1], + kind: cc, + } + }; + match cc { + IntCC::Equal => { + // if high part not equal, + // then we can go to not_taken otherwise fallthrough. + insts.push(Inst::CondBr { + taken: not_taken, + not_taken: BranchTarget::zero(), + kind: high(IntCC::NotEqual), + }); + // the rest part. + insts.push(Inst::CondBr { + taken, + not_taken, + kind: low(IntCC::Equal), + }); + } + + IntCC::NotEqual => { + // if the high part not equal , + // we know the whole must be not equal, + // we can goto the taken part , otherwise fallthrought. + insts.push(Inst::CondBr { + taken, + not_taken: BranchTarget::zero(), // no branch + kind: high(IntCC::NotEqual), + }); + + insts.push(Inst::CondBr { + taken, + not_taken, + kind: low(IntCC::NotEqual), + }); + } + IntCC::SignedGreaterThanOrEqual + | IntCC::SignedLessThanOrEqual + | IntCC::UnsignedGreaterThanOrEqual + | IntCC::UnsignedLessThanOrEqual + | IntCC::SignedGreaterThan + | IntCC::SignedLessThan + | IntCC::UnsignedLessThan + | IntCC::UnsignedGreaterThan => { + // + insts.push(Inst::CondBr { + taken, + not_taken: BranchTarget::zero(), + kind: high(cc.without_equal()), + }); + // + insts.push(Inst::CondBr { + taken: not_taken, + not_taken: BranchTarget::zero(), + kind: high(IntCC::NotEqual), + }); + insts.push(Inst::CondBr { + taken, + not_taken, + kind: low(cc.unsigned()), + }); + } + } + insts */ + } + + /// Returns Some(VState) if this insturction is expecting a specific vector state + /// before emission. + fn expected_vstate(&self) -> Option<&VState> { + match self { + Inst::Nop0 + | Inst::Nop4 + | Inst::Label { .. } + | Inst::BrTable { .. } + | Inst::Auipc { .. } + | Inst::Lui { .. } + | Inst::LoadConst32 { .. } + | Inst::LoadConst64 { .. } + | Inst::AluRRR { .. } + | Inst::AddImm32 { .. } + | Inst::MulImm32 { .. } + | Inst::FpuRRR { .. } + | Inst::AluRRImm12 { .. } + | Inst::Load { .. } + | Inst::Store { .. } + | Inst::Args { .. } + | Inst::Ret { .. } + | Inst::Extend { .. 
} + | Inst::AdjustSp { .. } + | Inst::Call { .. } + | Inst::CallInd { .. } + | Inst::ReturnCall { .. } + | Inst::ReturnCallInd { .. } + | Inst::TrapIf { .. } + | Inst::Jal { .. } + | Inst::CondBr { .. } + | Inst::LoadExtName { .. } + | Inst::LoadAddr { .. } + | Inst::VirtualSPOffsetAdj { .. } + | Inst::Mov { .. } + | Inst::MovFromPReg { .. } + | Inst::Fence { .. } + | Inst::FenceI + | Inst::ECall + | Inst::EBreak + | Inst::Udf { .. } + | Inst::FpuRR { .. } + | Inst::FpuRRRR { .. } + | Inst::Jalr { .. } + | Inst::Atomic { .. } + | Inst::Select { .. } + | Inst::AtomicCas { .. } + | Inst::IntSelect { .. } + | Inst::Icmp { .. } + | Inst::SelectReg { .. } + | Inst::FcvtToInt { .. } + | Inst::RawData { .. } + | Inst::AtomicStore { .. } + | Inst::AtomicLoad { .. } + | Inst::AtomicRmwLoop { .. } + | Inst::TrapIfC { .. } + | Inst::Unwind { .. } + | Inst::DummyUse { .. } + | Inst::FloatRound { .. } + | Inst::FloatSelect { .. } + | Inst::Popcnt { .. } + | Inst::Rev8 { .. } + | Inst::Cltz { .. } + | Inst::Brev8 { .. } + | Inst::StackProbeLoop { .. } => None, + + // VecSetState does not expect any vstate, rather it updates it. + Inst::VecSetState { .. } => None, + + // `vmv` instructions copy a set of registers and ignore vstate. + Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None, + + Inst::VecAluRR { vstate, .. } | + Inst::VecAluRRR { vstate, .. } | + Inst::VecAluRRRR { vstate, .. } | + Inst::VecAluRImm5 { vstate, .. } | + Inst::VecAluRRImm5 { vstate, .. } | + Inst::VecAluRRRImm5 { vstate, .. } | + // TODO: Unit-stride loads and stores only need the AVL to be correct, not + // the full vtype. A future optimization could be to decouple these two when + // updating vstate. This would allow us to avoid emitting a VecSetState in + // some cases. + Inst::VecLoad { vstate, .. } + | Inst::VecStore { vstate, .. } => Some(vstate), + } + } +} + +fn put_string(s: &str, sink: &mut MachBuffer) { + sink.put_data(" ".as_bytes()); + sink.put_data(s.as_bytes()); +} + +fn access_reg_with_offset(reg: Reg, offset: i16) -> String { + let name = reg_name(reg); + match offset.cmp(&0) { + core::cmp::Ordering::Less => format!("{name} - {}", -offset), + core::cmp::Ordering::Equal => name, + core::cmp::Ordering::Greater => format!("{name} + {}", offset), + } +} + +#[allow(unused)] +impl MachInstEmit for Inst { + type State = EmitState; + type Info = EmitInfo; + + fn emit( + &self, + allocs: &[Allocation], + sink: &mut MachBuffer, + emit_info: &Self::Info, + state: &mut EmitState, + ) { + let mut allocs = AllocationConsumer::new(allocs); + + // Check if we need to update the vector state before emitting this instruction + if let Some(expected) = self.expected_vstate() { + if state.vstate != EmitVState::Known(expected.clone()) { + // Update the vector state. + Inst::VecSetState { + rd: writable_zero_reg(), + vstate: expected.clone(), + } + .emit(&[], sink, emit_info, state); + } + } + + // N.B.: we *must* not exceed the "worst-case size" used to compute + // where to insert islands, except when islands are explicitly triggered + // (with an `EmitIsland`). We check this in debug builds. This is `mut` + // to allow disabling the check for `JTSequence`, which is always + // emitted following an `EmitIsland`. 
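+        // Unlike the binary RISC-V emitter this file was adapted from, most
+        // arms below emit textual zkASM through `put_string`; register
+        // operands are rendered with `reg_name` and `access_reg_with_offset`
+        // (e.g. an address with offset 8 prints as "<reg> + 8").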
+ let mut start_off = sink.cur_offset(); + match self { + &Inst::Nop0 => { + // do nothing + } + // Addi x0, x0, 0 + &Inst::Nop4 => { + todo!() /* let x = Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: Writable::from_reg(zero_reg()), + rs: zero_reg(), + imm12: Imm12::zero(), + }; + x.emit(&[], sink, emit_info, state) */ + } + &Inst::Label { imm } => { + sink.put_data(format!("label_{imm}:\n").as_bytes()); + } + &Inst::RawData { ref data } => { + // Right now we only put a u32 or u64 in this instruction. + // It is not very long, no need to check if need `emit_island`. + // If data is very long , this is a bug because RawData is typecial + // use to load some data and rely on some positon in the code stream. + // and we may exceed `Inst::worst_case_size`. + // for more information see https://github.com/bytecodealliance/wasmtime/pull/5612. + todo!() // sink.put_data(&data[..]); + } + &Inst::Lui { rd, ref imm } => { + todo!() /* let rd = allocs.next_writable(rd); + let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12); + sink.put4(x); */ + } + &Inst::LoadConst32 { rd, imm } => { + todo!() /* let rd = allocs.next_writable(rd); + LoadConstant::U32(imm) + .load_constant(rd, &mut |_| rd) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); */ + } + &Inst::LoadConst64 { rd, imm } => { + todo!() /* let rd = allocs.next_writable(rd); + LoadConstant::U64(imm) + .load_constant(rd, &mut |_| rd) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); */ + } + &Inst::FpuRR { + frm, + alu_op, + rd, + rs, + } => { + todo!() /* let rs = allocs.next(rs); + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3(frm) << 12 + | reg_to_gpr_num(rs) << 15 + | alu_op.rs2_funct5() << 20 + | alu_op.funct7() << 25; + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && alu_op.is_convert_to_int() { + sink.add_trap(TrapCode::BadConversionToInteger); + } + sink.put4(x); */ + } + &Inst::FpuRRRR { + alu_op, + rd, + rs1, + rs2, + rs3, + frm, + } => { + todo!() /* let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rs3 = allocs.next(rs3); + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3(frm) << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + | alu_op.funct2() << 25 + | reg_to_gpr_num(rs3) << 27; + + sink.put4(x); */ + } + &Inst::FpuRRR { + alu_op, + frm, + rd, + rs1, + rs2, + } => { + todo!() /* let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + + let x: u32 = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | (alu_op.funct3(frm)) << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + | alu_op.funct7() << 25; + sink.put4(x); */ + } + &Inst::Unwind { ref inst } => { + put_string(&format!("Unwind\n"), sink); + // sink.add_unwind(inst.clone()); + } + &Inst::DummyUse { reg } => { + todo!() // allocs.next(reg); + } + &Inst::AddImm32 { rd, src1, src2 } => { + let rd = allocs.next(rd.to_reg()); + // TODO(akashin): Should we have a function for `bits` field? + put_string( + &format!("{} + {} => {}\n", src1.bits, src2.bits, reg_name(rd)), + sink, + ); + } + &Inst::MulImm32 { rd, src1, src2 } => { + let rd = allocs.next(rd.to_reg()); + // TODO(akashin): Should we have a function for `bits` field? 
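+                // Mirrors AddImm32 above: emits a zkASM line of the form
+                // `<imm1> * <imm2> => <rd>`, with both operands taken from the
+                // instruction's immediate `bits` fields.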
+ put_string( + &format!("{} * {} => {}\n", src1.bits, src2.bits, reg_name(rd)), + sink, + ); + } + &Inst::AluRRR { + alu_op, + rd, + rs1, + rs2, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + debug_assert_eq!(rs1, a0()); + debug_assert_eq!(rs2, b0()); + let rd = allocs.next_writable(rd); + put_string( + &format!("$ => {} :{}\n", reg_name(rd.to_reg()), alu_op.op_name()), + sink, + ); + + /* + let (rs1, rs2) = if alu_op.reverse_rs() { + (rs2, rs1) + } else { + (rs1, rs2) + }; + + sink.put4(encode_r_type( + alu_op.op_code(), + rd, + alu_op.funct3(), + rs1, + rs2, + alu_op.funct7(), + )); */ + } + &Inst::AluRRImm12 { + alu_op, + rd, + rs, + imm12, + } => { + let rs = allocs.next(rs); + let rd = allocs.next_writable(rd); + match alu_op { + AluOPRRI::Addi => { + put_string( + &format!( + "{} + {} => {}\n", + reg_name(rs), + imm12.bits, + reg_name(rd.to_reg()) + ), + sink, + ); + } + AluOPRRI::Slli => { + put_string( + &format!( + "{} << {} => {}\n", + reg_name(rs), + imm12.bits, + reg_name(rd.to_reg()) + ), + sink, + ); + } + AluOPRRI::Srli => { + put_string( + &format!( + "{} >> {} => {}\n", + reg_name(rs), + imm12.bits, + reg_name(rd.to_reg()) + ), + sink, + ); + } + _ => unreachable!("Op {:?} is not implemented", alu_op), + }; + + // let x = alu_op.op_code() + // | reg_to_gpr_num(rd.to_reg()) << 7 + // | alu_op.funct3() << 12 + // | reg_to_gpr_num(rs) << 15 + // | alu_op.imm12(imm12) << 20; + // sink.put4(x); + } + &Inst::Load { + rd, + op, + from, + flags, + } => { + let from = from.clone().with_allocs(&mut allocs); + let base = from.get_base_register(); + let offset = from.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + let rd = allocs.next_writable(rd); + + let (addr, imm12) = match (base, offset_imm12) { + // If the offset fits into an imm12 we can directly encode it. + (Some(base), Some(imm12)) => (base, imm12), + // Otherwise load the address it into a reg and load from it. + _ => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: from }.emit(&[], sink, emit_info, state); + (tmp.to_reg(), Imm12::zero()) + } + }; + put_string( + &format!( + "$ => {} :MLOAD({})\n", + reg_name(rd.to_reg()), + access_reg_with_offset(addr, imm12.bits), + ), + sink, + ); + + // let srcloc = state.cur_srcloc(); + // if !srcloc.is_default() && !flags.notrap() { + // // Register the offset at which the actual load instruction starts. + // sink.add_trap(TrapCode::HeapOutOfBounds); + // } + // + // sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); + } + &Inst::Store { op, src, flags, to } => { + let to = to.clone().with_allocs(&mut allocs); + let src = allocs.next(src); + + let base = to.get_base_register(); + let offset = to.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + + let (addr, imm12) = match (base, offset_imm12) { + // If the offset fits into an imm12 we can directly encode it. + (Some(base), Some(imm12)) => (base, imm12), + // Otherwise load the address it into a reg and load from it. + _ => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: to }.emit(&[], sink, emit_info, state); + (tmp.to_reg(), Imm12::zero()) + } + }; + put_string( + &format!( + "{} :MSTORE({})\n", + reg_name(src), + access_reg_with_offset(addr, imm12.bits), + ), + sink, + ); + + // let srcloc = state.cur_srcloc(); + // if !srcloc.is_default() && !flags.notrap() { + // // Register the offset at which the actual load instruction starts. 
+ // sink.add_trap(TrapCode::HeapOutOfBounds); + // } + // + // sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12)); + } + &Inst::Args { .. } => { + // Nothing: this is a pseudoinstruction that serves + // only to constrain registers at a certain point. + } + &Inst::Ret { + stack_bytes_to_pop, .. + } => { + // put_string(&format!("RETURN\n"), sink); + put_string(&format!(":JMP(RR)\n"), sink); + + /* if stack_bytes_to_pop != 0 { + Inst::AdjustSp { + amount: i64::from(stack_bytes_to_pop), + } + .emit(&[], sink, emit_info, state); + } + //jalr x0, x1, 0 + let x: u32 = (0b1100111) | (1 << 15); + sink.put4(x); */ + } + + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits: _to_bits, + } => { + todo!() /* let rn = allocs.next(rn); + let rd = allocs.next_writable(rd); + let mut insts = SmallInstVec::new(); + let shift_bits = (64 - from_bits) as i16; + let is_u8 = || from_bits == 8 && signed == false; + if is_u8() { + // special for u8. + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd, + rs: rn, + imm12: Imm12::from_bits(255), + }); + } else { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd, + rs: rn, + imm12: Imm12::from_bits(shift_bits), + }); + insts.push(Inst::AluRRImm12 { + alu_op: if signed { + AluOPRRI::Srai + } else { + AluOPRRI::Srli + }, + rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(shift_bits), + }); + } + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); */ + } + &Inst::AdjustSp { amount } => { + let amount = if amount > 0 { + format!("- {}", amount) + } else { + format!("+ {}", -amount) + }; + put_string(&format!("SP {amount} => SP\n"), sink); + + /* if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_stack_reg(), + rs: stack_reg(), + imm12: imm, + } + .emit(&[], sink, emit_info, state); + } else { + let tmp = writable_spilltmp_reg(); + let mut insts = Inst::load_constant_u64(tmp, amount as u64, &mut |_| tmp); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_stack_reg(), + rs1: tmp.to_reg(), + rs2: stack_reg(), + }); + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } */ + } + &Inst::Call { ref info } => { + // call + match info.dest { + ExternalName::User(name) => { + // For now we only support calls. + assert!(info.opcode.is_call()); + sink.add_call_site(info.opcode); + sink.add_reloc(Reloc::RiscvCall, &info.dest, 0); + // This will be patched externally to do a necessary jump. + put_string(&format!("; CALL {name}\n"), sink); + + // match name.index() { + // // Special case for ASSERT call. + // 0 => { + // Inst::Mov { + // ty: types::I64, + // rd: regs::writable_a0(), + // rm: info.uses[0].preg, + // } + // .emit(&[], sink, emit_info, state); + // put_string( + // &format!("{} :ASSERT\n", reg_name(info.uses[1].preg)), + // sink, + // ); + // } + // v => { + // Inst::Jal { + // dest: BranchTarget::Label(MachLabel::new(v)), + // } + // .emit(&[], sink, emit_info, state); + // } + // }; + + // if let Some(s) = state.take_stack_map() { + // sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s); + // } + // Inst::construct_auipc_and_jalr( + // Some(writable_link_reg()), + // writable_link_reg(), + // 0, + // ) + // .into_iter() + // .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + ExternalName::LibCall(..) + | ExternalName::TestCase { .. } + | ExternalName::KnownSymbol(..) => { + unimplemented!(); + // use indirect call. it is more simple. + // load ext name. 
+ // Inst::LoadExtName { + // rd: writable_spilltmp_reg2(), + // name: Box::new(info.dest.clone()), + // offset: 0, + // } + // .emit(&[], sink, emit_info, state); + // + // if let Some(s) = state.take_stack_map() { + // sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + // } + // if info.opcode.is_call() { + // sink.add_call_site(info.opcode); + // } + // call + // Inst::Jalr { + // rd: writable_link_reg(), + // base: spilltmp_reg2(), + // offset: Imm12::zero(), + // } + // .emit(&[], sink, emit_info, state); + } + } + + let callee_pop_size = i64::from(info.callee_pop_size); + state.virtual_sp_offset -= callee_pop_size; + trace!( + "call adjusts virtual sp offset by {callee_pop_size} -> {}", + state.virtual_sp_offset + ); + } + &Inst::CallInd { ref info } => { + // let rn = allocs.next(info.rn); + // put_string(&format!("CALL {}, {:?}\n", reg_name(rn), info.uses), sink); + + dbg!(info); + todo!(); + // For now we only support calls. + // assert!(info.opcode.is_call()); + + /* + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + + Inst::Jalr { + rd: writable_link_reg(), + base: rn, + offset: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + + let callee_pop_size = i64::from(info.callee_pop_size); + state.virtual_sp_offset -= callee_pop_size; + trace!( + "call adjusts virtual sp offset by {callee_pop_size} -> {}", + state.virtual_sp_offset + ); */ + } + + &Inst::ReturnCall { + ref callee, + ref info, + } => { + todo!() /* emit_return_call_common_sequence( + &mut allocs, + sink, + emit_info, + state, + info.new_stack_arg_size, + info.old_stack_arg_size, + &info.uses, + ); + + sink.add_call_site(ir::Opcode::ReturnCall); + sink.add_reloc(Reloc::RiscvCall, &callee, 0); + Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // `emit_return_call_common_sequence` emits an island if + // necessary, so we can safely disable the worst-case-size check + // in this case. + start_off = sink.cur_offset(); */ + } + + &Inst::ReturnCallInd { callee, ref info } => { + todo!() /* let callee = allocs.next(callee); + + emit_return_call_common_sequence( + &mut allocs, + sink, + emit_info, + state, + info.new_stack_arg_size, + info.old_stack_arg_size, + &info.uses, + ); + + Inst::Jalr { + rd: writable_zero_reg(), + base: callee, + offset: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + + // `emit_return_call_common_sequence` emits an island if + // necessary, so we can safely disable the worst-case-size check + // in this case. + start_off = sink.cur_offset(); */ + } + + &Inst::Jal { dest } => { + match dest { + BranchTarget::Label(label) => { + // TODO: the following two lines allow eg. optimizing out jump-to-here + // sink.use_label_at_offset(start_off, label, LabelUse::Jal20); + // sink.add_uncond_branch(start_off, start_off + 4, label); + put_string(&format!(":JMP(label_{})\n", label.index()), sink); + } + BranchTarget::ResolvedOffset(offset) => { + todo!() /* + let offset = offset as i64; + if offset != 0 { + if LabelUse::Jal20.offset_in_range(offset) { + let mut code = code.to_le_bytes(); + LabelUse::Jal20.patch_raw_offset(&mut code, offset); + sink.put_data(&code[..]); + } else { + Inst::construct_auipc_and_jalr( + None, + writable_spilltmp_reg(), + offset, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + } else { + // CondBr often generate Jal {dest : 0}, means otherwise no jump. 
+ } */ + } + } + } + &Inst::CondBr { + taken, + not_taken, + mut kind, + } => { + kind.rs1 = allocs.next(kind.rs1); + kind.rs2 = allocs.next(kind.rs2); + // TODO(akashin): Support other types of comparisons. + assert!(matches!(kind.kind, IntCC::NotEqual)); + assert_eq!(kind.rs2, zero_reg()); + match taken { + BranchTarget::Label(label) => { + put_string( + &format!("{} :JMPNZ(label_{})\n", reg_name(kind.rs1), label.index()), + sink, + ); + + // let code = kind.emit(); + // let code_inverse = kind.inverse().emit().to_le_bytes(); + // sink.use_label_at_offset(start_off, label, LabelUse::B12); + // sink.add_cond_branch(start_off, start_off + 4, label, &code_inverse); + // sink.put4(code); + } + BranchTarget::ResolvedOffset(offset) => { + assert!(offset != 0); + todo!(); + + // if LabelUse::B12.offset_in_range(offset as i64) { + // let code = kind.emit(); + // let mut code = code.to_le_bytes(); + // LabelUse::B12.patch_raw_offset(&mut code, offset as i64); + // sink.put_data(&code[..]) + // } else { + // let mut code = kind.emit().to_le_bytes(); + // // jump over the condbr , 4 bytes. + // LabelUse::B12.patch_raw_offset(&mut code[..], 4); + // sink.put_data(&code[..]); + // Inst::construct_auipc_and_jalr( + // None, + // writable_spilltmp_reg(), + // offset as i64, + // ) + // .into_iter() + // .for_each(|i| i.emit(&[], sink, emit_info, state)); + // } + } + } + // TODO(akashin): Can also merge this as an else in jump. + Inst::Jal { dest: not_taken }.emit(&[], sink, emit_info, state); + } + + &Inst::Mov { rd, rm, ty } => { + if rd.to_reg() == rm { + return; + } + + let rm = allocs.next(rm); + let rd = allocs.next_writable(rd); + put_string( + &format!("{} => {}\n", reg_name(rm), reg_name(rd.to_reg())), + sink, + ); + + // match rm.class() { + // RegClass::Int => Inst::AluRRImm12 { + // alu_op: AluOPRRI::Ori, + // rd: rd, + // rs: rm, + // imm12: Imm12::zero(), + // }, + // RegClass::Float => Inst::FpuRRR { + // alu_op: if ty == F32 { + // FpuOPRRR::FsgnjS + // } else { + // FpuOPRRR::FsgnjD + // }, + // frm: None, + // rd: rd, + // rs1: rm, + // rs2: rm, + // }, + // RegClass::Vector => Inst::VecAluRRImm5 { + // op: VecAluOpRRImm5::VmvrV, + // vd: rd, + // vs2: rm, + // // Imm 0 means copy 1 register. + // imm: Imm5::maybe_from_i8(0).unwrap(), + // mask: VecOpMasking::Disabled, + // // Vstate for this instruction is ignored. + // vstate: VState::from_type(ty), + // }, + // } + // .emit(&[], sink, emit_info, state); + } + + &Inst::MovFromPReg { rd, rm } => { + todo!() /* debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); + let rd = allocs.next_writable(rd); + let x = Inst::AluRRImm12 { + alu_op: AluOPRRI::Ori, + rd, + rs: Reg::from(rm), + imm12: Imm12::zero(), + }; + x.emit(&[], sink, emit_info, state); */ + } + + &Inst::BrTable { + index, + tmp1, + tmp2, + ref targets, + } => { + todo!() /* let index = allocs.next(index); + let tmp1 = allocs.next_writable(tmp1); + let tmp2 = allocs.next_writable(tmp2); + let ext_index = writable_spilltmp_reg(); + + // The default target is passed in as the 0th element of `targets` + // separate it here for clarity. + let default_target = targets[0]; + let targets = &targets[1..]; + + // We emit a bounds check on the index, if the index is larger than the number of + // jump table entries, we jump to the default block. Otherwise we compute a jump + // offset by multiplying the index by 8 (the size of each entry) and then jump to + // that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially. 
+ // + // Build the following sequence: + // + // extend_index: + // zext.w ext_index, index + // bounds_check: + // li tmp, n_labels + // bltu ext_index, tmp, compute_target + // jump_to_default_block: + // auipc pc, 0 + // jalr zero, pc, default_block + // compute_target: + // auipc pc, 0 + // slli tmp, ext_index, 3 + // add pc, pc, tmp + // jalr zero, pc, 0x10 + // jump_table: + // ; This repeats for each entry in the jumptable + // auipc pc, 0 + // jalr zero, pc, block_target + + // Extend the index to 64 bits. + // + // This prevents us branching on the top 32 bits of the index, which + // are undefined. + Inst::Extend { + rd: ext_index, + rn: index, + signed: false, + from_bits: 32, + to_bits: 64, + } + .emit(&[], sink, emit_info, state); + + // Bounds check. + // + // Check if the index passed in is larger than the number of jumptable + // entries that we have. If it is, we fallthrough to a jump into the + // default block. + Inst::load_constant_u32(tmp2, targets.len() as u64, &mut |_| tmp2) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::CondBr { + taken: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 3), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::UnsignedLessThan, + rs1: ext_index.to_reg(), + rs2: tmp2.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + sink.use_label_at_offset( + sink.cur_offset(), + default_target.as_label().unwrap(), + LabelUse::PCRel32, + ); + Inst::construct_auipc_and_jalr(None, tmp2, 0) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // Compute the jump table offset. + // We need to emit a PC relative offset, + + // Get the current PC. + Inst::Auipc { + rd: tmp1, + imm: Imm20::from_bits(0), + } + .emit(&[], sink, emit_info, state); + + // Multiply the index by 8, since that is the size in + // bytes of each jump table entry + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: ext_index.to_reg(), + imm12: Imm12::from_bits(3), + } + .emit(&[], sink, emit_info, state); + + // Calculate the base of the jump, PC + the offset from above. + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: tmp1, + rs1: tmp1.to_reg(), + rs2: tmp2.to_reg(), + } + .emit(&[], sink, emit_info, state); + + // Jump to the middle of the jump table. + // We add a 16 byte offset here, since we used 4 instructions + // since the AUIPC that was used to get the PC. + Inst::Jalr { + rd: writable_zero_reg(), + base: tmp1.to_reg(), + offset: Imm12::from_bits((4 * Inst::INSTRUCTION_SIZE) as i16), + } + .emit(&[], sink, emit_info, state); + + // Emit the jump table. + // + // Each entry is a aupc + jalr to the target block. We also start with a island + // if necessary. + + // Each entry in the jump table is 2 instructions, so 8 bytes. Check if + // we need to emit a jump table here to support that jump. + let distance = (targets.len() * 2 * Inst::INSTRUCTION_SIZE as usize) as u32; + if sink.island_needed(distance) { + sink.emit_island(&mut state.ctrl_plane); + } + + // Emit the jumps back to back + for target in targets.iter() { + sink.use_label_at_offset( + sink.cur_offset(), + target.as_label().unwrap(), + LabelUse::PCRel32, + ); + + Inst::construct_auipc_and_jalr(None, tmp2, 0) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + + // We've just emitted an island that is safe up to *here*. + // Mark it as such so that we don't needlessly emit additional islands. 
+ start_off = sink.cur_offset(); */ + } + + &Inst::VirtualSPOffsetAdj { amount } => { + println!("virtual_sp_offset_adj {amount}"); + // crate::trace!( + // "virtual sp offset adjusted by {} -> {}", + // amount, + // state.virtual_sp_offset + amount + // ); + // state.virtual_sp_offset += amount; + } + &Inst::Atomic { + op, + rd, + addr, + src, + amo, + } => { + todo!() /* let addr = allocs.next(addr); + let src = allocs.next(src); + let rd = allocs.next_writable(rd); + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() { + sink.add_trap(TrapCode::HeapOutOfBounds); + } + let x = op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | op.funct3() << 12 + | reg_to_gpr_num(addr) << 15 + | reg_to_gpr_num(src) << 20 + | op.funct7(amo) << 25; + + sink.put4(x); */ + } + &Inst::Fence { pred, succ } => { + todo!() /* let x = 0b0001111 + | 0b00000 << 7 + | 0b000 << 12 + | 0b00000 << 15 + | (succ as u32) << 20 + | (pred as u32) << 24; + + sink.put4(x); */ + } + &Inst::FenceI => todo!(), // sink.put4(0x0000100f), + &Inst::Auipc { rd, imm } => { + todo!() /* let rd = allocs.next_writable(rd); + let x = enc_auipc(rd, imm); + sink.put4(x); */ + } + + &Inst::LoadAddr { rd, mem } => { + todo!() /* let mem = mem.with_allocs(&mut allocs); + let rd = allocs.next_writable(rd); + + let base = mem.get_base_register(); + let offset = mem.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + + match (mem, base, offset_imm12) { + (_, Some(rs), Some(imm12)) => { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + } + .emit(&[], sink, emit_info, state); + } + (_, Some(rs), None) => { + LoadConstant::U64(offset as u64) + .load_constant_and_add(rd, rs) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + } + (AMode::Const(addr), None, _) => { + // Get an address label for the constant and recurse. + let label = sink.get_label_for_constant(addr); + Inst::LoadAddr { + rd, + mem: AMode::Label(label), + } + .emit(&[], sink, emit_info, state); + } + (AMode::Label(label), None, _) => { + // Get the current PC. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20); + let inst = Inst::Auipc { + rd, + imm: Imm20::from_bits(0), + }; + inst.emit(&[], sink, emit_info, state); + + // Emit an add to the address with a relocation. + // This later gets patched up with the correct offset. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs: rd.to_reg(), + imm12: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + } + (amode, _, _) => { + unimplemented!("LoadAddr: {:?}", amode); + } + } */ + } + + &Inst::Select { + ref dst, + condition, + ref x, + ref y, + ty: _ty, + } => { + todo!() /* let condition = allocs.next(condition); + let x = alloc_value_regs(x, &mut allocs); + let y = alloc_value_regs(y, &mut allocs); + let dst: Vec<_> = dst + .clone() + .into_iter() + .map(|r| allocs.next_writable(r)) + .collect(); + + let mut insts = SmallInstVec::new(); + let label_false = sink.get_label(); + insts.push(Inst::CondBr { + taken: BranchTarget::Label(label_false), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: condition, + rs2: zero_reg(), + }, + }); + // here is the true + // select the first value + insts.extend(gen_moves(&dst[..], x.regs())); + let label_jump_over = sink.get_label(); + insts.push(Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + }); + // here is false + insts + .drain(..) 
+ .for_each(|i: Inst| i.emit(&[], sink, emit_info, state)); + sink.bind_label(label_false, &mut state.ctrl_plane); + // select second value1 + insts.extend(gen_moves(&dst[..], y.regs())); + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + &Inst::Jalr { rd, base, offset } => { + todo!() /* let rd = allocs.next_writable(rd); + let x = enc_jalr(rd, base, offset); + sink.put4(x); */ + } + &Inst::ECall => { + todo!() // sink.put4(0x00000073); + } + &Inst::EBreak => { + todo!() // sink.put4(0x00100073); + } + &Inst::Icmp { + cc, + rd, + ref a, + ref b, + ty, + } => { + let a = alloc_value_regs(a, &mut allocs); + let b = alloc_value_regs(b, &mut allocs); + let rd = allocs.next_writable(rd); + + let a = a + .only_reg() + .expect("Only support 1 register in comparison now"); + let b = b + .only_reg() + .expect("Only support 1 register in comparison now"); + debug_assert_eq!(a, a0()); + debug_assert_eq!(b, b0()); + + let opcode = match cc { + IntCC::Equal => "EQ", + IntCC::NotEqual => "NEQ", + IntCC::SignedLessThan => "SLT", + IntCC::SignedGreaterThanOrEqual => todo!(), + IntCC::SignedGreaterThan => todo!(), + IntCC::SignedLessThanOrEqual => todo!(), + IntCC::UnsignedLessThan => "LT", + IntCC::UnsignedGreaterThanOrEqual => todo!(), + IntCC::UnsignedGreaterThan => todo!(), + IntCC::UnsignedLessThanOrEqual => todo!(), + }; + + put_string(&format!("$ => {} :{opcode}\n", reg_name(rd.to_reg())), sink); + + /* + let label_true = sink.get_label(); + let label_false = sink.get_label(); + Inst::lower_br_icmp( + cc, + a, + b, + BranchTarget::Label(label_true), + BranchTarget::Label(label_false), + ty, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + sink.bind_label(label_true, &mut state.ctrl_plane); + Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 2), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_false, &mut state.ctrl_plane); + Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state); */ + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + ty, + } => { + todo!() /* let offset = allocs.next(offset); + let e = allocs.next(e); + let addr = allocs.next(addr); + let v = allocs.next(v); + let t0 = allocs.next_writable(t0); + let dst = allocs.next_writable(dst); + + // # addr holds address of memory location + // # e holds expected value + // # v holds desired value + // # dst holds return value + // cas: + // lr.w dst, (addr) # Load original value. + // bne dst, e, fail # Doesn’t match, so fail. + // sc.w t0, v, (addr) # Try to update. + // bnez t0 , cas # if store not ok,retry. 
+ // fail: + let fail_label = sink.get_label(); + let cas_lebel = sink.get_label(); + sink.bind_label(cas_lebel, &mut state.ctrl_plane); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: dst, + addr, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + if ty.bits() < 32 { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } else if ty.bits() == 32 { + Inst::Extend { + rd: dst, + rn: dst.to_reg(), + signed: false, + from_bits: 32, + to_bits: 64, + } + .emit(&[], sink, emit_info, state); + } + Inst::CondBr { + taken: BranchTarget::Label(fail_label), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: e, + rs2: dst.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + let store_value = if ty.bits() < 32 { + // reload value to t0. + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: t0, + addr, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + // set reset part. + AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + t0.to_reg() + } else { + v + }; + Inst::Atomic { + op: AtomicOP::store_op(ty), + rd: t0, + addr, + src: store_value, + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + // check is our value stored. + Inst::CondBr { + taken: BranchTarget::Label(cas_lebel), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: t0.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + sink.bind_label(fail_label, &mut state.ctrl_plane); */ + } + &Inst::AtomicRmwLoop { + offset, + op, + dst, + ty, + p, + x, + t0, + } => { + todo!() /* let offset = allocs.next(offset); + let p = allocs.next(p); + let x = allocs.next(x); + let t0 = allocs.next_writable(t0); + let dst = allocs.next_writable(dst); + let retry = sink.get_label(); + sink.bind_label(retry, &mut state.ctrl_plane); + // load old value. 
+ Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: dst, + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + // + + let store_value: Reg = match op { + crate::ir::AtomicRmwOp::Add + | crate::ir::AtomicRmwOp::Sub + | crate::ir::AtomicRmwOp::And + | crate::ir::AtomicRmwOp::Or + | crate::ir::AtomicRmwOp::Xor => { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::AluRRR { + alu_op: match op { + crate::ir::AtomicRmwOp::Add => AluOPRRR::Add, + crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub, + crate::ir::AtomicRmwOp::And => AluOPRRR::And, + crate::ir::AtomicRmwOp::Or => AluOPRRR::Or, + crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor, + _ => unreachable!(), + }, + rd: t0, + rs1: dst.to_reg(), + rs2: x, + } + .emit(&[], sink, emit_info, state); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + crate::ir::AtomicRmwOp::Nand => { + if ty.bits() < 32 { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: t0, + rs1: x, + rs2: dst.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::construct_bit_not(t0, t0.to_reg()).emit(&[], sink, emit_info, state); + if ty.bits() < 32 { + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } else { + t0.to_reg() + } + } + + crate::ir::AtomicRmwOp::Umin + | crate::ir::AtomicRmwOp::Umax + | crate::ir::AtomicRmwOp::Smin + | crate::ir::AtomicRmwOp::Smax => { + let label_select_dst = sink.get_label(); + let label_select_done = sink.get_label(); + if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax + { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + } else { + AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::lower_br_icmp( + match op { + crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan, + crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan, + crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan, + crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan, + _ => unreachable!(), + }, + ValueRegs::one(dst.to_reg()), + ValueRegs::one(x), + BranchTarget::Label(label_select_dst), + BranchTarget::zero(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + // here we select x. 
+ Inst::gen_move(t0, x, I64).emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_select_done), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_select_dst, &mut state.ctrl_plane); + Inst::gen_move(t0, dst.to_reg(), I64).emit(&[], sink, emit_info, state); + sink.bind_label(label_select_done, &mut state.ctrl_plane); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + crate::ir::AtomicRmwOp::Xchg => { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + x, + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + }; + + Inst::Atomic { + op: AtomicOP::store_op(ty), + rd: t0, + addr: p, + src: store_value, + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + + // if store is not ok,retry. + Inst::CondBr { + taken: BranchTarget::Label(retry), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: t0.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); */ + } + + &Inst::IntSelect { + op, + ref dst, + ref x, + ref y, + ty, + } => { + todo!() /* let x = alloc_value_regs(x, &mut allocs); + let y = alloc_value_regs(y, &mut allocs); + let dst: Vec<_> = dst.iter().map(|r| allocs.next_writable(*r)).collect(); + let label_true = sink.get_label(); + let label_false = sink.get_label(); + let label_done = sink.get_label(); + Inst::lower_br_icmp( + op.to_int_cc(), + x, + y, + BranchTarget::Label(label_true), + BranchTarget::Label(label_false), + ty, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + let gen_move = |dst: &Vec>, + val: &ValueRegs, + sink: &mut MachBuffer, + state: &mut EmitState| { + let mut insts = SmallInstVec::new(); + insts.push(Inst::Mov { + rd: dst[0], + rm: val.regs()[0], + ty: I64, + }); + if ty.bits() == 128 { + insts.push(Inst::Mov { + rd: dst[1], + rm: val.regs()[1], + ty, + }); + } + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + }; + //here is true , use x. 
+ sink.bind_label(label_true, &mut state.ctrl_plane); + gen_move(&dst, &x, sink, state); + Inst::gen_jump(label_done).emit(&[], sink, emit_info, state); + // here is false use y + sink.bind_label(label_false, &mut state.ctrl_plane); + gen_move(&dst, &y, sink, state); + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + + &Inst::SelectReg { + condition, + rd, + rs1, + rs2, + } => { + todo!() /* let mut condition = condition.clone(); + condition.rs1 = allocs.next(condition.rs1); + condition.rs2 = allocs.next(condition.rs2); + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + let label_true = sink.get_label(); + let label_jump_over = sink.get_label(); + let ty = Inst::canonical_type_for_rc(rs1.class()); + + sink.use_label_at_offset(sink.cur_offset(), label_true, LabelUse::B12); + let x = condition.emit(); + sink.put4(x); + // here is false , use rs2 + Inst::gen_move(rd, rs2, ty).emit(&[], sink, emit_info, state); + // and jump over + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here condition is true , use rs1 + sink.bind_label(label_true, &mut state.ctrl_plane); + Inst::gen_move(rd, rs1, ty).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + &Inst::FcvtToInt { + is_sat, + rd, + rs, + is_signed, + in_type, + out_type, + tmp, + } => { + todo!() /* let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_jump_over = sink.get_label(); + // get if nan. + Inst::emit_not_nan(rd, rs, in_type).emit(&[], sink, emit_info, state); + // jump to nan. + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs2: zero_reg(), + rs1: rd.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + + if !is_sat { + let f32_bounds = f32_cvt_to_int_bounds(is_signed, out_type.bits() as u8); + let f64_bounds = f64_cvt_to_int_bounds(is_signed, out_type.bits() as u8); + if in_type == F32 { + Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.0), |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.0), |_| { + writable_spilltmp_reg() + }) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + let le_op = if in_type == F32 { + FpuOPRRR::FleS + } else { + FpuOPRRR::FleD + }; + + // rd := rs <= tmp + Inst::FpuRRR { + alu_op: le_op, + frm: None, + rd, + rs1: rs, + rs2: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::TrapIf { + test: rd.to_reg(), + trap_code: TrapCode::IntegerOverflow, + } + .emit(&[], sink, emit_info, state); + + if in_type == F32 { + Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.1), |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.1), |_| { + writable_spilltmp_reg() + }) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // rd := rs >= tmp + Inst::FpuRRR { + alu_op: le_op, + frm: None, + rd, + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + + Inst::TrapIf { + test: rd.to_reg(), + trap_code: TrapCode::IntegerOverflow, + } + .emit(&[], sink, emit_info, state); + } + // convert to int normally. 
+ Inst::FpuRR { + frm: Some(FRM::RTZ), + alu_op: FpuOPRR::float_convert_2_int_op(in_type, is_signed, out_type), + rd, + rs, + } + .emit(&[], sink, emit_info, state); + if out_type.bits() < 32 && is_signed { + // load value part mask. + Inst::load_constant_u32( + writable_spilltmp_reg(), + if 16 == out_type.bits() { + (u16::MAX >> 1) as u64 + } else { + // I8 + (u8::MAX >> 1) as u64 + }, + &mut |_| writable_spilltmp_reg2(), + ) + .into_iter() + .for_each(|x| x.emit(&[], sink, emit_info, state)); + // keep value part. + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg(), + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + // extact sign bit. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(31), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(if 16 == out_type.bits() { + 15 + } else { + // I8 + 7 + }), + } + .emit(&[], sink, emit_info, state); + // make result,sign bit and value part. + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + } + + // I already have the result,jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan , move 0 into rd register + sink.bind_label(label_nan, &mut state.ctrl_plane); + if is_sat { + Inst::load_imm12(rd, Imm12::from_bits(0)).emit(&[], sink, emit_info, state); + } else { + // here is ud2. + Inst::Udf { + trap_code: TrapCode::BadConversionToInteger, + } + .emit(&[], sink, emit_info, state); + } + // bind jump_over + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + + &Inst::LoadExtName { + rd, + ref name, + offset, + } => { + // dbg!(rd, name, offset); + // let rd = allocs.next_writable(rd); + // put_string(&format!("CALL {name:?} => {}\n", reg_name(rd.to_reg())), sink); + + /* + // get the current pc. + Inst::Auipc { + rd: rd, + imm: Imm20::from_bits(0), + } + .emit(&[], sink, emit_info, state); + // load the value. + Inst::Load { + rd: rd, + op: LoadOP::Ld, + flags: MemFlags::trusted(), + from: AMode::RegOffset( + rd.to_reg(), + 12, // auipc load and jal. + I64, + ), + } + .emit(&[], sink, emit_info, state); + // jump over. + Inst::Jal { + // jal and abs8 size for 12. 
+ dest: BranchTarget::offset(12), + } + .emit(&[], sink, emit_info, state); + + sink.add_reloc(Reloc::Abs8, name.as_ref(), offset); + sink.put8(0); */ + } + &Inst::TrapIfC { + rs1, + rs2, + cc, + trap_code, + } => { + todo!() /* let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let label_trap = sink.get_label(); + let label_jump_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_trap), + not_taken: BranchTarget::Label(label_jump_over), + kind: IntegerCompare { kind: cc, rs1, rs2 }, + } + .emit(&[], sink, emit_info, state); + // trap + sink.bind_label(label_trap, &mut state.ctrl_plane); + Inst::Udf { trap_code }.emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + &Inst::TrapIf { test, trap_code } => { + todo!() /* let test = allocs.next(test); + let label_trap = sink.get_label(); + let label_jump_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_trap), + not_taken: BranchTarget::Label(label_jump_over), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: test, + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // trap + sink.bind_label(label_trap, &mut state.ctrl_plane); + Inst::Udf { + trap_code: trap_code, + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + &Inst::Udf { trap_code } => { + todo!() /* sink.add_trap(trap_code); + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + sink.put_data(Inst::TRAP_OPCODE); */ + } + &Inst::AtomicLoad { rd, ty, p } => { + todo!() /* let p = allocs.next(p); + let rd = allocs.next_writable(rd); + // emit the fence. + Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + // load. + Inst::Load { + rd: rd, + op: LoadOP::from_type(ty), + flags: MemFlags::new(), + from: AMode::RegOffset(p, 0, ty), + } + .emit(&[], sink, emit_info, state); + Inst::Fence { + pred: Inst::FENCE_REQ_R, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); */ + } + &Inst::AtomicStore { src, ty, p } => { + todo!() /* let src = allocs.next(src); + let p = allocs.next(p); + Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(p, 0, ty), + op: StoreOP::from_type(ty), + flags: MemFlags::new(), + src, + } + .emit(&[], sink, emit_info, state); */ + } + &Inst::FloatRound { + op, + rd, + int_tmp, + f_tmp, + rs, + ty, + } => { + todo!() /* // this code is port from glibc ceil floor ... implementation. + let rs = allocs.next(rs); + let int_tmp = allocs.next_writable(int_tmp); + let f_tmp = allocs.next_writable(f_tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_x = sink.get_label(); + let label_jump_over = sink.get_label(); + // check if is nan. 
+ Inst::emit_not_nan(int_tmp, rs, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: int_tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + fn max_value_need_round(ty: Type) -> u64 { + match ty { + F32 => { + let x: u64 = 1 << f32::MANTISSA_DIGITS; + let x = x as f32; + let x = u32::from_le_bytes(x.to_le_bytes()); + x as u64 + } + F64 => { + let x: u64 = 1 << f64::MANTISSA_DIGITS; + let x = x as f64; + u64::from_le_bytes(x.to_le_bytes()) + } + _ => unreachable!(), + } + } + // load max value need to round. + if ty == F32 { + Inst::load_fp_constant32(f_tmp, max_value_need_round(ty) as u32, &mut |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(f_tmp, max_value_need_round(ty), &mut |_| { + writable_spilltmp_reg() + }) + } + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // get abs value. + Inst::emit_fabs(rd, rs, ty).emit(&[], sink, emit_info, state); + + // branch if f_tmp < rd + Inst::FpuRRR { + frm: None, + alu_op: if ty == F32 { + FpuOPRRR::FltS + } else { + FpuOPRRR::FltD + }, + rd: int_tmp, + rs1: f_tmp.to_reg(), + rs2: rd.to_reg(), + } + .emit(&[], sink, emit_info, state); + + Inst::CondBr { + taken: BranchTarget::Label(label_x), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: int_tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + + //convert to int. + Inst::FpuRR { + alu_op: FpuOPRR::float_convert_2_int_op(ty, true, I64), + frm: Some(op.to_frm()), + rd: int_tmp, + rs: rs, + } + .emit(&[], sink, emit_info, state); + //convert back. + Inst::FpuRR { + alu_op: FpuOPRR::int_convert_2_float_op(I64, true, ty), + frm: Some(op.to_frm()), + rd, + rs: int_tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + // copy sign. + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjS + } else { + FpuOPRRR::FsgnjD + }, + frm: None, + rd, + rs1: rd.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + // jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan. + sink.bind_label(label_nan, &mut state.ctrl_plane); + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FaddS + } else { + FpuOPRRR::FaddD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here select origin x. + sink.bind_label(label_x, &mut state.ctrl_plane); + Inst::gen_move(rd, rs, ty).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + + &Inst::FloatSelect { + op, + rd, + tmp, + rs1, + rs2, + ty, + } => { + todo!() /* let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let tmp = allocs.next_writable(tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_jump_over = sink.get_label(); + // check if rs1 is nan. + Inst::emit_not_nan(tmp, rs1, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // check if rs2 is nan. 
+ Inst::emit_not_nan(tmp, rs2, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // here rs1 and rs2 is not nan. + Inst::FpuRRR { + alu_op: op.to_fpuoprrr(ty), + frm: None, + rd: rd, + rs1: rs1, + rs2: rs2, + } + .emit(&[], sink, emit_info, state); + // special handle for +0 or -0. + { + // check is rs1 and rs2 all equal to zero. + let label_done = sink.get_label(); + { + // if rs1 == 0 + let mut insts = Inst::emit_if_float_not_zero( + tmp, + rs1, + ty, + BranchTarget::Label(label_done), + BranchTarget::zero(), + ); + insts.extend(Inst::emit_if_float_not_zero( + tmp, + rs2, + ty, + BranchTarget::Label(label_done), + BranchTarget::zero(), + )); + insts + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + Inst::FpuRR { + alu_op: FpuOPRR::move_f_to_x_op(ty), + frm: None, + rd: tmp, + rs: rs1, + } + .emit(&[], sink, emit_info, state); + Inst::FpuRR { + alu_op: FpuOPRR::move_f_to_x_op(ty), + frm: None, + rd: writable_spilltmp_reg(), + rs: rs2, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: if op == FloatSelectOP::Max { + AluOPRRR::And + } else { + AluOPRRR::Or + }, + rd: tmp, + rs1: tmp.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + // move back to rd. + Inst::FpuRR { + alu_op: FpuOPRR::move_x_to_f_op(ty), + frm: None, + rd, + rs: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + // + sink.bind_label(label_done, &mut state.ctrl_plane); + } + // we have the reuslt,jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan. + sink.bind_label(label_nan, &mut state.ctrl_plane); + op.snan_bits(tmp, ty) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + // move to rd. + Inst::FpuRR { + alu_op: FpuOPRR::move_x_to_f_op(ty), + frm: None, + rd, + rs: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + &Inst::Popcnt { + sum, + tmp, + step, + rs, + ty, + } => { + todo!() /* let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let sum = allocs.next_writable(sum); + // load 0 to sum , init. + Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and add sum. 
+ { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_over), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + &Inst::Rev8 { rs, rd, tmp, step } => { + todo!() /* let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let rd = allocs.next_writable(rd); + // init. + Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); + Inst::gen_move(tmp, rs, I64).emit(&[], sink, emit_info, state); + // load 56 to step. + Inst::load_imm12(step, Imm12::from_bits(56)).emit(&[], sink, emit_info, state); + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThan, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: writable_spilltmp_reg(), + rs: tmp.to_reg(), + imm12: Imm12::from_bits(255), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: writable_spilltmp_reg(), + rs1: spilltmp_reg(), + rs2: step.to_reg(), + } + .emit(&[], sink, emit_info, state); + + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + { + // reset step + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-8), + } + .emit(&[], sink, emit_info, state); + //reset tmp. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(8), + } + .emit(&[], sink, emit_info, state); + // loop. + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + &Inst::Cltz { + sum, + tmp, + step, + rs, + leading, + ty, + } => { + todo!() /* let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let sum = allocs.next_writable(sum); + // load 0 to sum , init. 
+ Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + if leading { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + } + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and add sum. + { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: if leading { + AluOPRRI::Srli + } else { + AluOPRRI::Slli + }, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + &Inst::Brev8 { + rs, + ty, + step, + tmp, + tmp2, + rd, + } => { + todo!() /* let rs = allocs.next(rs); + let step = allocs.next_writable(step); + let tmp = allocs.next_writable(tmp); + let tmp2 = allocs.next_writable(tmp2); + let rd = allocs.next_writable(rd); + Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + Inst::load_imm12(tmp2, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 8) as i16), + } + .emit(&[], sink, emit_info, state); + + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and set bit. 
+ { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_over), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp2.to_reg(), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + { + // reset tmp2 + // if (step %=8 == 0) then tmp2 = tmp2 >> 15 + // if (step %=8 != 0) then tmp2 = tmp2 << 1 + let label_over = sink.get_label(); + let label_sll_1 = sink.get_label(); + Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_bits(8)).emit( + &[], + sink, + emit_info, + state, + ); + Inst::AluRRR { + alu_op: AluOPRRR::Rem, + rd: writable_spilltmp_reg2(), + rs1: step.to_reg(), + rs2: spilltmp_reg2(), + } + .emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_sll_1), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: spilltmp_reg2(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits(15), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_over), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_sll_1, &mut state.ctrl_plane); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + &Inst::StackProbeLoop { + guard_size, + probe_count, + tmp: guard_size_tmp, + } => { + todo!() /* let step = writable_spilltmp_reg(); + Inst::load_constant_u64( + step, + (guard_size as u64) * (probe_count as u64), + &mut |_| step, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::load_constant_u64(guard_size_tmp, guard_size as u64, &mut |_| guard_size_tmp) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + let loop_start = sink.get_label(); + let label_done = sink.get_label(); + sink.bind_label(loop_start, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::UnsignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: guard_size_tmp.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // compute address. + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: writable_spilltmp_reg2(), + rs1: stack_reg(), + rs2: step.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(spilltmp_reg2(), 0, I8), + op: StoreOP::Sb, + flags: MemFlags::new(), + src: zero_reg(), + } + .emit(&[], sink, emit_info, state); + // reset step. 
+ Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: step, + rs1: step.to_reg(), + rs2: guard_size_tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(loop_start), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + &Inst::VecAluRRRImm5 { + op, + vd, + vd_src, + imm, + vs2, + ref mask, + .. + } => { + todo!() /* let vs2 = allocs.next(vs2); + let vd_src = allocs.next(vd_src); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + debug_assert_eq!(vd.to_reg(), vd_src); + + sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, mask)); */ + } + &Inst::VecAluRRRR { + op, + vd, + vd_src, + vs1, + vs2, + ref mask, + .. + } => { + todo!() /* let vs1 = allocs.next(vs1); + let vs2 = allocs.next(vs2); + let vd_src = allocs.next(vd_src); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + debug_assert_eq!(vd.to_reg(), vd_src); + + sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, mask)); */ + } + &Inst::VecAluRRR { + op, + vd, + vs1, + vs2, + ref mask, + .. + } => { + todo!() /* let vs1 = allocs.next(vs1); + let vs2 = allocs.next(vs2); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu(op, vd, vs1, vs2, mask)); */ + } + &Inst::VecAluRRImm5 { + op, + vd, + imm, + vs2, + ref mask, + .. + } => { + todo!() /* let vs2 = allocs.next(vs2); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, mask)); */ + } + &Inst::VecAluRR { + op, + vd, + vs, + ref mask, + .. + } => { + todo!() /* let vs = allocs.next(vs); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu_rr(op, vd, vs, mask)); */ + } + &Inst::VecAluRImm5 { + op, + vd, + imm, + ref mask, + .. + } => { + todo!() /* let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu_r_imm(op, vd, imm, mask)); */ + } + &Inst::VecSetState { rd, ref vstate } => { + todo!() /* let rd = allocs.next_writable(rd); + + sink.put4(encode_vcfg_imm( + 0x57, + rd.to_reg(), + vstate.avl.unwrap_static(), + &vstate.vtype, + )); + + // Update the current vector emit state. + state.vstate = EmitVState::Known(vstate.clone()); */ + } + + &Inst::VecLoad { + eew, + to, + ref from, + ref mask, + flags, + .. + } => { + todo!() /* let from = from.clone().with_allocs(&mut allocs); + let to = allocs.next_writable(to); + let mask = mask.with_allocs(&mut allocs); + + // Vector Loads don't support immediate offsets, so we need to load it into a register. + let addr = match from { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); + tmp.to_reg() + } + } + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. 
+ sink.add_trap(TrapCode::HeapOutOfBounds); + } + + sink.put4(encode_vmem_load( + 0x07, + to.to_reg(), + eew, + addr, + from.lumop(), + mask, + from.mop(), + from.nf(), + )); */ + } + + &Inst::VecStore { + eew, + ref to, + from, + ref mask, + flags, + .. + } => { + todo!() /* let to = to.clone().with_allocs(&mut allocs); + let from = allocs.next(from); + let mask = mask.with_allocs(&mut allocs); + + // Vector Stores don't support immediate offsets, so we need to load it into a register. + let addr = match to { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); + tmp.to_reg() + } + } + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); + } + + sink.put4(encode_vmem_store( + 0x27, + from, + eew, + addr, + to.sumop(), + mask, + to.mop(), + to.nf(), + )); */ + } + }; + let end_off = sink.cur_offset(); + assert!( + (end_off - start_off) <= Inst::worst_case_size(), + "Inst:{:?} length:{} worst_case_size:{}", + self, + end_off - start_off, + Inst::worst_case_size() + ); + } + + fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String { + let mut allocs = AllocationConsumer::new(allocs); + self.print_with_state(state, &mut allocs) + } +} + +// helper function. +fn alloc_value_regs(orgin: &ValueRegs, alloc: &mut AllocationConsumer) -> ValueRegs { + match orgin.regs().len() { + 1 => ValueRegs::one(alloc.next(orgin.regs()[0])), + 2 => ValueRegs::two(alloc.next(orgin.regs()[0]), alloc.next(orgin.regs()[1])), + _ => unreachable!(), + } +} + +#[allow(unused)] +fn emit_return_call_common_sequence( + allocs: &mut AllocationConsumer<'_>, + sink: &mut MachBuffer, + emit_info: &EmitInfo, + state: &mut EmitState, + new_stack_arg_size: u32, + old_stack_arg_size: u32, + uses: &CallArgList, +) { + todo!() + /* for u in uses { + let _ = allocs.next(u.vreg); + } + + // We are emitting a dynamic number of instructions and might need an + // island. We emit four instructions regardless of how many stack arguments + // we have, up to two instructions for the actual call, and then two + // instructions per word of stack argument space. + let new_stack_words = new_stack_arg_size / 8; + let insts = 4 + 2 + 2 * new_stack_words; + let space_needed = insts * u32::try_from(Inst::INSTRUCTION_SIZE).unwrap(); + if sink.island_needed(space_needed) { + let jump_around_label = sink.get_label(); + Inst::Jal { + dest: BranchTarget::Label(jump_around_label), + } + .emit(&[], sink, emit_info, state); + sink.emit_island(&mut state.ctrl_plane); + sink.bind_label(jump_around_label, &mut state.ctrl_plane); + } + + // Copy the new frame on top of our current frame. + // + // The current stack layout is the following: + // + // | ... | + // +---------------------+ + // | ... | + // | stack arguments | + // | ... | + // current | return address | + // frame | old FP | <-- FP + // | ... | + // | old stack slots | + // | ... | + // +---------------------+ + // | ... | + // new | new stack arguments | + // frame | ... 
| <-- SP + // +---------------------+ + // + // We need to restore the old FP, restore the return address from the stack + // to the link register, copy the new stack arguments over the old stack + // arguments, adjust SP to point to the new stack arguments, and then jump + // to the callee (which will push the old FP and RA again). Note that the + // actual jump happens outside this helper function. + + assert_eq!( + new_stack_arg_size % 8, + 0, + "size of new stack arguments must be 8-byte aligned" + ); + + // The delta from our frame pointer to the (eventual) stack pointer value + // when we jump to the tail callee. This is the difference in size of stack + // arguments as well as accounting for the two words we pushed onto the + // stack upon entry to this function (the return address and old frame + // pointer). + let fp_to_callee_sp = i64::from(old_stack_arg_size) - i64::from(new_stack_arg_size) + 16; + + let tmp1 = regs::writable_spilltmp_reg(); + let tmp2 = regs::writable_spilltmp_reg2(); + + // Restore the return address to the link register, and load the old FP into + // a temporary register. + // + // We can't put the old FP into the FP register until after we copy the + // stack arguments into place, since that uses address modes that are + // relative to our current FP. + // + // Note that the FP is saved in the function prologue for all non-leaf + // functions, even when `preserve_frame_pointers=false`. Note also that + // `return_call` instructions make it so that a function is considered + // non-leaf. Therefore we always have an FP to restore here. + + Inst::gen_load( + writable_link_reg(), + AMode::FPOffset(8, I64), + I64, + MemFlags::trusted(), + ) + .emit(&[], sink, emit_info, state); + Inst::gen_load(tmp1, AMode::FPOffset(0, I64), I64, MemFlags::trusted()).emit( + &[], + sink, + emit_info, + state, + ); + + // Copy the new stack arguments over the old stack arguments. + for i in (0..new_stack_words).rev() { + // Load the `i`th new stack argument word from the temporary stack + // space. + Inst::gen_load( + tmp2, + AMode::SPOffset(i64::from(i * 8), types::I64), + types::I64, + ir::MemFlags::trusted(), + ) + .emit(&[], sink, emit_info, state); + + // Store it to its final destination on the stack, overwriting our + // current frame. + Inst::gen_store( + AMode::FPOffset(fp_to_callee_sp + i64::from(i * 8), types::I64), + tmp2.to_reg(), + types::I64, + ir::MemFlags::trusted(), + ) + .emit(&[], sink, emit_info, state); + } + + // Initialize the SP for the tail callee, deallocating the temporary stack + // argument space and our current frame at the same time. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: regs::writable_stack_reg(), + rs: regs::fp_reg(), + imm12: Imm12::maybe_from_u64(fp_to_callee_sp as u64).unwrap(), + } + .emit(&[], sink, emit_info, state); + + // Move the old FP value from the temporary into the FP register. 
+ Inst::Mov { + ty: types::I64, + rd: regs::writable_fp_reg(), + rm: tmp1.to_reg(), + } + .emit(&[], sink, emit_info, state); + + state.virtual_sp_offset -= i64::from(new_stack_arg_size); + trace!( + "return_call[_ind] adjusts virtual sp offset by {} -> {}", + new_stack_arg_size, + state.virtual_sp_offset + ); */ +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit_tests.rs b/cranelift/codegen/src/isa/zkasm/inst/emit_tests.rs new file mode 100644 index 000000000000..41e8ea6f8a6f --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/emit_tests.rs @@ -0,0 +1,2338 @@ +#[allow(unused)] +use crate::ir::LibCall; +use crate::isa::zkasm::inst::*; +use crate::settings; +use alloc::vec::Vec; +use std::borrow::Cow; + +#[test] +fn test_zkasm_binemit() { + struct TestUnit { + inst: Inst, + assembly: &'static str, + code: TestEncoding, + } + + struct TestEncoding(Cow<'static, str>); + + impl From<&'static str> for TestEncoding { + fn from(value: &'static str) -> Self { + Self(value.into()) + } + } + + impl From for TestEncoding { + fn from(value: u32) -> Self { + let value = value.swap_bytes(); + let value = format!("{value:08X}"); + Self(value.into()) + } + } + + impl TestUnit { + fn new(inst: Inst, assembly: &'static str, code: impl Into) -> Self { + let code = code.into(); + Self { + inst, + assembly, + code, + } + } + } + + let mut insns = Vec::::with_capacity(500); + + insns.push(TestUnit::new( + Inst::Ret { + rets: vec![], + stack_bytes_to_pop: 0, + }, + "ret", + 0x00008067, + )); + insns.push(TestUnit::new( + Inst::Ret { + rets: vec![], + stack_bytes_to_pop: 16, + }, + "add sp, sp, #16 ; ret", + "1301010167800000", + )); + + insns.push(TestUnit::new( + Inst::Mov { + rd: writable_fa0(), + rm: fa1(), + ty: F32, + }, + "fmv.s fa0,fa1", + 0x20b58553, + )); + + insns.push(TestUnit::new( + Inst::Mov { + rd: writable_fa0(), + rm: fa1(), + ty: F64, + }, + "fmv.d fa0,fa1", + 0x22b58553, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Brev8, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "brev8 a1,a0", + 0x68755593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Rev8, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "rev8 a1,a0", + 0x6b855593, + )); + + // + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bclri, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "bclri a1,a0,5", + 0x48551593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bexti, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "bexti a1,a0,5", + 0x48555593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Binvi, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "binvi a1,a0,5", + 0x68551593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bseti, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "bseti a1,a0,5", + 0x28551593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Rori, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "rori a1,a0,5", + 0x60555593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Roriw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "roriw a1,a0,5", + 0x6055559b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SlliUw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "slli.uw a1,a0,5", + 
0x855159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Clz, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "clz a1,a0", + 0x60051593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Clzw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "clzw a1,a0", + 0x6005159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Cpop, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "cpop a1,a0", + 0x60251593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Cpopw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "cpopw a1,a0", + 0x6025159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Ctz, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "ctz a1,a0", + 0x60151593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Ctzw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "ctzw a1,a0", + 0x6015159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sextb, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "sext.b a1,a0", + 0x60451593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sexth, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "sext.h a1,a0", + 0x60551593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Zexth, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "zext.h a1,a0", + 0x80545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Orcb, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "orc.b a1,a0", + 0x28755593, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "zext.w a1,a0", + 0x80505bb, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Adduw, + rd: writable_a1(), + rs1: a0(), + rs2: a1(), + }, + "add.uw a1,a0,a1", + 0x08b505bb, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Andn, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "andn a1,a0,zero", + 0x400575b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bclr, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bclr a1,a0,zero", + 0x480515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bext, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bext a1,a0,zero", + 0x480555b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Binv, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "binv a1,a0,zero", + 0x680515b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bset, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bset a1,a0,zero", + 0x280515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmul, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmul a1,a0,zero", + 0xa0515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmulh, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmulh a1,a0,zero", + 0xa0535b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmulr, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmulr a1,a0,zero", + 0xa0525b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Max, 
+ rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "max a1,a0,zero", + 0xa0565b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Maxu, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "maxu a1,a0,zero", + 0xa0575b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Min, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "min a1,a0,zero", + 0xa0545b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Minu, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "minu a1,a0,zero", + 0xa0555b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Orn, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "orn a1,a0,zero", + 0x400565b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rol, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rol a1,a0,zero", + 0x600515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rolw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rolw a1,a0,zero", + 0x600515bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Ror, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "ror a1,a0,zero", + 0x600555b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rorw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rorw a1,a0,zero", + 0x600555bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh1add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh1add a1,a0,zero", + 0x200525b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh1adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh1add.uw a1,a0,zero", + 0x200525bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh2add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh2add a1,a0,zero", + 0x200545b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh2adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh2add.uw a1,a0,zero", + 0x200545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh3add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh3add a1,a0,zero", + 0x200565b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh3adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh3add.uw a1,a0,zero", + 0x200565bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Xnor, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "xnor a1,a0,zero", + 0x400545b3, + )); + + // Zbkb + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Pack, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "pack a1,a0,zero", + 0x080545b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Packw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "packw a1,a0,zero", + 0x080545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Packh, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "packh a1,a0,zero", + 0x080575b3, + )); + + // + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_fp_reg(), + rs1: fp_reg(), + rs2: zero_reg(), + }, + "add fp,fp,zero", + 0x40433, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_fp_reg(), + rs: stack_reg(), + imm12: 
Imm12::maybe_from_u64(100).unwrap(), + }, + "addi fp,sp,100", + 0x6410413, + )); + insns.push(TestUnit::new( + Inst::Lui { + rd: writable_zero_reg(), + imm: Imm20::from_bits(120), + }, + "lui zero,120", + 0x78037, + )); + insns.push(TestUnit::new( + Inst::Auipc { + rd: writable_zero_reg(), + imm: Imm20::from_bits(120), + }, + "auipc zero,120", + 0x78017, + )); + + insns.push(TestUnit::new( + Inst::Jalr { + rd: writable_a0(), + base: a0(), + offset: Imm12::from_bits(100), + }, + "jalr a0,100(a0)", + 0x6450567, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lb, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I8), + }, + "lb a0,100(a1)", + 0x6458503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lh, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I16), + }, + "lh a0,100(a1)", + 0x6459503, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lw, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I32), + }, + "lw a0,100(a1)", + 0x645a503, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Ld, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "ld a0,100(a1)", + 0x645b503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: Writable::from_reg(fa0()), + op: LoadOP::Flw, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "flw fa0,100(a1)", + 0x645a507, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: Writable::from_reg(fa0()), + op: LoadOP::Fld, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "fld fa0,100(a1)", + 0x645b507, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I8), + op: StoreOP::Sb, + flags: MemFlags::new(), + src: a0(), + }, + "sb a0,100(sp)", + 0x6a10223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I16), + op: StoreOP::Sh, + flags: MemFlags::new(), + src: a0(), + }, + "sh a0,100(sp)", + 0x6a11223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I32), + op: StoreOP::Sw, + flags: MemFlags::new(), + src: a0(), + }, + "sw a0,100(sp)", + 0x6a12223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Sd, + flags: MemFlags::new(), + src: a0(), + }, + "sd a0,100(sp)", + 0x6a13223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Fsw, + flags: MemFlags::new(), + src: fa0(), + }, + "fsw fa0,100(sp)", + 0x6a12227, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Fsd, + flags: MemFlags::new(), + src: fa0(), + }, + "fsd fa0,100(sp)", + 0x6a13227, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "addi a0,a0,100", + 0x6450513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slti, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "slti a0,a0,100", + 0x6452513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SltiU, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "sltiu a0,a0,100", + 0x6453513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Xori, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "xori a0,a0,100", + 0x6454513, + )); + 
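    // Editor's note (illustrative sketch, not part of the original patch): the
    // I-type expectations above follow the layout documented in encode.rs
    // (imm[11:0] | rs1 | funct3 | rd | opcode). For example,
    // `xori a0,a0,100` == 0x06454513 decomposes into
    //   imm12 = 100 (0x064), rs1 = x10 (a0), funct3 = 0b100, rd = x10 (a0),
    //   opcode = 0x13,
    // and `DebugITypeInst::from_u32(0x06454513)`, defined later in this file,
    // recovers exactly these fields.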
insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "andi a0,a0,100", + 0x6457513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "slli a0,a0,5", + 0x551513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srli a0,a0,5", + 0x555513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srai, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srai a0,a0,5", + 0x40555513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(120), + }, + "addiw a0,a0,120", + 0x785051b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slliw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "slliw a0,a0,5", + 0x55151b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SrliW, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srliw a0,a0,5", + 0x55551b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sraiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "sraiw a0,a0,5", + 0x4055551b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sraiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "sraiw a0,a0,5", + 0x4055551b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "add a0,a0,a1", + 0xb50533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sub a0,a0,a1", + 0x40b50533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sll a0,a0,a1", + 0xb51533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Slt, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "slt a0,a0,a1", + 0xb52533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::SltU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sltu a0,a0,a1", + 0xb53533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Xor, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "xor a0,a0,a1", + 0xb54533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "srl a0,a0,a1", + 0xb55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sra, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sra a0,a0,a1", + 0x40b55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "or a0,a0,a1", + 0xb56533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "and a0,a0,a1", + 0xb57533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Addw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "addw a0,a0,a1", + 0xb5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Subw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "subw a0,a0,a1", + 0x40b5053b, + )); + 
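    // Editor's note (illustrative sketch, not part of the original patch): the
    // R-type expectations above can be cross-checked with `DebugRTypeInst`,
    // defined later in this file. For example, `add a0,a0,a1` == 0x00b50533
    // splits into
    //   opcode = 0x33, rd = x10 (a0), funct3 = 0b000, rs1 = x10 (a0),
    //   rs2 = x11 (a1), funct7 = 0b0000000,
    // matching the R-type layout documented in encode.rs.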
insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sllw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sllw a0,a0,a1", + 0xb5153b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Srlw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "srlw a0,a0,a1", + 0xb5553b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sraw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sraw a0,a0,a1", + 0x40b5553b, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mul, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mul a0,a0,a1", + 0x2b50533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulh, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulh a0,a0,a1", + 0x2b51533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulhsu, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulhsu a0,a0,a1", + 0x2b52533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulhu, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulhu a0,a0,a1", + 0x2b53533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Div, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "div a0,a0,a1", + 0x2b54533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::DivU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "divu a0,a0,a1", + 0x2b55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rem, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "rem a0,a0,a1", + 0x2b56533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::RemU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remu a0,a0,a1", + 0x2b57533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulw a0,a0,a1", + 0x2b5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Divw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "divw a0,a0,a1", + 0x2b5453b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Remw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remw a0,a0,a1", + 0x2b5653b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Remuw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remuw a0,a0,a1", + 0x2b5753b, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRRR::FaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fadd.s fa0,fa0,fa1,rne", + 0xb50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RTZ), + alu_op: FpuOPRRR::FsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsub.s fa0,fa0,fa1,rtz", + 0x8b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RUP), + alu_op: FpuOPRRR::FmulS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmul.s fa0,fa0,fa1,rup", + 0x10b53553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FdivS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fdiv.s fa0,fa0,fa1", + 0x18b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnj.s fa0,fa0,fa1", + 0x20b50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjnS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjn.s 
fa0,fa0,fa1", + 0x20b51553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjxS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjx.s fa0,fa0,fa1", + 0x20b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FminS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmin.s fa0,fa0,fa1", + 0x28b50553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmaxS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmax.s fa0,fa0,fa1", + 0x28b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FeqS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "feq.s a0,fa0,fa1", + 0xa0b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FltS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "flt.s a0,fa0,fa1", + 0xa0b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FleS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "fle.s a0,fa0,fa1", + 0xa0b50553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fadd.d fa0,fa0,fa1", + 0x2b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsub.d fa0,fa0,fa1", + 0xab57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmulD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmul.d fa0,fa0,fa1", + 0x12b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FdivD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fdiv.d fa0,fa0,fa1", + 0x1ab57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnj.d fa0,fa0,fa1", + 0x22b50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjnD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjn.d fa0,fa0,fa1", + 0x22b51553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjxD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjx.d fa0,fa0,fa1", + 0x22b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FminD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmin.d fa0,fa0,fa1", + 0x2ab50553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmaxD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmax.d fa0,fa0,fa1", + 0x2ab51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FeqD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "feq.d a0,fa0,fa1", + 0xa2b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FltD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "flt.d a0,fa0,fa1", + 0xa2b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FleD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "fle.d a0,fa0,fa1", + 0xa2b50553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRR::FsqrtS, + rd: writable_fa0(), + rs: fa1(), + }, + "fsqrt.s fa0,fa1,rne", + 0x58058553, + )); + 
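    // Editor's note (illustrative, not part of the original patch): in the
    // floating-point expectations above the rounding mode is carried in the
    // funct3 field. A static mode is encoded directly (RNE = 0b000,
    // RTZ = 0b001, RUP = 0b011), while `frm: None` falls back to the dynamic
    // rounding mode 0b111; that is why `fadd.s fa0,fa0,fa1,rne` ends in
    // ...0553 but `fdiv.s fa0,fa0,fa1` ends in ...7553.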
insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWS, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.w.s a0,fa1", + 0xc005f553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWuS, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.wu.s a0,fa1", + 0xc015f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvXW, + rd: writable_a0(), + rs: fa1(), + }, + "fmv.x.w a0,fa1", + 0xe0058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FclassS, + rd: writable_a0(), + rs: fa1(), + }, + "fclass.s a0,fa1", + 0xe0059553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSw, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.w fa0,a0", + 0xd0057553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSwU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.wu fa0,a0", + 0xd0157553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvWX, + rd: writable_fa0(), + rs: a0(), + }, + "fmv.w.x fa0,a0", + 0xf0050553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLS, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.l.s a0,fa0", + 0xc0257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLuS, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.lu.s a0,fa0", + 0xc0357553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + + alu_op: FpuOPRR::FcvtSL, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.l fa0,a0", + 0xd0257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSLU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.lu fa0,a0", + 0xd0357553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FsqrtD, + rd: writable_fa0(), + rs: fa1(), + }, + "fsqrt.d fa0,fa1", + 0x5a05f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWD, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.w.d a0,fa1", + 0xc205f553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWuD, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.wu.d a0,fa1", + 0xc215f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvXD, + rd: writable_a0(), + rs: fa1(), + }, + "fmv.x.d a0,fa1", + 0xe2058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FclassD, + rd: writable_a0(), + rs: fa1(), + }, + "fclass.d a0,fa1", + 0xe2059553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSD, + rd: writable_fa0(), + rs: fa0(), + }, + "fcvt.s.d fa0,fa0", + 0x40157553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtDWU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.wu fa0,a0", + 0xd2150553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvDX, + rd: writable_fa0(), + rs: a0(), + }, + "fmv.d.x fa0,a0", + 0xf2050553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLD, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.l.d a0,fa0", + 0xc2257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLuD, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.lu.d a0,fa0", + 0xc2357553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: 
FpuOPRR::FcvtDL, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.l fa0,a0", + 0xd2257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtDLu, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.lu fa0,a0", + 0xd2357553, + )); + ////////////////////// + + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRRRR::FmaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmadd.s fa0,fa0,fa1,fa7,rne", + 0x88b50543, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FmsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmsub.s fa0,fa0,fa1,fa7", + 0x88b57547, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmsub.s fa0,fa0,fa1,fa7", + 0x88b5754b, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmadd.s fa0,fa0,fa1,fa7", + 0x88b5754f, + )); + + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FmaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmadd.d fa0,fa0,fa1,fa7", + 0x8ab57543, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + + alu_op: FpuOPRRRR::FmsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmsub.d fa0,fa0,fa1,fa7", + 0x8ab57547, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmsub.d fa0,fa0,fa1,fa7", + 0x8ab5754b, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmadd.d fa0,fa0,fa1,fa7", + 0x8ab5754f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::LrW, + rd: writable_a0(), + addr: a1(), + src: zero_reg(), + amo: AMO::Relax, + }, + "lr.w a0,(a1)", + 0x1005a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::ScW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Release, + }, + "sc.w.rl a0,a2,(a1)", + 0x1ac5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoswapW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Aquire, + }, + "amoswap.w.aq a0,a2,(a1)", + 0xcc5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoaddW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::SeqCst, + }, + "amoadd.w.aqrl a0,a2,(a1)", + 0x6c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoxorW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoxor.w a0,a2,(a1)", + 0x20c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoandW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoand.w a0,a2,(a1)", + 0x60c5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoorW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoor.w a0,a2,(a1)", + 0x40c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomin.w a0,a2,(a1)", + 0x80c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxW, + rd: 
writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomax.w a0,a2,(a1)", + 0xa0c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominuW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amominu.w a0,a2,(a1)", + 0xc0c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxuW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomaxu.w a0,a2,(a1)", + 0xe0c5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::LrD, + rd: writable_a0(), + addr: a1(), + src: zero_reg(), + amo: AMO::Relax, + }, + "lr.d a0,(a1)", + 0x1005b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::ScD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "sc.d a0,a2,(a1)", + 0x18c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoswapD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoswap.d a0,a2,(a1)", + 0x8c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoaddD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoadd.d a0,a2,(a1)", + 0xc5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoxorD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoxor.d a0,a2,(a1)", + 0x20c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoandD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoand.d a0,a2,(a1)", + 0x60c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoorD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoor.d a0,a2,(a1)", + 0x40c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomin.d a0,a2,(a1)", + 0x80c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomax.d a0,a2,(a1)", + 0xa0c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominuD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amominu.d a0,a2,(a1)", + 0xc0c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxuD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomaxu.d a0,a2,(a1)", + 0xe0c5b52f, + )); + + ///////// + insns.push(TestUnit::new( + Inst::Fence { + pred: 1, + succ: 1 << 1, + }, + "fence w,r", + 0x120000f, + )); + insns.push(TestUnit::new(Inst::FenceI {}, "fence.i", 0x100f)); + insns.push(TestUnit::new(Inst::ECall {}, "ecall", 0x73)); + insns.push(TestUnit::new(Inst::EBreak {}, "ebreak", 0x100073)); + + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjS, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fmv.s fa0,fa1", + 0x20b58553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjD, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fmv.d fa0,fa1", + 0x22b58553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjnS, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fneg.s fa0,fa1", + 0x20b59553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjnD, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fneg.d 
fa0,fa1", + 0x22b59553, + )); + + let (flags, isa_flags) = make_test_flags(); + let emit_info = EmitInfo::new(flags, isa_flags); + + for unit in insns.iter() { + println!("Riscv64: {:?}, {}", unit.inst, unit.assembly); + // Check the printed text is as expected. + let actual_printing = unit + .inst + .print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[])); + assert_eq!(unit.assembly, actual_printing); + let mut buffer = MachBuffer::new(); + unit.inst + .emit(&[], &mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(&Default::default(), &mut Default::default()); + let actual_encoding = buffer.stringify_code_bytes(); + + assert_eq!(actual_encoding, unit.code.0); + } +} + +fn make_test_flags() -> (settings::Flags, super::super::riscv_settings::Flags) { + let b = settings::builder(); + let flags = settings::Flags::new(b.clone()); + let b2 = super::super::riscv_settings::builder(); + let isa_flags = super::super::riscv_settings::Flags::new(&flags, &b2); + (flags, isa_flags) +} + +#[derive(Debug)] +pub(crate) struct DebugRTypeInst { + op_code: u32, + rd: u32, + funct3: u32, + rs1: u32, + rs2: u32, + funct7: u32, +} + +impl DebugRTypeInst { + pub(crate) fn from_bs(x: &[u8]) -> Option { + if x.len() != 4 { + return None; + } + let a = [x[0], x[1], x[2], x[3]]; + Some(Self::from_u32(u32::from_le_bytes(a))) + } + + pub(crate) fn from_u32(x: u32) -> Self { + let op_code = x & 0b111_1111; + let x = x >> 7; + let rd = x & 0b1_1111; + let x = x >> 5; + let funct3 = x & 0b111; + let x = x >> 3; + let rs1 = x & 0b1_1111; + let x = x >> 5; + let rs2 = x & 0b1_1111; + let x = x >> 5; + let funct7 = x & 0b111_1111; + Self { + op_code, + rd, + funct3, + rs1, + rs2, + funct7, + } + } +} + +#[derive(Debug)] +pub(crate) struct DebugITypeInst { + op_code: u32, + rd: u32, + funct3: u32, + rs: u32, + imm12: u32, + shamt5: u32, + shamt6: u32, + funct7: u32, + funct6: u32, +} + +impl DebugITypeInst { + pub(crate) fn from_bs(x: &[u8]) -> Self { + let a = [x[0], x[1], x[2], x[3]]; + Self::from_u32(u32::from_le_bytes(a)) + } + pub(crate) fn from_u32(x: u32) -> Self { + let op_code = x & 0b111_1111; + let x = x >> 7; + let rd = x & 0b1_1111; + let x = x >> 5; + let funct3 = x & 0b111; + let x = x >> 3; + let rs = x & 0b1_1111; + let x = x >> 5; + let imm12 = x & 0b1111_1111_1111; + let shamt5 = imm12 & 0b1_1111; + let shamt6 = imm12 & 0b11_1111; + let funct7 = imm12 >> 5; + let funct6 = funct7 >> 1; + Self { + op_code, + rd, + funct3, + rs, + imm12, + shamt5, + shamt6, + funct7, + funct6, + } + } + fn print_b(self) { + println!("opcode:{:b}", self.op_code); + println!("rd:{}", self.rd); + println!("funct3:{:b}", self.funct3); + println!("rs:{}", self.rs); + println!("shamt5:{:b}", self.shamt5); + println!("shamt6:{:b}", self.shamt6); + println!("funct6:{:b}", self.funct6); + println!("funct7:{:b}", self.funct7); + } +} + +#[test] +fn xxx() { + let x = 1240847763; + let x = DebugITypeInst::from_u32(x); + x.print_b(); +} + +#[test] +fn zkasm_worst_case_instruction_size() { + let (flags, isa_flags) = make_test_flags(); + let emit_info = EmitInfo::new(flags, isa_flags); + + //there are all candidates potential generate a lot of bytes. 
+ let mut candidates: Vec = vec![]; + + candidates.push(Inst::IntSelect { + dst: vec![writable_a0(), writable_a0()], + ty: I128, + op: IntSelectOP::Smax, + x: ValueRegs::two(x_reg(1), x_reg(2)), + y: ValueRegs::two(x_reg(3), x_reg(4)), + }); + + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I8, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I16, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F32, + out_type: I8, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F32, + out_type: I16, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I8, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I16, + is_sat: false, + tmp: writable_a1(), + }); + + candidates.push(Inst::FloatRound { + op: FloatRoundOP::Trunc, + int_tmp: writable_a0(), + f_tmp: writable_a0(), + rd: writable_fa0(), + rs: fa0(), + ty: F64, + }); + + candidates.push(Inst::FloatSelect { + op: FloatSelectOP::Max, + rd: writable_fa0(), + tmp: writable_a0(), + rs1: fa0(), + rs2: fa0(), + ty: F64, + }); + + let mut max: (u32, MInst) = (0, Inst::Nop0); + for i in candidates { + let mut buffer = MachBuffer::new(); + i.emit(&[], &mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(&Default::default(), &mut Default::default()); + let length = buffer.data().len() as u32; + if length > max.0 { + let length = buffer.data().len() as u32; + max = (length, i.clone()); + } + println!("insn:{:?} length: {}", i, length); + } + println!("calculate max size is {} , inst is {:?}", max.0, max.1); + assert!(max.0 <= Inst::worst_case_size()); +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/encode.rs b/cranelift/codegen/src/isa/zkasm/inst/encode.rs new file mode 100644 index 000000000000..69d18d9bae77 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/encode.rs @@ -0,0 +1,299 @@ +//! Contains the RISC-V instruction encoding logic. +//! +//! These formats are specified in the RISC-V specification in section 2.2. +//! See: https://riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf +//! +//! Some instructions especially in extensions have slight variations from +//! the base RISC-V specification. 
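// Editor's sketch (illustrative, not part of the original patch): a quick way
// to sanity-check these encoders against the spec is to rebuild a known
// instruction from its fields. For `add a0, a0, a1` (0x00b50533, also used as
// a test expectation in emit_tests.rs):
//
//     // opcode = OP (0b0110011), rd = x10, funct3 = 0b000,
//     // rs1 = x10, rs2 = x11, funct7 = 0b0000000
//     assert_eq!(encode_r_type_bits(0b0110011, 10, 0b000, 10, 11, 0), 0x00b50533);
//
// `encode_r_type_bits` is private to this module, so such a check would have
// to live in a unit test inside this file rather than in emit_tests.rs.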
+ +use super::{Imm12, Imm5, UImm5, VType}; +use crate::isa::zkasm::inst::reg_to_gpr_num; +use crate::isa::zkasm::lower::isle::generated_code::{ + VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecAluOpRRRR, + VecElementWidth, VecOpCategory, VecOpMasking, +}; +use crate::machinst::isle::WritableReg; +use crate::Reg; + +fn unsigned_field_width(value: u32, width: u8) -> u32 { + debug_assert_eq!(value & (!0 << width), 0); + value +} + +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20------24-25-------31 +/// | Opcode | rd | funct3 | rs1 | rs2 | funct7 | +fn encode_r_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, rs2: u32, funct7: u32) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= unsigned_field_width(rd, 5) << 7; + bits |= unsigned_field_width(funct3, 3) << 12; + bits |= unsigned_field_width(rs1, 5) << 15; + bits |= unsigned_field_width(rs2, 5) << 20; + bits |= unsigned_field_width(funct7, 7) << 25; + bits +} + +/// Encode an R-type instruction. +pub fn encode_r_type( + opcode: u32, + rd: WritableReg, + funct3: u32, + rs1: Reg, + rs2: Reg, + funct7: u32, +) -> u32 { + encode_r_type_bits( + opcode, + reg_to_gpr_num(rd.to_reg()), + funct3, + reg_to_gpr_num(rs1), + reg_to_gpr_num(rs2), + funct7, + ) +} + +/// Encode an I-type instruction. +/// +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20------------------31 +/// | Opcode | rd | width | rs1 | Offset[11:0] | +pub fn encode_i_type(opcode: u32, rd: WritableReg, width: u32, rs1: Reg, offset: Imm12) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= reg_to_gpr_num(rd.to_reg()) << 7; + bits |= unsigned_field_width(width, 3) << 12; + bits |= reg_to_gpr_num(rs1) << 15; + bits |= unsigned_field_width(offset.as_u32(), 12) << 20; + bits +} + +/// Encode an S-type instruction. +/// +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20---24-25-------------31 +/// | Opcode | imm[4:0] | width | base | src | imm[11:5] | +pub fn encode_s_type(opcode: u32, width: u32, base: Reg, src: Reg, offset: Imm12) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= (offset.as_u32() & 0b11111) << 7; + bits |= unsigned_field_width(width, 3) << 12; + bits |= reg_to_gpr_num(base) << 15; + bits |= reg_to_gpr_num(src) << 20; + bits |= unsigned_field_width(offset.as_u32() >> 5, 7) << 25; + bits +} + +/// Encodes a Vector ALU instruction. +/// +/// Fields: +/// - opcode (7 bits) +/// - vd (5 bits) +/// - funct3 (3 bits) +/// - vs1 (5 bits) +/// - vs2 (5 bits) +/// - vm (1 bit) +/// - funct6 (6 bits) +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc +pub fn encode_valu( + op: VecAluOpRRR, + vd: WritableReg, + vs1: Reg, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + reg_to_gpr_num(vs1), + reg_to_gpr_num(vs2), + funct7, + ) +} + +/// Encodes a Vector ALU+Imm instruction. +/// This is just a Vector ALU instruction with an immediate in the VS1 field. 
+/// +/// Fields: +/// - opcode (7 bits) +/// - vd (5 bits) +/// - funct3 (3 bits) +/// - imm (5 bits) +/// - vs2 (5 bits) +/// - vm (1 bit) +/// - funct6 (6 bits) +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc +pub fn encode_valu_rr_imm( + op: VecAluOpRRImm5, + vd: WritableReg, + imm: Imm5, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + let imm = imm.bits() as u32; + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + imm, + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rrrr( + op: VecAluOpRRRR, + vd: WritableReg, + vs2: Reg, + vs1: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + reg_to_gpr_num(vs1), + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rrr_imm( + op: VecAluOpRRRImm5, + vd: WritableReg, + imm: Imm5, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + let imm = imm.bits() as u32; + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + imm, + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rr(op: VecAluOpRR, vd: WritableReg, vs: Reg, masking: VecOpMasking) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + + let (vs1, vs2) = if op.vs_is_vs2_encoded() { + (op.aux_encoding(), reg_to_gpr_num(vs)) + } else { + (reg_to_gpr_num(vs), op.aux_encoding()) + }; + + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + vs1, + vs2, + funct7, + ) +} + +pub fn encode_valu_r_imm( + op: VecAluOpRImm5, + vd: WritableReg, + imm: Imm5, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + + // This is true for this opcode, not sure if there are any other ones. + debug_assert_eq!(op, VecAluOpRImm5::VmvVI); + let vs1 = imm.bits() as u32; + let vs2 = op.aux_encoding(); + + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + vs1, + vs2, + funct7, + ) +} + +/// Encodes a Vector CFG Imm instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc +// TODO: Check if this is any of the known instruction types in the spec. +pub fn encode_vcfg_imm(opcode: u32, rd: Reg, imm: UImm5, vtype: &VType) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= reg_to_gpr_num(rd) << 7; + bits |= VecOpCategory::OPCFG.encode() << 12; + bits |= unsigned_field_width(imm.bits(), 5) << 15; + bits |= unsigned_field_width(vtype.encode(), 10) << 20; + bits |= 0b11 << 30; + bits +} + +/// Encodes a Vector Mem Unit Stride Load instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc +/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP +pub fn encode_vmem_load( + opcode: u32, + vd: Reg, + width: VecElementWidth, + rs1: Reg, + lumop: u32, + masking: VecOpMasking, + mop: u32, + nf: u32, +) -> u32 { + // Width is encoded differently to avoid a clash with the FP load/store sizes. 
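+    // For reference: the scalar FP loads/stores already claim the width encodings
+    // 0b001 (FLH), 0b010 (FLW), 0b011 (FLD) and 0b100 (FLQ), so the vector
+    // element widths are mapped onto the remaining patterns below.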
+ let width = match width { + VecElementWidth::E8 => 0b000, + VecElementWidth::E16 => 0b101, + VecElementWidth::E32 => 0b110, + VecElementWidth::E64 => 0b111, + }; + + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= reg_to_gpr_num(vd) << 7; + bits |= width << 12; + bits |= reg_to_gpr_num(rs1) << 15; + bits |= unsigned_field_width(lumop, 5) << 20; + bits |= masking.encode() << 25; + bits |= unsigned_field_width(mop, 2) << 26; + + // The mew bit (inst[28]) when set is expected to be used to encode expanded + // memory sizes of 128 bits and above, but these encodings are currently reserved. + bits |= 0b0 << 28; + + bits |= unsigned_field_width(nf, 3) << 29; + bits +} + +/// Encodes a Vector Mem Unit Stride Load instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc +/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP +pub fn encode_vmem_store( + opcode: u32, + vs3: Reg, + width: VecElementWidth, + rs1: Reg, + sumop: u32, + masking: VecOpMasking, + mop: u32, + nf: u32, +) -> u32 { + // This is pretty much the same as the load instruction, just + // with different names on the fields. + encode_vmem_load(opcode, vs3, width, rs1, sumop, masking, mop, nf) +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/imms.rs b/cranelift/codegen/src/isa/zkasm/inst/imms.rs new file mode 100644 index 000000000000..f04477e1dcd4 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/imms.rs @@ -0,0 +1,280 @@ +//! Riscv64 ISA definitions: immediate constants. + +// Some variants are never constructed, but we still want them as options in the future. +use super::Inst; +#[allow(dead_code)] +use std::fmt::{Debug, Display, Formatter, Result}; + +#[derive(Copy, Clone, Debug, Default)] +pub struct Imm12 { + pub bits: i16, +} + +#[derive(Copy, Clone, Debug, Default)] +pub struct Imm32 { + pub bits: i32, +} + +impl Imm12 { + pub(crate) const FALSE: Self = Self { bits: 0 }; + pub(crate) const TRUE: Self = Self { bits: 1 }; + pub fn maybe_from_u64(val: u64) -> Option { + let sign_bit = 1 << 11; + if val == 0 { + Some(Imm12 { bits: 0 }) + } else if (val & sign_bit) != 0 && (val >> 12) == 0xffff_ffff_ffff_f { + Some(Imm12 { + bits: (val & 0xffff) as i16, + }) + } else if (val & sign_bit) == 0 && (val >> 12) == 0 { + Some(Imm12 { + bits: (val & 0xffff) as i16, + }) + } else { + None + } + } + #[inline] + pub fn from_bits(bits: i16) -> Self { + Self { bits: bits & 0xfff } + } + /// Create a zero immediate of this format. 
+ #[inline] + pub fn zero() -> Self { + Imm12 { bits: 0 } + } + #[inline] + pub fn as_i16(self) -> i16 { + self.bits + } + #[inline] + pub fn as_u32(&self) -> u32 { + (self.bits as u32) & 0xfff + } +} + +impl Imm32 { + pub fn maybe_from_u64(val: u64) -> Option { + let sign_bit = 1 << 31; + if val == 0 { + Some(Imm32 { bits: 0 }) + } else if (val & sign_bit) != 0 && (val >> 31) == 0xffff_ffff { + Some(Imm32 { + bits: (val & 0xffff_ffff) as i32, + }) + } else if (val & sign_bit) == 0 && (val >> 32) == 0 { + Some(Imm32 { + bits: (val & 0xffff_ffff) as i32, + }) + } else { + None + } + } +} + +impl Into for Imm12 { + fn into(self) -> i64 { + self.bits as i64 + } +} + +impl Display for Imm12 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{:+}", self.bits) + } +} + +impl Display for Imm32 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{:+}", self.bits) + } +} + +impl std::ops::Neg for Imm12 { + type Output = Self; + fn neg(self) -> Self::Output { + Self { bits: -self.bits } + } +} + +// singed +#[derive(Clone, Copy, Default)] +pub struct Imm20 { + /// The immediate bits. + pub bits: i32, +} + +impl Imm20 { + #[inline] + pub fn from_bits(bits: i32) -> Self { + Self { + bits: bits & 0xf_ffff, + } + } + #[inline] + pub fn as_u32(&self) -> u32 { + (self.bits as u32) & 0xf_ffff + } +} + +impl Debug for Imm20 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.bits) + } +} + +impl Display for Imm20 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.bits) + } +} + +/// An unsigned 5-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct UImm5 { + value: u8, +} + +impl UImm5 { + /// Create an unsigned 5-bit immediate from u8. + pub fn maybe_from_u8(value: u8) -> Option { + if value < 32 { + Some(UImm5 { value }) + } else { + None + } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + u32::from(self.value) + } +} + +impl Display for UImm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +/// A Signed 5-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Imm5 { + value: i8, +} + +impl Imm5 { + /// Create an signed 5-bit immediate from an i8. + pub fn maybe_from_i8(value: i8) -> Option { + if value >= -16 && value <= 15 { + Some(Imm5 { value }) + } else { + None + } + } + + pub fn from_bits(value: u8) -> Imm5 { + assert_eq!(value & 0x1f, value); + let signed = ((value << 3) as i8) >> 3; + Imm5 { value: signed } + } + + /// Bits for encoding. + pub fn bits(&self) -> u8 { + self.value as u8 & 0x1f + } +} + +impl Display for Imm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +impl Inst { + pub(crate) fn imm_min() -> i64 { + let imm20_max: i64 = (1 << 19) << 12; + let imm12_max = 1 << 11; + -imm20_max - imm12_max + } + pub(crate) fn imm_max() -> i64 { + let imm20_max: i64 = ((1 << 19) - 1) << 12; + let imm12_max = (1 << 11) - 1; + imm20_max + imm12_max + } + + /// An imm20 immediate and an Imm12 immediate can generate a 32-bit immediate. + /// This helper produces an imm12, imm20, or both to generate the value. + /// + /// `value` must be between `imm_min()` and `imm_max()`, or else + /// this helper returns `None`. + pub(crate) fn generate_imm( + value: u64, + mut handle_imm: impl FnMut(Option, Option) -> R, + ) -> Option { + if let Some(imm12) = Imm12::maybe_from_u64(value) { + // can be load using single imm12. 
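+            // For example, 0x7ff and 0xffff_ffff_ffff_f800 (-2048) both sign-extend
+            // from 12 bits, so a single `addi` from the zero register suffices.
+            // Larger values fall through to the lui+addi split below, e.g.
+            // 0x1234_5678 -> imm20=0x12345, imm12=0x678, while 0x1234_5fff ->
+            // imm20=0x12346, imm12=-1 because the low 12 bits sign-extend.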
+ let r = handle_imm(None, Some(imm12)); + return Some(r); + } + let value = value as i64; + if !(value >= Self::imm_min() && value <= Self::imm_max()) { + // not in range, return None. + return None; + } + const MOD_NUM: i64 = 4096; + let (imm20, imm12) = if value > 0 { + let mut imm20 = value / MOD_NUM; + let mut imm12 = value % MOD_NUM; + if imm12 >= 2048 { + imm12 -= MOD_NUM; + imm20 += 1; + } + assert!(imm12 >= -2048 && imm12 <= 2047); + (imm20, imm12) + } else { + // this is the abs value. + let value_abs = value.abs(); + let imm20 = value_abs / MOD_NUM; + let imm12 = value_abs % MOD_NUM; + let mut imm20 = -imm20; + let mut imm12 = -imm12; + if imm12 < -2048 { + imm12 += MOD_NUM; + imm20 -= 1; + } + (imm20, imm12) + }; + assert!(imm20 >= -(0x7_ffff + 1) && imm20 <= 0x7_ffff); + assert!(imm20 != 0 || imm12 != 0); + Some(handle_imm( + if imm20 != 0 { + Some(Imm20::from_bits(imm20 as i32)) + } else { + None + }, + if imm12 != 0 { + Some(Imm12::from_bits(imm12 as i16)) + } else { + None + }, + )) + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_imm12() { + let x = Imm12::zero(); + assert_eq!(0, x.as_u32()); + Imm12::maybe_from_u64(0xffff_ffff_ffff_ffff).unwrap(); + } + + #[test] + fn imm20_and_imm12() { + assert!(Inst::imm_max() == (i32::MAX - 2048) as i64); + assert!(Inst::imm_min() == i32::MIN as i64 - 2048); + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs new file mode 100644 index 000000000000..ef612569dcda --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -0,0 +1,2195 @@ +//! This module defines zkasm-specific machine instruction types. + +// Some variants are not constructed, but we still want them as options in the future. +#![allow(dead_code)] +#![allow(non_camel_case_types)] +#![allow(warnings)] + +use super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpMasking}; +use crate::binemit::{Addend, CodeOffset, Reloc}; +pub use crate::ir::condcodes::IntCC; +use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64}; + +pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel}; +use crate::isa::{CallConv, FunctionAlignment}; +use crate::machinst::*; +use crate::{settings, CodegenError, CodegenResult}; + +pub use crate::ir::condcodes::FloatCC; + +use alloc::vec::Vec; +use regalloc2::{PRegSet, RegClass, VReg}; +use smallvec::{smallvec, SmallVec}; +use std::boxed::Box; +use std::fmt::Write; +use std::string::{String, ToString}; + +pub mod regs; +pub use self::regs::*; +pub mod imms; +pub use self::imms::*; +pub mod args; +pub use self::args::*; +pub mod emit; +pub use self::emit::*; +pub mod vector; +pub use self::vector::*; +pub mod encode; +pub use self::encode::*; +pub mod unwind; + +use crate::isa::zkasm::abi::Riscv64MachineDeps; + +#[cfg(test)] +mod emit_tests; + +use std::fmt::{Display, Formatter}; + +pub(crate) type OptionReg = Option; +pub(crate) type OptionImm12 = Option; +pub(crate) type VecBranchTarget = Vec; +pub(crate) type OptionUimm5 = Option; +pub(crate) type OptionFloatRoundingMode = Option; +pub(crate) type VecU8 = Vec; +pub(crate) type VecWritableReg = Vec>; +//============================================================================= +// Instructions (top level): definition + +pub use crate::isa::zkasm::lower::isle::generated_code::{ + AluOPRRI, AluOPRRR, AtomicOP, FClassResult, FFlagsException, FloatRoundOP, FloatSelectOP, + FpuOPRR, FpuOPRRR, FpuOPRRRR, IntSelectOP, LoadOP, MInst as Inst, StoreOP, FRM, 
+}; +use crate::isa::zkasm::lower::isle::generated_code::{MInst, VecAluOpRRImm5, VecAluOpRRR}; + +type BoxCallInfo = Box; +type BoxCallIndInfo = Box; +type BoxReturnCallInfo = Box; + +/// Additional information for (direct) Call instructions, left out of line to lower the size of +/// the Inst enum. +#[derive(Clone, Debug)] +pub struct CallInfo { + pub dest: ExternalName, + pub uses: CallArgList, + pub defs: CallRetList, + pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, + pub clobbers: PRegSet, + pub callee_pop_size: u32, +} + +/// Additional information for CallInd instructions, left out of line to lower the size of the Inst +/// enum. +#[derive(Clone, Debug)] +pub struct CallIndInfo { + pub rn: Reg, + pub uses: CallArgList, + pub defs: CallRetList, + pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, + pub clobbers: PRegSet, + pub callee_pop_size: u32, +} + +/// Additional information for `return_call[_ind]` instructions, left out of +/// line to lower the size of the `Inst` enum. +#[derive(Clone, Debug)] +pub struct ReturnCallInfo { + pub uses: CallArgList, + pub opcode: Opcode, + pub old_stack_arg_size: u32, + pub new_stack_arg_size: u32, +} + +/// A branch target. Either unresolved (basic-block index) or resolved (offset +/// from end of current instruction). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BranchTarget { + /// An unresolved reference to a Label, as passed into + /// `lower_branch_group()`. + Label(MachLabel), + /// A fixed PC offset. + ResolvedOffset(i32), +} + +impl BranchTarget { + /// Return the target's label, if it is a label-based target. + pub(crate) fn as_label(self) -> Option { + match self { + BranchTarget::Label(l) => Some(l), + _ => None, + } + } + /// offset zero. + #[inline] + pub(crate) fn zero() -> Self { + Self::ResolvedOffset(0) + } + #[inline] + pub(crate) fn offset(off: i32) -> Self { + Self::ResolvedOffset(off) + } + #[inline] + pub(crate) fn is_zero(self) -> bool { + match self { + BranchTarget::Label(_) => false, + BranchTarget::ResolvedOffset(off) => off == 0, + } + } + #[inline] + pub(crate) fn as_offset(self) -> Option { + match self { + BranchTarget::Label(_) => None, + BranchTarget::ResolvedOffset(off) => Some(off), + } + } +} + +impl Display for BranchTarget { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + BranchTarget::Label(l) => write!(f, "{}", l.to_string()), + BranchTarget::ResolvedOffset(off) => write!(f, "{}", off), + } + } +} + +pub(crate) fn enc_auipc(rd: Writable, imm: Imm20) -> u32 { + let x = 0b0010111 | reg_to_gpr_num(rd.to_reg()) << 7 | imm.as_u32() << 12; + x +} + +pub(crate) fn enc_jalr(rd: Writable, base: Reg, offset: Imm12) -> u32 { + let x = 0b1100111 + | reg_to_gpr_num(rd.to_reg()) << 7 + | 0b000 << 12 + | reg_to_gpr_num(base) << 15 + | offset.as_u32() << 20; + x +} + +/// rd and src must have the same length. 
+pub(crate) fn gen_moves(rd: &[Writable], src: &[Reg]) -> SmallInstVec { + assert!(rd.len() == src.len()); + assert!(rd.len() > 0); + let mut insts = SmallInstVec::new(); + for (dst, src) in rd.iter().zip(src.iter()) { + let ty = Inst::canonical_type_for_rc(dst.to_reg().class()); + insts.push(Inst::gen_move(*dst, *src, ty)); + } + insts +} + +impl Inst { + const INSTRUCTION_SIZE: i32 = 4; + + #[inline] + pub(crate) fn load_imm12(rd: Writable, imm: Imm12) -> Inst { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs: zero_reg(), + imm12: imm, + } + } + + /// Immediates can be loaded using lui and addi instructions. + fn load_const_imm Writable>( + rd: Writable, + value: u64, + alloc_tmp: &mut F, + ) -> Option> { + Inst::generate_imm(value, |imm20, imm12| { + let mut insts = SmallVec::new(); + + let rs = if let Some(imm) = imm20 { + let rd = if imm12.is_some() { alloc_tmp(I64) } else { rd }; + insts.push(Inst::Lui { rd, imm }); + rd.to_reg() + } else { + zero_reg() + }; + + if let Some(imm12) = imm12 { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + }) + } + + insts + }) + } + + pub(crate) fn load_constant_u32 Writable>( + rd: Writable, + value: u64, + alloc_tmp: &mut F, + ) -> SmallInstVec { + let insts = Inst::load_const_imm(rd, value, alloc_tmp); + insts.unwrap_or_else(|| { + smallvec![Inst::LoadConst32 { + rd, + imm: value as u32 + }] + }) + } + + pub fn load_constant_u64 Writable>( + rd: Writable, + value: u64, + alloc_tmp: &mut F, + ) -> SmallInstVec { + let insts = Inst::load_const_imm(rd, value, alloc_tmp); + insts.unwrap_or_else(|| smallvec![Inst::LoadConst64 { rd, imm: value }]) + } + + pub(crate) fn construct_auipc_and_jalr( + link: Option>, + tmp: Writable, + offset: i64, + ) -> [Inst; 2] { + Inst::generate_imm(offset as u64, |imm20, imm12| { + let a = Inst::Auipc { + rd: tmp, + imm: imm20.unwrap_or_default(), + }; + let b = Inst::Jalr { + rd: link.unwrap_or(writable_zero_reg()), + base: tmp.to_reg(), + offset: imm12.unwrap_or_default(), + }; + [a, b] + }) + .expect("code range is too big.") + } + + /// Create instructions that load a 32-bit floating-point constant. + pub fn load_fp_constant32 Writable>( + rd: Writable, + const_data: u32, + mut alloc_tmp: F, + ) -> SmallVec<[Inst; 4]> { + let mut insts = SmallVec::new(); + let tmp = alloc_tmp(I64); + insts.extend(Self::load_constant_u32( + tmp, + const_data as u64, + &mut alloc_tmp, + )); + insts.push(Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::move_x_to_f_op(F32), + rd, + rs: tmp.to_reg(), + }); + insts + } + + /// Create instructions that load a 64-bit floating-point constant. + pub fn load_fp_constant64 Writable>( + rd: Writable, + const_data: u64, + mut alloc_tmp: F, + ) -> SmallVec<[Inst; 4]> { + let mut insts = SmallInstVec::new(); + let tmp = alloc_tmp(I64); + insts.extend(Self::load_constant_u64(tmp, const_data, &mut alloc_tmp)); + insts.push(Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::move_x_to_f_op(F64), + rd, + rs: tmp.to_reg(), + }); + insts + } + + /// Generic constructor for a load (zero-extending where appropriate). + pub fn gen_load(into_reg: Writable, mem: AMode, ty: Type, flags: MemFlags) -> Inst { + if ty.is_vector() { + Inst::VecLoad { + eew: VecElementWidth::from_type(ty), + to: into_reg, + from: VecAMode::UnitStride { base: mem }, + flags, + mask: VecOpMasking::Disabled, + vstate: VState::from_type(ty), + } + } else { + Inst::Load { + rd: into_reg, + op: LoadOP::from_type(ty), + from: mem, + flags, + } + } + } + + /// Generic constructor for a store. 
+ pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { + if ty.is_vector() { + Inst::VecStore { + eew: VecElementWidth::from_type(ty), + to: VecAMode::UnitStride { base: mem }, + from: from_reg, + flags, + mask: VecOpMasking::Disabled, + vstate: VState::from_type(ty), + } + } else { + Inst::Store { + src: from_reg, + op: StoreOP::from_type(ty), + to: mem, + flags, + } + } + } +} + +//============================================================================= + +fn vec_mask_operands VReg>( + mask: &VecOpMasking, + collector: &mut OperandCollector<'_, F>, +) { + match mask { + VecOpMasking::Enabled { reg } => { + collector.reg_fixed_use(*reg, pv_reg(0).into()); + } + VecOpMasking::Disabled => {} + } +} +fn vec_mask_late_operands VReg>( + mask: &VecOpMasking, + collector: &mut OperandCollector<'_, F>, +) { + match mask { + VecOpMasking::Enabled { reg } => { + collector.reg_fixed_late_use(*reg, pv_reg(0).into()); + } + VecOpMasking::Disabled => {} + } +} + +fn zkasm_get_operands VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) { + match inst { + &Inst::Nop0 => {} + &Inst::Nop4 => {} + &Inst::Label { .. } => {} + &Inst::BrTable { + index, tmp1, tmp2, .. + } => { + collector.reg_use(index); + collector.reg_early_def(tmp1); + collector.reg_early_def(tmp2); + } + &Inst::Auipc { rd, .. } => collector.reg_def(rd), + &Inst::Lui { rd, .. } => collector.reg_def(rd), + &Inst::LoadConst32 { rd, .. } => collector.reg_def(rd), + &Inst::LoadConst64 { rd, .. } => collector.reg_def(rd), + &Inst::AluRRR { rd, rs1, rs2, .. } => { + collector.reg_fixed_use(rs1, a0()); + collector.reg_fixed_use(rs2, b0()); + collector.reg_def(rd); + } + &Inst::FpuRRR { rd, rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + &Inst::AluRRImm12 { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + &Inst::Load { rd, from, .. } => { + if let Some(r) = from.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_def(rd); + } + &Inst::Store { to, src, .. } => { + if let Some(r) = to.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_use(src); + } + + &Inst::Args { ref args } => { + for arg in args { + collector.reg_fixed_def(arg.vreg, arg.preg); + } + } + &Inst::Ret { ref rets, .. } => { + for ret in rets { + collector.reg_fixed_use(ret.vreg, ret.preg); + } + } + + &Inst::Extend { rd, rn, .. } => { + collector.reg_use(rn); + collector.reg_def(rd); + } + &Inst::AdjustSp { .. } => {} + &Inst::Call { ref info } => { + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } + collector.reg_clobbers(info.clobbers); + } + &Inst::CallInd { ref info } => { + if info.callee_callconv == CallConv::Tail { + // TODO(https://github.com/bytecodealliance/regalloc2/issues/145): + // This shouldn't be a fixed register constraint. + collector.reg_fixed_use(info.rn, x_reg(5)); + } else { + collector.reg_use(info.rn); + } + + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } + collector.reg_clobbers(info.clobbers); + } + &Inst::ReturnCall { + callee: _, + ref info, + } => { + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + } + &Inst::ReturnCallInd { ref info, callee } => { + collector.reg_use(callee); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + } + &Inst::TrapIf { test, .. 
} => { + collector.reg_use(test); + } + &Inst::Jal { .. } => {} + &Inst::CondBr { kind, .. } => { + collector.reg_use(kind.rs1); + collector.reg_use(kind.rs2); + } + &Inst::LoadExtName { rd, .. } => { + collector.reg_def(rd); + } + &Inst::LoadAddr { rd, mem } => { + if let Some(r) = mem.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_early_def(rd); + } + + &Inst::VirtualSPOffsetAdj { .. } => {} + &Inst::Mov { rd, rm, .. } => { + collector.reg_use(rm); + collector.reg_def(rd); + } + &Inst::MovFromPReg { rd, rm } => { + debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); + collector.reg_def(rd); + } + &Inst::Fence { .. } => {} + &Inst::FenceI => {} + &Inst::ECall => {} + &Inst::EBreak => {} + &Inst::Udf { .. } => {} + &Inst::FpuRR { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + &Inst::FpuRRRR { + rd, rs1, rs2, rs3, .. + } => { + collector.reg_uses(&[rs1, rs2, rs3]); + collector.reg_def(rd); + } + + &Inst::Jalr { rd, base, .. } => { + collector.reg_use(base); + collector.reg_def(rd); + } + &Inst::Atomic { rd, addr, src, .. } => { + collector.reg_use(addr); + collector.reg_use(src); + collector.reg_def(rd); + } + &Inst::Select { + ref dst, + condition, + x, + y, + .. + } => { + collector.reg_use(condition); + collector.reg_uses(x.regs()); + collector.reg_uses(y.regs()); + for d in dst.iter() { + collector.reg_early_def(d.clone()); + } + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + .. + } => { + collector.reg_uses(&[offset, e, addr, v]); + collector.reg_early_def(t0); + collector.reg_early_def(dst); + } + &Inst::IntSelect { + ref dst, + ref x, + ref y, + .. + } => { + collector.reg_uses(x.regs()); + collector.reg_uses(y.regs()); + for d in dst.iter() { + collector.reg_early_def(d.clone()); + } + } + + &Inst::Icmp { rd, a, b, .. } => { + // TODO(akashin): Why would Icmp have multiple input registers? + // collector.reg_uses(a.regs()); + // collector.reg_uses(b.regs()); + collector.reg_fixed_use( + a.only_reg() + .expect("Only support 1 register in comparison now"), + a0(), + ); + collector.reg_fixed_use( + b.only_reg() + .expect("Only support 1 register in comparison now"), + b0(), + ); + collector.reg_def(rd); + } + + &Inst::SelectReg { + rd, + rs1, + rs2, + condition, + } => { + collector.reg_use(condition.rs1); + collector.reg_use(condition.rs2); + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + &Inst::FcvtToInt { rd, rs, tmp, .. } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(rd); + } + &Inst::RawData { .. } => {} + &Inst::AtomicStore { src, p, .. } => { + collector.reg_use(src); + collector.reg_use(p); + } + &Inst::AtomicLoad { rd, p, .. } => { + collector.reg_use(p); + collector.reg_def(rd); + } + &Inst::AtomicRmwLoop { + offset, + dst, + p, + x, + t0, + .. + } => { + collector.reg_uses(&[offset, p, x]); + collector.reg_early_def(t0); + collector.reg_early_def(dst); + } + &Inst::TrapIfC { rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + } + &Inst::Unwind { .. } => {} + &Inst::DummyUse { reg } => { + collector.reg_use(reg); + } + &Inst::FloatRound { + rd, + int_tmp, + f_tmp, + rs, + .. + } => { + collector.reg_use(rs); + collector.reg_early_def(int_tmp); + collector.reg_early_def(f_tmp); + collector.reg_early_def(rd); + } + &Inst::FloatSelect { + rd, tmp, rs1, rs2, .. + } => { + collector.reg_uses(&[rs1, rs2]); + collector.reg_early_def(tmp); + collector.reg_early_def(rd); + } + &Inst::Popcnt { + sum, step, rs, tmp, .. 
+ } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(sum); + } + &Inst::Rev8 { rs, rd, tmp, step } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(rd); + } + &Inst::Cltz { + sum, step, tmp, rs, .. + } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(sum); + } + &Inst::Brev8 { + rs, + rd, + step, + tmp, + tmp2, + .. + } => { + collector.reg_use(rs); + collector.reg_early_def(step); + collector.reg_early_def(tmp); + collector.reg_early_def(tmp2); + collector.reg_early_def(rd); + } + &Inst::StackProbeLoop { .. } => { + // StackProbeLoop has a tmp register and StackProbeLoop used at gen_prologue. + // t3 will do the job. (t3 is caller-save register and not used directly by compiler like writable_spilltmp_reg) + // gen_prologue is called at emit stage. + // no need let reg alloc know. + } + &Inst::VecAluRRRR { + op, + vd, + vd_src, + vs1, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd_src.class(), RegClass::Vector); + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + debug_assert_eq!(vs1.class(), op.vs1_regclass()); + + collector.reg_late_use(vs1); + collector.reg_late_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 2); // `vd` == `vd_src`. + vec_mask_late_operands(mask, collector); + } + + Inst::AddImm32 { rd, src1, src2 } => { + collector.reg_def(*rd); + } + + Inst::MulImm32 { rd, src1, src2 } => { + collector.reg_def(*rd); + } + + &Inst::VecAluRRRImm5 { + op, + vd, + vd_src, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd_src.class(), RegClass::Vector); + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + + // If the operation forbids source/destination overlap we need to + // ensure that the source and destination registers are different. + if op.forbids_src_dst_overlaps() { + collector.reg_late_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`. + vec_mask_late_operands(mask, collector); + } else { + collector.reg_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`. + vec_mask_operands(mask, collector); + } + } + &Inst::VecAluRRR { + op, + vd, + vs1, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + debug_assert_eq!(vs1.class(), op.vs1_regclass()); + + collector.reg_use(vs1); + collector.reg_use(vs2); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. + if op.forbids_src_dst_overlaps() { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + &Inst::VecAluRRImm5 { + op, + vd, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + + collector.reg_use(vs2); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. 
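+                // (Typically widening/narrowing ops and the likes of vrgather,
+                // where the RVV spec disallows the destination register group
+                // overlapping a source group.)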
+ if op.forbids_src_dst_overlaps() { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + &Inst::VecAluRR { + op, + vd, + vs, + ref mask, + .. + } => { + debug_assert_eq!(vd.to_reg().class(), op.dst_regclass()); + debug_assert_eq!(vs.class(), op.src_regclass()); + + collector.reg_use(vs); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. + if op.forbids_src_dst_overlaps() { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + &Inst::VecAluRImm5 { vd, ref mask, .. } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + + collector.reg_def(vd); + vec_mask_operands(mask, collector); + } + &Inst::VecSetState { rd, .. } => { + collector.reg_def(rd); + } + &Inst::VecLoad { + to, + ref from, + ref mask, + .. + } => { + if let Some(r) = from.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_def(to); + vec_mask_operands(mask, collector); + } + &Inst::VecStore { + ref to, + from, + ref mask, + .. + } => { + if let Some(r) = to.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_use(from); + vec_mask_operands(mask, collector); + } + } +} + +impl MachInst for Inst { + type LabelUse = LabelUse; + type ABIMachineSpec = Riscv64MachineDeps; + + // https://github.com/riscv/riscv-isa-manual/issues/850 + // all zero will cause invalid opcode. + const TRAP_OPCODE: &'static [u8] = &[0; 4]; + + fn gen_dummy_use(reg: Reg) -> Self { + Inst::DummyUse { reg } + } + + fn gen_block_start( + block_index: usize, + _is_indirect_branch_target: bool, + _is_forward_edge_cfi_enabled: bool, + ) -> Option { + Some(Inst::Label { imm: block_index }) + } + + fn canonical_type_for_rc(rc: RegClass) -> Type { + match rc { + regalloc2::RegClass::Int => I64, + regalloc2::RegClass::Float => F64, + regalloc2::RegClass::Vector => I8X16, + } + } + + fn is_safepoint(&self) -> bool { + match self { + &Inst::Call { .. } + | &Inst::CallInd { .. } + | &Inst::TrapIf { .. } + | &Inst::Udf { .. } => true, + _ => false, + } + } + + fn get_operands VReg>(&self, collector: &mut OperandCollector<'_, F>) { + zkasm_get_operands(self, collector); + } + + fn is_move(&self) -> Option<(Writable, Reg)> { + match self { + Inst::Mov { rd, rm, .. } => Some((rd.clone(), rm.clone())), + _ => None, + } + } + + fn is_included_in_clobbers(&self) -> bool { + match self { + &Inst::Args { .. } => false, + _ => true, + } + } + + fn is_trap(&self) -> bool { + match self { + Self::Udf { .. } => true, + _ => false, + } + } + + fn is_args(&self) -> bool { + match self { + Self::Args { .. } => true, + _ => false, + } + } + + fn is_term(&self) -> MachTerminator { + match self { + &Inst::Jal { .. } => MachTerminator::Uncond, + &Inst::CondBr { .. } => MachTerminator::Cond, + &Inst::Jalr { .. } => MachTerminator::Uncond, + &Inst::Ret { .. } => MachTerminator::Ret, + &Inst::BrTable { .. } => MachTerminator::Indirect, + &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall, + _ => MachTerminator::None, + } + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + let x = Inst::Mov { + rd: to_reg, + rm: from_reg, + ty, + }; + x + } + + fn gen_nop(preferred_size: usize) -> Inst { + if preferred_size == 0 { + return Inst::Nop0; + } + // We can't give a NOP (or any insn) < 4 bytes. 
+ assert!(preferred_size >= 4); + Inst::Nop4 + } + + fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { + match ty { + I8 => Ok((&[RegClass::Int], &[I8])), + I16 => Ok((&[RegClass::Int], &[I16])), + I32 => Ok((&[RegClass::Int], &[I32])), + I64 => Ok((&[RegClass::Int], &[I64])), + R32 => panic!("32-bit reftype pointer should never be seen on zkasm"), + R64 => Ok((&[RegClass::Int], &[R64])), + F32 => Ok((&[RegClass::Float], &[F32])), + F64 => Ok((&[RegClass::Float], &[F64])), + I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])), + _ if ty.is_vector() => { + debug_assert!(ty.bits() <= 512); + + // Here we only need to return a SIMD type with the same size as `ty`. + // We use these types for spills and reloads, so prefer types with lanes <= 31 + // since that fits in the immediate field of `vsetivli`. + const SIMD_TYPES: [[Type; 1]; 6] = [ + [types::I8X2], + [types::I8X4], + [types::I8X8], + [types::I8X16], + [types::I16X16], + [types::I32X16], + ]; + let idx = (ty.bytes().ilog2() - 1) as usize; + let ty = &SIMD_TYPES[idx][..]; + + Ok((&[RegClass::Vector], ty)) + } + _ => Err(CodegenError::Unsupported(format!( + "Unexpected SSA-value type: {}", + ty + ))), + } + } + + fn gen_jump(target: MachLabel) -> Inst { + Inst::Jal { + dest: BranchTarget::Label(target), + } + } + + fn worst_case_size() -> CodeOffset { + // calculate by test function zkasm_worst_case_instruction_size() + 1_000_000 + } + + fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { + RegClass::Int + } + + fn function_alignment() -> FunctionAlignment { + FunctionAlignment { + minimum: 4, + preferred: 4, + } + } +} + +//============================================================================= +// Pretty-printing of instructions. +pub fn reg_name(reg: Reg) -> String { + match reg.to_real_reg() { + Some(real) => match real.class() { + RegClass::Int => match real.hw_enc() { + 0 => "0".into(), + 1 => "RR".into(), + 2 => "SP".into(), + // TODO(akashin): Do we have a global pointer register in ZK ASM? + // https://www.five-embeddev.com/quickref/global_pointer.html + // Supposed to be unallocatable. + 3 => "gp".into(), + // TODO(akashin): Do we have a thread pointer register in ZK ASM? + // https://groups.google.com/a/groups.riscv.org/g/sw-dev/c/cov47bNy5gY?pli=1 + // Supposed to be unallocatable. + 4 => "tp".into(), + // Temporary registers. 
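+                // In zkasm these are printed as the named machine registers
+                // (C, D, E below); the RISC-V numbering only survives as the
+                // internal hardware encoding.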
+ 5 => "C".into(), + 6 => "D".into(), + 7 => "E".into(), + 8 => "fp".into(), + 9 => "s1".into(), + 10 => "A".into(), + 11 => "B".into(), + 12 => "CTX".into(), + 13..=17 => format!("a{}", real.hw_enc() - 10), + 18..=27 => format!("s{}", real.hw_enc() - 16), + 28..=31 => format!("t{}", real.hw_enc() - 25), + _ => unreachable!(), + }, + RegClass::Float => match real.hw_enc() { + 0..=7 => format!("ft{}", real.hw_enc() - 0), + 8..=9 => format!("fs{}", real.hw_enc() - 8), + 10..=17 => format!("fa{}", real.hw_enc() - 10), + 18..=27 => format!("fs{}", real.hw_enc() - 16), + 28..=31 => format!("ft{}", real.hw_enc() - 20), + _ => unreachable!(), + }, + RegClass::Vector => format!("v{}", real.hw_enc()), + }, + None => { + format!("{:?}", reg) + } + } +} + +impl Inst { + fn print_with_state( + &self, + _state: &mut EmitState, + allocs: &mut AllocationConsumer<'_>, + ) -> String { + let format_reg = |reg: Reg, allocs: &mut AllocationConsumer<'_>| -> String { + let reg = allocs.next(reg); + reg_name(reg) + }; + + let format_vec_amode = |amode: &VecAMode, allocs: &mut AllocationConsumer<'_>| -> String { + match amode { + VecAMode::UnitStride { base } => base.to_string_with_alloc(allocs), + } + }; + + let format_mask = |mask: &VecOpMasking, allocs: &mut AllocationConsumer<'_>| -> String { + match mask { + VecOpMasking::Enabled { reg } => format!(",{}.t", format_reg(*reg, allocs)), + VecOpMasking::Disabled => format!(""), + } + }; + + let format_regs = |regs: &[Reg], allocs: &mut AllocationConsumer<'_>| -> String { + let mut x = if regs.len() > 1 { + String::from("[") + } else { + String::default() + }; + regs.iter().for_each(|i| { + x.push_str(format_reg(i.clone(), allocs).as_str()); + if *i != *regs.last().unwrap() { + x.push_str(","); + } + }); + if regs.len() > 1 { + x.push_str("]"); + } + x + }; + let format_labels = |labels: &[MachLabel]| -> String { + if labels.len() == 0 { + return String::from("[_]"); + } + let mut x = String::from("["); + labels.iter().for_each(|l| { + x.push_str( + format!( + "{:?}{}", + l, + if l != labels.last().unwrap() { "," } else { "" }, + ) + .as_str(), + ); + }); + x.push_str("]"); + x + }; + + fn format_frm(rounding_mode: Option) -> String { + if let Some(r) = rounding_mode { + format!(",{}", r.to_static_str(),) + } else { + "".into() + } + } + + let mut empty_allocs = AllocationConsumer::default(); + match self { + &Inst::Nop0 => { + format!("##zero length nop") + } + &Inst::Nop4 => { + format!("##fixed 4-size nop") + } + &Inst::Label { imm } => { + format!("##label=L{imm}") + } + &Inst::StackProbeLoop { + guard_size, + probe_count, + tmp, + } => { + let tmp = format_reg(tmp.to_reg(), allocs); + format!( + "inline_stack_probe##guard_size={} probe_count={} tmp={}", + guard_size, probe_count, tmp + ) + } + &Inst::FloatRound { + op, + rd, + int_tmp, + f_tmp, + rs, + ty, + } => { + let rs = format_reg(rs, allocs); + let int_tmp = format_reg(int_tmp.to_reg(), allocs); + let f_tmp = format_reg(f_tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "{} {},{}##int_tmp={} f_tmp={} ty={}", + op.op_name(), + rd, + rs, + int_tmp, + f_tmp, + ty + ) + } + &Inst::FloatSelect { + op, + rd, + tmp, + rs1, + rs2, + ty, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "f{}.{} {},{},{}##tmp={} ty={}", + op.op_name(), + if ty == F32 { "s" } else { "d" }, + rd, + rs1, + rs2, + tmp, + ty + ) + } + &Inst::AtomicStore { src, ty, p } 
=> { + let src = format_reg(src, allocs); + let p = format_reg(p, allocs); + format!("atomic_store.{} {},({})", ty, src, p) + } + &Inst::DummyUse { reg } => { + let reg = format_reg(reg, allocs); + format!("dummy_use {}", reg) + } + + &Inst::AtomicLoad { rd, ty, p } => { + let p = format_reg(p, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("atomic_load.{} {},({})", ty, rd, p) + } + &Inst::AtomicRmwLoop { + offset, + op, + dst, + ty, + p, + x, + t0, + } => { + let offset = format_reg(offset, allocs); + let p = format_reg(p, allocs); + let x = format_reg(x, allocs); + let t0 = format_reg(t0.to_reg(), allocs); + let dst = format_reg(dst.to_reg(), allocs); + format!( + "atomic_rmw.{} {} {},{},({})##t0={} offset={}", + ty, op, dst, x, p, t0, offset + ) + } + + &Inst::RawData { ref data } => match data.len() { + 4 => { + let mut bytes = [0; 4]; + for i in 0..bytes.len() { + bytes[i] = data[i]; + } + format!(".4byte 0x{:x}", u32::from_le_bytes(bytes)) + } + 8 => { + let mut bytes = [0; 8]; + for i in 0..bytes.len() { + bytes[i] = data[i]; + } + format!(".8byte 0x{:x}", u64::from_le_bytes(bytes)) + } + _ => { + format!(".data {:?}", data) + } + }, + &Inst::Unwind { ref inst } => { + todo!() + } + &Inst::Brev8 { + rs, + ty, + step, + tmp, + tmp2, + rd, + } => { + let rs = format_reg(rs, allocs); + let step = format_reg(step.to_reg(), allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let tmp2 = format_reg(tmp2.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "brev8 {},{}##tmp={} tmp2={} step={} ty={}", + rd, rs, tmp, tmp2, step, ty + ) + } + &Inst::Popcnt { + sum, + step, + rs, + tmp, + ty, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let sum = format_reg(sum.to_reg(), allocs); + format!("popcnt {},{}##ty={} tmp={} step={}", sum, rs, ty, tmp, step) + } + &Inst::Rev8 { rs, rd, tmp, step } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("rev8 {},{}##step={} tmp={}", rd, rs, step, tmp) + } + &Inst::Cltz { + sum, + step, + rs, + tmp, + ty, + leading, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let sum = format_reg(sum.to_reg(), allocs); + format!( + "{} {},{}##ty={} tmp={} step={}", + if leading { "clz" } else { "ctz" }, + sum, + rs, + ty, + tmp, + step + ) + } + &Inst::FcvtToInt { + is_sat, + rd, + rs, + is_signed, + in_type, + out_type, + tmp, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "fcvt_to_{}int{}.{} {},{}##in_ty={} tmp={}", + if is_signed { "s" } else { "u" }, + if is_sat { "_sat" } else { "" }, + out_type, + rd, + rs, + in_type, + tmp + ) + } + &Inst::SelectReg { + rd, + rs1, + rs2, + ref condition, + } => { + let c_rs1 = format_reg(condition.rs1, allocs); + let c_rs2 = format_reg(condition.rs2, allocs); + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "select_reg {},{},{}##condition={}", + rd, + rs1, + rs2, + format!("({} {} {})", c_rs1, condition.kind.to_static_str(), c_rs2), + ) + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + ty, + } => { + let offset = format_reg(offset, allocs); + let e = format_reg(e, allocs); + 
let addr = format_reg(addr, allocs); + let v = format_reg(v, allocs); + let t0 = format_reg(t0.to_reg(), allocs); + let dst = format_reg(dst.to_reg(), allocs); + format!( + "atomic_cas.{} {},{},{},({})##t0={} offset={}", + ty, dst, e, v, addr, t0, offset, + ) + } + &Inst::Icmp { cc, rd, a, b, ty } => { + let a = format_regs(a.regs(), allocs); + let b = format_regs(b.regs(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{},{}##ty={}", cc.to_static_str(), rd, a, b, ty) + } + &Inst::IntSelect { + op, + ref dst, + x, + y, + ty, + } => { + let x = format_regs(x.regs(), allocs); + let y = format_regs(y.regs(), allocs); + let dst: Vec<_> = dst.iter().map(|r| r.to_reg()).collect(); + let dst = format_regs(&dst[..], allocs); + format!("{} {},{},{}##ty={}", op.op_name(), dst, x, y, ty,) + } + &Inst::BrTable { + index, + tmp1, + tmp2, + ref targets, + } => { + let targets: Vec<_> = targets.iter().map(|x| x.as_label().unwrap()).collect(); + format!( + "{} {},{}##tmp1={},tmp2={}", + "br_table", + format_reg(index, allocs), + format_labels(&targets[..]), + format_reg(tmp1.to_reg(), allocs), + format_reg(tmp2.to_reg(), allocs), + ) + } + &Inst::Auipc { rd, imm } => { + format!( + "{} {},{}", + "auipc", + format_reg(rd.to_reg(), allocs), + imm.bits + ) + } + &Inst::Jalr { rd, base, offset } => { + let base = format_reg(base, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}({})", "jalr", rd, offset.bits, base) + } + &Inst::Lui { rd, ref imm } => { + format!("{} {},{}", "lui", format_reg(rd.to_reg(), allocs), imm.bits) + } + &Inst::LoadConst32 { rd, imm } => { + let rd = format_reg(rd.to_reg(), allocs); + let mut buf = String::new(); + write!(&mut buf, "auipc {},0; ", rd).unwrap(); + write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap(); + write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 4).unwrap(); + write!(&mut buf, ".4byte 0x{:x}", imm).unwrap(); + buf + } + &Inst::LoadConst64 { rd, imm } => { + let rd = format_reg(rd.to_reg(), allocs); + let mut buf = String::new(); + write!(&mut buf, "auipc {},0; ", rd).unwrap(); + write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap(); + write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 8).unwrap(); + write!(&mut buf, ".8byte 0x{:x}", imm).unwrap(); + buf + } + &Inst::AluRRR { + alu_op, + rd, + rs1, + rs2, + } => { + let rs1_s = format_reg(rs1, allocs); + let rs2_s = format_reg(rs2, allocs); + let rd_s = format_reg(rd.to_reg(), allocs); + match alu_op { + AluOPRRR::Adduw if rs2 == zero_reg() => { + format!("zext.w {},{}", rd_s, rs1_s) + } + _ => { + format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s) + } + } + } + + Inst::AddImm32 { rd, src1, src2 } => { + let rd = format_reg(rd.to_reg(), allocs); + format!("{src1} + {src2} => {rd};") + } + + Inst::MulImm32 { rd, src1, src2 } => { + let rd = format_reg(rd.to_reg(), allocs); + format!("{src1} * {src2} => {rd};") + } + + &Inst::FpuRR { + frm, + alu_op, + rd, + rs, + } => { + let rs = format_reg(rs, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}{}", alu_op.op_name(), rd, rs, format_frm(frm)) + } + &Inst::FpuRRR { + alu_op, + rd, + rs1, + rs2, + frm, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rd = format_reg(rd.to_reg(), allocs); + let rs1_is_rs2 = rs1 == rs2; + if rs1_is_rs2 && alu_op.is_copy_sign() { + // this is move instruction. 
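+                // `fsgnj.{s,d} rd, rs, rs` copies rs with its own sign, which is
+                // exactly the `fmv` alias; with rs1 == rs2, fsgnjn and fsgnjx
+                // likewise become the `fneg` and `fabs` pseudo-instructions.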
+ format!( + "fmv.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else if rs1_is_rs2 && alu_op.is_copy_neg_sign() { + format!( + "fneg.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else if rs1_is_rs2 && alu_op.is_copy_xor_sign() { + format!( + "fabs.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else { + format!( + "{} {},{},{}{}", + alu_op.op_name(), + rd, + rs1, + rs2, + format_frm(frm) + ) + } + } + &Inst::FpuRRRR { + alu_op, + rd, + rs1, + rs2, + rs3, + frm, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rs3 = format_reg(rs3, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "{} {},{},{},{}{}", + alu_op.op_name(), + rd, + rs1, + rs2, + rs3, + format_frm(frm) + ) + } + &Inst::AluRRImm12 { + alu_op, + rd, + rs, + ref imm12, + } => { + let rs_s = format_reg(rs, allocs); + let rd = format_reg(rd.to_reg(), allocs); + + // Some of these special cases are better known as + // their pseudo-instruction version, so prefer printing those. + match (alu_op, rs, imm12) { + (AluOPRRI::Addi, rs, _) if rs == zero_reg() => { + return format!("li {},{}", rd, imm12.as_i16()); + } + (AluOPRRI::Addiw, _, imm12) if imm12.as_i16() == 0 => { + return format!("sext.w {},{}", rd, rs_s); + } + (AluOPRRI::Xori, _, imm12) if imm12.as_i16() == -1 => { + return format!("not {},{}", rd, rs_s); + } + (AluOPRRI::SltiU, _, imm12) if imm12.as_i16() == 1 => { + return format!("seqz {},{}", rd, rs_s); + } + (alu_op, _, _) if alu_op.option_funct12().is_some() => { + format!("{} {},{}", alu_op.op_name(), rd, rs_s) + } + (alu_op, _, imm12) => { + format!("{} {},{},{}", alu_op.op_name(), rd, rs_s, imm12.as_i16()) + } + } + } + &Inst::Load { + rd, + op, + from, + flags: _flags, + } => { + let base = from.to_string_with_alloc(allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}", op.op_name(), rd, base,) + } + &Inst::Store { + to, + src, + op, + flags: _flags, + } => { + let base = to.to_string_with_alloc(allocs); + let src = format_reg(src, allocs); + format!("{} {},{}", op.op_name(), src, base,) + } + &Inst::Args { ref args } => { + let mut s = "args".to_string(); + let mut empty_allocs = AllocationConsumer::default(); + for arg in args { + let preg = format_reg(arg.preg, &mut empty_allocs); + let def = format_reg(arg.vreg.to_reg(), allocs); + write!(&mut s, " {}={}", def, preg).unwrap(); + } + s + } + &Inst::Ret { + ref rets, + stack_bytes_to_pop, + } => { + let mut s = if stack_bytes_to_pop == 0 { + "ret".to_string() + } else { + format!("add sp, sp, #{stack_bytes_to_pop} ; ret") + }; + + let mut empty_allocs = AllocationConsumer::default(); + for ret in rets { + let preg = format_reg(ret.preg, &mut empty_allocs); + let vreg = format_reg(ret.vreg, allocs); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + + &MInst::Extend { + rd, + rn, + signed, + from_bits, + .. 
+ } => { + let rn = format_reg(rn, allocs); + let rd = format_reg(rd.to_reg(), allocs); + return if signed == false && from_bits == 8 { + format!("andi {rd},{rn}") + } else { + let op = if signed { "srai" } else { "srli" }; + let shift_bits = (64 - from_bits) as i16; + format!("slli {rd},{rn},{shift_bits}; {op} {rd},{rd},{shift_bits}") + }; + } + &MInst::AdjustSp { amount } => { + format!("{} sp,{:+}", "add", amount) + } + &MInst::Call { ref info } => format!("call {}", info.dest.display(None)), + &MInst::CallInd { ref info } => { + let rd = format_reg(info.rn, allocs); + format!("callind {}", rd) + } + &MInst::ReturnCall { + ref callee, + ref info, + } => { + let mut s = format!( + "return_call {callee:?} old_stack_arg_size:{} new_stack_arg_size:{}", + info.old_stack_arg_size, info.new_stack_arg_size + ); + for ret in &info.uses { + let preg = format_reg(ret.preg, &mut empty_allocs); + let vreg = format_reg(ret.vreg, allocs); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + &MInst::ReturnCallInd { callee, ref info } => { + let callee = format_reg(callee, allocs); + let mut s = format!( + "return_call_ind {callee} old_stack_arg_size:{} new_stack_arg_size:{}", + info.old_stack_arg_size, info.new_stack_arg_size + ); + for ret in &info.uses { + let preg = format_reg(ret.preg, &mut empty_allocs); + let vreg = format_reg(ret.vreg, allocs); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + &MInst::TrapIf { test, trap_code } => { + format!("trap_if {},{}", format_reg(test, allocs), trap_code,) + } + &MInst::TrapIfC { + rs1, + rs2, + cc, + trap_code, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + format!("trap_ifc {}##({} {} {})", trap_code, rs1, cc, rs2) + } + &MInst::Jal { dest, .. } => { + format!("{} {}", "j", dest) + } + &MInst::CondBr { + taken, + not_taken, + kind, + .. 
+ } => { + let rs1 = format_reg(kind.rs1, allocs); + let rs2 = format_reg(kind.rs2, allocs); + if not_taken.is_zero() && taken.as_label().is_none() { + let off = taken.as_offset().unwrap(); + format!("{} {},{},{}", kind.op_name(), rs1, rs2, off) + } else { + let x = format!( + "{} {},{},taken({}),not_taken({})", + kind.op_name(), + rs1, + rs2, + taken, + not_taken + ); + x + } + } + &MInst::Atomic { + op, + rd, + addr, + src, + amo, + } => { + let op_name = op.op_name(amo); + let addr = format_reg(addr, allocs); + let src = format_reg(src, allocs); + let rd = format_reg(rd.to_reg(), allocs); + if op.is_load() { + format!("{} {},({})", op_name, rd, addr) + } else { + format!("{} {},{},({})", op_name, rd, src, addr) + } + } + &MInst::LoadExtName { + rd, + ref name, + offset, + } => { + let rd = format_reg(rd.to_reg(), allocs); + format!("load_sym {},{}{:+}", rd, name.display(None), offset) + } + &MInst::LoadAddr { ref rd, ref mem } => { + let rs = mem.to_string_with_alloc(allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("load_addr {},{}", rd, rs) + } + &MInst::VirtualSPOffsetAdj { amount } => { + format!("virtual_sp_offset_adj {:+}", amount) + } + &MInst::Mov { rd, rm, ty } => { + let rd = format_reg(rd.to_reg(), allocs); + let rm = format_reg(rm, allocs); + + let op = match ty { + F32 => "fmv.s", + F64 => "fmv.d", + ty if ty.is_vector() => "vmv1r.v", + _ => "mv", + }; + + format!("{op} {rd},{rm}") + } + &MInst::MovFromPReg { rd, rm } => { + let rd = format_reg(rd.to_reg(), allocs); + debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); + let rm = reg_name(Reg::from(rm)); + format!("mv {},{}", rd, rm) + } + &MInst::Fence { pred, succ } => { + format!( + "fence {},{}", + Inst::fence_req_to_string(pred), + Inst::fence_req_to_string(succ), + ) + } + &MInst::FenceI => "fence.i".into(), + &MInst::Select { + ref dst, + condition, + ref x, + ref y, + ty, + } => { + let condition = format_reg(condition, allocs); + let x = format_regs(x.regs(), allocs); + let y = format_regs(y.regs(), allocs); + let dst: Vec<_> = dst.clone().into_iter().map(|r| r.to_reg()).collect(); + let dst = format_regs(&dst[..], allocs); + format!("select_{} {},{},{}##condition={}", ty, dst, x, y, condition) + } + &MInst::Udf { trap_code } => format!("udf##trap_code={}", trap_code), + &MInst::EBreak {} => String::from("ebreak"), + &MInst::ECall {} => String::from("ecall"), + &Inst::VecAluRRRR { + op, + vd, + vd_src, + vs1, + vs2, + ref mask, + ref vstate, + } => { + let vs1_s = format_reg(vs1, allocs); + let vs2_s = format_reg(vs2, allocs); + let vd_src_s = format_reg(vd_src, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + let vd_fmt = if vd_s != vd_src_s { + format!("{},{}", vd_s, vd_src_s) + } else { + vd_s + }; + + // Note: vs2 and vs1 here are opposite to the standard scalar ordering. + // This is noted in Section 10.1 of the RISC-V Vector spec. + format!("{op} {vd_fmt},{vs2_s},{vs1_s}{mask} {vstate}") + } + &Inst::VecAluRRRImm5 { + op, + vd, + imm, + vs2, + ref mask, + ref vstate, + .. + } => { + let vs2_s = format_reg(vs2, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + // Some opcodes interpret the immediate as unsigned, lets show the + // correct number here. 
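+                // For example, slide offsets such as `vslideup.vi` take an
+                // unsigned uimm5, while arithmetic immediates such as `vadd.vi`
+                // take a signed simm5.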
+ let imm_s = if op.imm_is_unsigned() { + format!("{}", imm.bits()) + } else { + format!("{}", imm) + }; + + format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}") + } + &Inst::VecAluRRR { + op, + vd, + vs1, + vs2, + ref mask, + ref vstate, + } => { + let vs1_s = format_reg(vs1, allocs); + let vs2_s = format_reg(vs2, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + // Note: vs2 and vs1 here are opposite to the standard scalar ordering. + // This is noted in Section 10.1 of the RISC-V Vector spec. + match (op, vs2, vs1) { + (VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => { + format!("vneg.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => { + format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VfsgnjxVV, vs2, vs1) if vs2 == vs1 => { + format!("vfabs.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => { + format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}") + } + _ => format!("{op} {vd_s},{vs2_s},{vs1_s}{mask} {vstate}"), + } + } + &Inst::VecAluRRImm5 { + op, + vd, + imm, + vs2, + ref mask, + ref vstate, + } => { + let vs2_s = format_reg(vs2, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + // Some opcodes interpret the immediate as unsigned, lets show the + // correct number here. + let imm_s = if op.imm_is_unsigned() { + format!("{}", imm.bits()) + } else { + format!("{}", imm) + }; + + match (op, imm) { + (VecAluOpRRImm5::VxorVI, imm) if imm == Imm5::maybe_from_i8(-1).unwrap() => { + format!("vnot.v {vd_s},{vs2_s}{mask} {vstate}") + } + _ => format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}"), + } + } + &Inst::VecAluRR { + op, + vd, + vs, + ref mask, + ref vstate, + } => { + let vs_s = format_reg(vs, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + format!("{op} {vd_s},{vs_s}{mask} {vstate}") + } + &Inst::VecAluRImm5 { + op, + vd, + imm, + ref mask, + ref vstate, + } => { + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + format!("{op} {vd_s},{imm}{mask} {vstate}") + } + &Inst::VecSetState { rd, ref vstate } => { + let rd_s = format_reg(rd.to_reg(), allocs); + assert!(vstate.avl.is_static()); + format!("vsetivli {}, {}, {}", rd_s, vstate.avl, vstate.vtype) + } + Inst::VecLoad { + eew, + to, + from, + ref mask, + ref vstate, + .. + } => { + let base = format_vec_amode(from, allocs); + let vd = format_reg(to.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + format!("vl{eew}.v {vd},{base}{mask} {vstate}") + } + Inst::VecStore { + eew, + to, + from, + ref mask, + ref vstate, + .. + } => { + let dst = format_vec_amode(to, allocs); + let vs3 = format_reg(*from, allocs); + let mask = format_mask(mask, allocs); + + format!("vs{eew}.v {vs3},{dst}{mask} {vstate}") + } + } + } +} + +/// Different forms of label references for different instruction formats. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LabelUse { + /// 20-bit branch offset (unconditional branches). PC-rel, offset is + /// imm << 1. Immediate is 20 signed bits. Use in Jal instructions. + Jal20, + + /// The unconditional jump instructions all use PC-relative + /// addressing to help support position independent code. The JALR + /// instruction was defined to enable a two-instruction sequence to + /// jump anywhere in a 32-bit absolute address range. 
A LUI + /// instruction can first load rs1 with the upper 20 bits of a + /// target address, then JALR can add in the lower bits. Similarly, + /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative + /// address range. + PCRel32, + + /// All branch instructions use the B-type instruction format. The + /// 12-bit B-immediate encodes signed offsets in multiples of 2, and + /// is added to the current pc to give the target address. The + /// conditional branch range is ±4 KiB. + B12, + + /// Equivalent to the `R_RISCV_PCREL_HI20` relocation, Allows setting + /// the immediate field of an `auipc` instruction. + PCRelHi20, + + /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to + /// the final address, instead of the `PCREL_HI20` label. Allows setting + /// the immediate field of I Type instructions such as `addi` or `lw`. + /// + /// Since we currently don't support offsets in labels, this relocation has + /// an implicit offset of 4. + PCRelLo12I, +} + +impl MachInstLabelUse for LabelUse { + /// Alignment for veneer code. Every Riscv64 instruction must be + /// 4-byte-aligned. + const ALIGN: CodeOffset = 4; + + /// Maximum PC-relative range (positive), inclusive. + fn max_pos_range(self) -> CodeOffset { + match self { + LabelUse::Jal20 => ((1 << 19) - 1) * 2, + LabelUse::PCRelLo12I | LabelUse::PCRelHi20 | LabelUse::PCRel32 => { + Inst::imm_max() as CodeOffset + } + LabelUse::B12 => ((1 << 11) - 1) * 2, + } + } + + /// Maximum PC-relative range (negative). + fn max_neg_range(self) -> CodeOffset { + match self { + LabelUse::PCRel32 => Inst::imm_min().abs() as CodeOffset, + _ => self.max_pos_range() + 2, + } + } + + /// Size of window into code needed to do the patch. + fn patch_size(self) -> CodeOffset { + match self { + LabelUse::Jal20 | LabelUse::B12 | LabelUse::PCRelHi20 | LabelUse::PCRelLo12I => 4, + LabelUse::PCRel32 => 8, + } + } + + /// Perform the patch. + fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) { + assert!(use_offset % 4 == 0); + assert!(label_offset % 4 == 0); + let offset = (label_offset as i64) - (use_offset as i64); + + // re-check range + assert!( + offset >= -(self.max_neg_range() as i64) && offset <= (self.max_pos_range() as i64), + "{:?} offset '{}' use_offset:'{}' label_offset:'{}' must not exceed max range.", + self, + offset, + use_offset, + label_offset, + ); + self.patch_raw_offset(buffer, offset); + } + + /// Is a veneer supported for this label reference type? + fn supports_veneer(self) -> bool { + match self { + Self::Jal20 | Self::B12 => true, + _ => false, + } + } + + /// How large is the veneer, if supported? + fn veneer_size(self) -> CodeOffset { + match self { + Self::B12 | Self::Jal20 => 8, + _ => unreachable!(), + } + } + + /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return + /// an offset and label-use for the veneer's use of the original label. 
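+    /// The veneer emitted here is an `auipc` + `jalr` pair, so once patched it can
+    /// reach any target in the 32-bit PC-relative range (hence the returned `PCRel32`).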
+    fn generate_veneer(
+        self,
+        buffer: &mut [u8],
+        veneer_offset: CodeOffset,
+    ) -> (CodeOffset, LabelUse) {
+        let base = writable_spilltmp_reg();
+        {
+            let x = enc_auipc(base, Imm20::from_bits(0)).to_le_bytes();
+            buffer[0] = x[0];
+            buffer[1] = x[1];
+            buffer[2] = x[2];
+            buffer[3] = x[3];
+        }
+        {
+            let x = enc_jalr(writable_zero_reg(), base.to_reg(), Imm12::from_bits(0)).to_le_bytes();
+            buffer[4] = x[0];
+            buffer[5] = x[1];
+            buffer[6] = x[2];
+            buffer[7] = x[3];
+        }
+        (veneer_offset, Self::PCRel32)
+    }
+
+    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
+        match (reloc, addend) {
+            (Reloc::RiscvCall, _) => Some(Self::PCRel32),
+            _ => None,
+        }
+    }
+}
+
+impl LabelUse {
+    fn offset_in_range(self, offset: i64) -> bool {
+        let min = -(self.max_neg_range() as i64);
+        let max = self.max_pos_range() as i64;
+        offset >= min && offset <= max
+    }
+
+    fn patch_raw_offset(self, buffer: &mut [u8], offset: i64) {
+        let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
+        match self {
+            LabelUse::Jal20 => {
+                let offset = offset as u32;
+                let v = ((offset >> 12 & 0b1111_1111) << 12)
+                    | ((offset >> 11 & 0b1) << 20)
+                    | ((offset >> 1 & 0b11_1111_1111) << 21)
+                    | ((offset >> 20 & 0b1) << 31);
+                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
+            }
+            LabelUse::PCRel32 => {
+                let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]);
+                Inst::generate_imm(offset as u64, |imm20, imm12| {
+                    let imm20 = imm20.unwrap_or_default();
+                    let imm12 = imm12.unwrap_or_default();
+                    // Encode the values to OR in using zero_reg(): the real register
+                    // operands are already present in the original encoded
+                    // instructions, and OR-ing in zeroes leaves them unchanged.
+                    buffer[0..4].clone_from_slice(&u32::to_le_bytes(
+                        insn | enc_auipc(writable_zero_reg(), imm20),
+                    ));
+                    buffer[4..8].clone_from_slice(&u32::to_le_bytes(
+                        insn2 | enc_jalr(writable_zero_reg(), zero_reg(), imm12),
+                    ));
+                })
+                // The offset was range-checked in `patch`, so this can only fail on a
+                // compiler bug.
+                .expect("the offset was range-checked above; failing here is a compiler error");
+            }
+
+            LabelUse::B12 => {
+                let offset = offset as u32;
+                let v = ((offset >> 11 & 0b1) << 7)
+                    | ((offset >> 1 & 0b1111) << 8)
+                    | ((offset >> 5 & 0b11_1111) << 25)
+                    | ((offset >> 12 & 0b1) << 31);
+                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
+            }
+
+            LabelUse::PCRelHi20 => {
+                // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
+                //
+                // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the
+                // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an
+                // offset of 2048, we need to land at the next page and subtract instead.
+                // For example, an offset of 0x1800 splits into hi20 = 0x2 and lo12 = -0x800.
+                let offset = offset as u32;
+                let hi20 = offset.wrapping_add(0x800) >> 12;
+                let insn = (insn & 0xFFF) | (hi20 << 12);
+                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn));
+            }
+
+            LabelUse::PCRelLo12I => {
+                // `offset` is the offset from the current instruction to the target address.
+                //
+                // However, we are trying to compute the offset to the target address from the previous instruction.
+                // The previous instruction should be the one that contains the PCRelHi20 relocation and
+                // stores/references the program counter (`auipc` usually).
+ // + // Since we are trying to compute the offset from the previous instruction, we can + // represent it as offset = target_address - (current_instruction_address - 4) + // which is equivalent to offset = target_address - current_instruction_address + 4. + // + // Thus we need to add 4 to the offset here. + let lo12 = (offset + 4) as u32 & 0xFFF; + let insn = (insn & 0xFFFFF) | (lo12 << 20); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn label_use_max_range() { + assert!(LabelUse::B12.max_neg_range() == LabelUse::B12.max_pos_range() + 2); + assert!(LabelUse::Jal20.max_neg_range() == LabelUse::Jal20.max_pos_range() + 2); + assert!(LabelUse::PCRel32.max_pos_range() == (Inst::imm_max() as CodeOffset)); + assert!(LabelUse::PCRel32.max_neg_range() == (Inst::imm_min().abs() as CodeOffset)); + assert!(LabelUse::B12.max_pos_range() == ((1 << 11) - 1) * 2); + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs new file mode 100644 index 000000000000..7f76b9baa070 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -0,0 +1,237 @@ +//! Riscv64 ISA definitions: registers. +//! + +use crate::settings; + +use crate::machinst::{Reg, Writable}; + +use crate::machinst::RealReg; +use alloc::vec; +use alloc::vec::Vec; + +use regalloc2::VReg; +use regalloc2::{MachineEnv, PReg, RegClass}; + +#[inline] +pub fn a0() -> Reg { + x_reg(10) +} + +#[inline] +pub fn b0() -> Reg { + x_reg(11) +} + +// second argument of function call +#[inline] +pub fn a1() -> Reg { + x_reg(5) +} + +// third argument of function call +#[inline] +pub fn a2() -> Reg { + x_reg(6) +} + +#[inline] +pub fn writable_a0() -> Writable { + Writable::from_reg(a0()) +} +#[inline] +pub fn writable_a1() -> Writable { + Writable::from_reg(a1()) +} +#[inline] +pub fn writable_a2() -> Writable { + Writable::from_reg(a2()) +} + +#[inline] +pub fn fa0() -> Reg { + f_reg(10) +} +#[inline] +pub fn writable_fa0() -> Writable { + Writable::from_reg(fa0()) +} +#[inline] +pub fn writable_fa1() -> Writable { + Writable::from_reg(fa1()) +} +#[inline] +pub fn fa1() -> Reg { + f_reg(11) +} + +#[inline] +pub fn fa7() -> Reg { + f_reg(17) +} + +/// Get a reference to the zero-register. +#[inline] +pub fn zero_reg() -> Reg { + x_reg(0) +} + +/// Get a writable reference to the zero-register (this discards a result). +#[inline] +pub fn writable_zero_reg() -> Writable { + Writable::from_reg(zero_reg()) +} + +#[inline] +pub fn stack_reg() -> Reg { + x_reg(2) +} + +/// Get a writable reference to the stack-pointer register. +#[inline] +pub fn writable_stack_reg() -> Writable { + Writable::from_reg(stack_reg()) +} + +/// Get a reference to the link register (x1). +pub fn link_reg() -> Reg { + x_reg(1) +} + +/// Get a writable reference to the link register. +#[inline] +pub fn writable_link_reg() -> Writable { + Writable::from_reg(link_reg()) +} + +/// Get a reference to the context register (CTX). +pub fn context_reg() -> Reg { + x_reg(12) +} + +/// Get a reference to the frame pointer (x29). +#[inline] +pub fn fp_reg() -> Reg { + x_reg(8) +} + +/// Get a writable reference to the frame pointer. +#[inline] +pub fn writable_fp_reg() -> Writable { + Writable::from_reg(fp_reg()) +} + +/// Get a reference to the first temporary, sometimes "spill temporary", +/// register. This register is used in various ways as a temporary. 
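+/// It is never handed out by the register allocator, so the backend is always free
+/// to clobber it.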
+#[inline] +pub fn spilltmp_reg() -> Reg { + x_reg(31) +} + +/// Get a writable reference to the spilltmp reg. +#[inline] +pub fn writable_spilltmp_reg() -> Writable { + Writable::from_reg(spilltmp_reg()) +} + +///spilltmp2 +#[inline] +pub fn spilltmp_reg2() -> Reg { + x_reg(30) +} + +/// Get a writable reference to the spilltmp2 reg. +#[inline] +pub fn writable_spilltmp_reg2() -> Writable { + Writable::from_reg(spilltmp_reg2()) +} + +pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv { + let preferred_regs_by_class: [Vec; 3] = { + // Registers are A, B, C, D, E. + let x_registers: Vec = (5..=7) + .chain(10..=12) + .map(|i| PReg::new(i, RegClass::Int)) + .collect(); + + let f_registers: Vec = Vec::new(); + // (0..=7) + // .chain(10..=17) + // .chain(28..=31) + // .map(|i| PReg::new(i, RegClass::Float)) + // .collect(); + + let v_registers: Vec = Vec::new(); + // (0..=31).map(|i| PReg::new(i, RegClass::Vector)).collect(); + + [x_registers, f_registers, v_registers] + }; + + let non_preferred_regs_by_class: [Vec; 3] = { + let x_registers: Vec = Vec::new(); + // (9..=9) + // .chain(18..=27) + // .map(|i| PReg::new(i, RegClass::Int)) + // .collect(); + + let f_registers: Vec = Vec::new(); + // (8..=9) + // .chain(18..=27) + // .map(|i| PReg::new(i, RegClass::Float)) + // .collect(); + + let v_registers = vec![]; + + [x_registers, f_registers, v_registers] + }; + + MachineEnv { + preferred_regs_by_class, + non_preferred_regs_by_class, + fixed_stack_slots: vec![], + scratch_by_class: [None, None, None], + } +} + +#[inline] +pub fn x_reg(enc: usize) -> Reg { + let p_reg = PReg::new(enc, RegClass::Int); + let v_reg = VReg::new(p_reg.index(), p_reg.class()); + Reg::from(v_reg) +} +pub const fn px_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Int) +} + +#[inline] +pub fn f_reg(enc: usize) -> Reg { + let p_reg = PReg::new(enc, RegClass::Float); + let v_reg = VReg::new(p_reg.index(), p_reg.class()); + Reg::from(v_reg) +} +pub const fn pf_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Float) +} +#[inline] +pub(crate) fn real_reg_to_reg(x: RealReg) -> Reg { + let v_reg = VReg::new(x.hw_enc() as usize, x.class()); + Reg::from(v_reg) +} + +#[allow(dead_code)] +pub(crate) fn x_reg_range(start: usize, end: usize) -> Vec> { + let mut regs = vec![]; + for i in start..=end { + regs.push(Writable::from_reg(x_reg(i))); + } + regs +} + +#[inline] +pub fn v_reg(enc: usize) -> Reg { + let p_reg = PReg::new(enc, RegClass::Vector); + let v_reg = VReg::new(p_reg.index(), p_reg.class()); + Reg::from(v_reg) +} +pub const fn pv_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Vector) +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/unwind.rs b/cranelift/codegen/src/isa/zkasm/inst/unwind.rs new file mode 100644 index 000000000000..1e2bb904db74 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/unwind.rs @@ -0,0 +1,2 @@ +#[cfg(feature = "unwind")] +pub(crate) mod systemv; diff --git a/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs new file mode 100644 index 000000000000..d050560ffec0 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs @@ -0,0 +1,174 @@ +//! Unwind information for System V ABI (Riscv64). + +use crate::isa::unwind::systemv::RegisterMappingError; +use crate::isa::zkasm::inst::regs; +use crate::machinst::Reg; +use gimli::{write::CommonInformationEntry, Encoding, Format, Register}; +use regalloc2::RegClass; + +/// Creates a new zkasm common information entry (CIE). 
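+/// The CIE holds the defaults shared by every FDE: the return-address column (the
+/// link register) and an initial CFA rule of SP + 0.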
+pub fn create_cie() -> CommonInformationEntry { + use gimli::write::CallFrameInstruction; + + let mut entry = CommonInformationEntry::new( + Encoding { + address_size: 8, + format: Format::Dwarf32, + version: 1, + }, + 4, // Code alignment factor + -8, // Data alignment factor + Register(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16), + ); + + // Every frame will start with the call frame address (CFA) at SP + let sp = Register(regs::stack_reg().to_real_reg().unwrap().hw_enc().into()); + entry.add_instruction(CallFrameInstruction::Cfa(sp, 0)); + + entry +} + +/// Map Cranelift registers to their corresponding Gimli registers. +pub fn map_reg(reg: Reg) -> Result { + let reg_offset = match reg.class() { + RegClass::Int => 0, + RegClass::Float => 32, + RegClass::Vector => 64, + }; + + let reg = reg.to_real_reg().unwrap().hw_enc() as u16; + Ok(Register(reg_offset + reg)) +} + +pub(crate) struct RegisterMapper; + +impl crate::isa::unwind::systemv::RegisterMapper for RegisterMapper { + fn map(&self, reg: Reg) -> Result { + Ok(map_reg(reg)?.0) + } + fn sp(&self) -> u16 { + regs::stack_reg().to_real_reg().unwrap().hw_enc() as u16 + } + fn fp(&self) -> Option { + Some(regs::fp_reg().to_real_reg().unwrap().hw_enc() as u16) + } + fn lr(&self) -> Option { + Some(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16) + } + fn lr_offset(&self) -> Option { + Some(8) + } +} + +#[cfg(test)] +mod tests { + use crate::cursor::{Cursor, FuncCursor}; + + use crate::ir::{ + types, AbiParam, Function, InstBuilder, Signature, StackSlotData, StackSlotKind, + UserFuncName, + }; + use crate::isa::{lookup, CallConv}; + use crate::settings::{builder, Flags}; + use crate::Context; + use gimli::write::Address; + use std::str::FromStr; + use target_lexicon::triple; + + #[test] + fn test_simple_func() { + let isa = lookup(triple!("zkasm")) + .expect("expect zkasm ISA") + .finish(Flags::new(builder())) + .expect("Creating compiler backend"); + + let mut context = Context::for_function(create_function( + CallConv::SystemV, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + )); + + let code = context + .compile(&*isa, &mut Default::default()) + .expect("expected compilation"); + + let fde = match code + .create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(1234)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 40, lsda: None, instructions: [(12, CfaOffset(16)), (12, Offset(Register(8), -16)), (12, Offset(Register(1), -8)), (16, CfaRegister(Register(8)))] }"); + } + + fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { + let mut func = + Function::with_name_signature(UserFuncName::user(0, 0), Signature::new(call_conv)); + + let block0 = func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().return_(&[]); + + if let Some(stack_slot) = stack_slot { + func.sized_stack_slots.push(stack_slot); + } + + func + } + + #[test] + fn test_multi_return_func() { + let isa = lookup(triple!("zkasm")) + .expect("expect zkasm ISA") + .finish(Flags::new(builder())) + .expect("Creating compiler backend"); + + let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV)); + + let code = context + .compile(&*isa, &mut Default::default()) + .expect("expected compilation"); + + let fde = match code + 
.create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(4321)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!( + format!("{:?}", fde), + "FrameDescriptionEntry { address: Constant(4321), length: 20, lsda: None, instructions: [] }" + ); + } + + fn create_multi_return_function(call_conv: CallConv) -> Function { + let mut sig = Signature::new(call_conv); + sig.params.push(AbiParam::new(types::I32)); + let mut func = Function::with_name_signature(UserFuncName::user(0, 0), sig); + + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, types::I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().brif(v0, block2, &[], block1, &[]); + + pos.insert_block(block1); + pos.ins().return_(&[]); + + pos.insert_block(block2); + pos.ins().return_(&[]); + + func + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/vector.rs b/cranelift/codegen/src/isa/zkasm/inst/vector.rs new file mode 100644 index 000000000000..afd248379875 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/vector.rs @@ -0,0 +1,996 @@ +use crate::isa::zkasm::inst::AllocationConsumer; +use crate::isa::zkasm::inst::EmitState; +use crate::isa::zkasm::lower::isle::generated_code::VecAluOpRRRR; +use crate::isa::zkasm::lower::isle::generated_code::{ + VecAMode, VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecAvl, + VecElementWidth, VecLmul, VecMaskMode, VecOpCategory, VecOpMasking, VecTailMode, +}; +use crate::machinst::RegClass; +use crate::Reg; +use core::fmt; + +use super::{Type, UImm5}; + +impl VecAvl { + pub fn _static(size: u32) -> Self { + VecAvl::Static { + size: UImm5::maybe_from_u8(size as u8).expect("Invalid size for AVL"), + } + } + + pub fn is_static(&self) -> bool { + match self { + VecAvl::Static { .. } => true, + } + } + + pub fn unwrap_static(&self) -> UImm5 { + match self { + VecAvl::Static { size } => *size, + } + } +} + +// TODO: Can we tell ISLE to derive this? +impl Copy for VecAvl {} + +// TODO: Can we tell ISLE to derive this? 
+impl PartialEq for VecAvl { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (VecAvl::Static { size: lhs }, VecAvl::Static { size: rhs }) => lhs == rhs, + } + } +} + +impl fmt::Display for VecAvl { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecAvl::Static { size } => write!(f, "{}", size), + } + } +} + +impl VecElementWidth { + pub fn from_type(ty: Type) -> Self { + Self::from_bits(ty.lane_bits()) + } + + pub fn from_bits(bits: u32) -> Self { + match bits { + 8 => VecElementWidth::E8, + 16 => VecElementWidth::E16, + 32 => VecElementWidth::E32, + 64 => VecElementWidth::E64, + _ => panic!("Invalid number of bits for VecElementWidth: {}", bits), + } + } + + pub fn bits(&self) -> u32 { + match self { + VecElementWidth::E8 => 8, + VecElementWidth::E16 => 16, + VecElementWidth::E32 => 32, + VecElementWidth::E64 => 64, + } + } + + pub fn encode(&self) -> u32 { + match self { + VecElementWidth::E8 => 0b000, + VecElementWidth::E16 => 0b001, + VecElementWidth::E32 => 0b010, + VecElementWidth::E64 => 0b011, + } + } +} + +impl fmt::Display for VecElementWidth { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "e{}", self.bits()) + } +} + +impl VecLmul { + pub fn encode(&self) -> u32 { + match self { + VecLmul::LmulF8 => 0b101, + VecLmul::LmulF4 => 0b110, + VecLmul::LmulF2 => 0b111, + VecLmul::Lmul1 => 0b000, + VecLmul::Lmul2 => 0b001, + VecLmul::Lmul4 => 0b010, + VecLmul::Lmul8 => 0b011, + } + } +} + +impl fmt::Display for VecLmul { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecLmul::LmulF8 => write!(f, "mf8"), + VecLmul::LmulF4 => write!(f, "mf4"), + VecLmul::LmulF2 => write!(f, "mf2"), + VecLmul::Lmul1 => write!(f, "m1"), + VecLmul::Lmul2 => write!(f, "m2"), + VecLmul::Lmul4 => write!(f, "m4"), + VecLmul::Lmul8 => write!(f, "m8"), + } + } +} + +impl VecTailMode { + pub fn encode(&self) -> u32 { + match self { + VecTailMode::Agnostic => 1, + VecTailMode::Undisturbed => 0, + } + } +} + +impl fmt::Display for VecTailMode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecTailMode::Agnostic => write!(f, "ta"), + VecTailMode::Undisturbed => write!(f, "tu"), + } + } +} + +impl VecMaskMode { + pub fn encode(&self) -> u32 { + match self { + VecMaskMode::Agnostic => 1, + VecMaskMode::Undisturbed => 0, + } + } +} + +impl fmt::Display for VecMaskMode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecMaskMode::Agnostic => write!(f, "ma"), + VecMaskMode::Undisturbed => write!(f, "mu"), + } + } +} + +/// Vector Type (VType) +/// +/// vtype provides the default type used to interpret the contents of the vector register file. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct VType { + pub sew: VecElementWidth, + pub lmul: VecLmul, + pub tail_mode: VecTailMode, + pub mask_mode: VecMaskMode, +} + +impl VType { + // https://github.com/riscv/riscv-v-spec/blob/master/vtype-format.adoc + pub fn encode(&self) -> u32 { + let mut bits = 0; + bits |= self.lmul.encode(); + bits |= self.sew.encode() << 3; + bits |= self.tail_mode.encode() << 6; + bits |= self.mask_mode.encode() << 7; + bits + } +} + +impl fmt::Display for VType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}, {}, {}, {}", + self.sew, self.lmul, self.tail_mode, self.mask_mode + ) + } +} + +/// Vector State (VState) +/// +/// VState represents the state of the vector unit that each instruction expects before execution. 
+/// Unlike VType or any of the other types here, VState is not a part of the RISC-V ISA. It is +/// used by our instruction emission code to ensure that the vector unit is in the correct state. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct VState { + pub avl: VecAvl, + pub vtype: VType, +} + +impl VState { + pub fn from_type(ty: Type) -> Self { + VState { + avl: VecAvl::_static(ty.lane_count()), + vtype: VType { + sew: VecElementWidth::from_type(ty), + lmul: VecLmul::Lmul1, + tail_mode: VecTailMode::Agnostic, + mask_mode: VecMaskMode::Agnostic, + }, + } + } +} + +impl fmt::Display for VState { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "#avl={}, #vtype=({})", self.avl, self.vtype) + } +} + +impl VecOpCategory { + pub fn encode(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#101-vector-arithmetic-instruction-encoding + match self { + VecOpCategory::OPIVV => 0b000, + VecOpCategory::OPFVV => 0b001, + VecOpCategory::OPMVV => 0b010, + VecOpCategory::OPIVI => 0b011, + VecOpCategory::OPIVX => 0b100, + VecOpCategory::OPFVF => 0b101, + VecOpCategory::OPMVX => 0b110, + VecOpCategory::OPCFG => 0b111, + } + } +} + +impl VecOpMasking { + pub fn encode(&self) -> u32 { + match self { + VecOpMasking::Enabled { .. } => 0, + VecOpMasking::Disabled => 1, + } + } + + pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + VecOpMasking::Enabled { reg } => VecOpMasking::Enabled { + reg: allocs.next(*reg), + }, + VecOpMasking::Disabled => VecOpMasking::Disabled, + } + } +} + +impl VecAluOpRRRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VmaccVX => 0b101101, + VecAluOpRRRR::VnmsacVV | VecAluOpRRRR::VnmsacVX => 0b101111, + VecAluOpRRRR::VfmaccVV | VecAluOpRRRR::VfmaccVF => 0b101100, + VecAluOpRRRR::VfnmaccVV | VecAluOpRRRR::VfnmaccVF => 0b101101, + VecAluOpRRRR::VfmsacVV | VecAluOpRRRR::VfmsacVF => 0b101110, + VecAluOpRRRR::VfnmsacVV | VecAluOpRRRR::VfnmsacVF => 0b101111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VnmsacVV => VecOpCategory::OPMVV, + VecAluOpRRRR::VmaccVX | VecAluOpRRRR::VnmsacVX => VecOpCategory::OPMVX, + VecAluOpRRRR::VfmaccVV + | VecAluOpRRRR::VfnmaccVV + | VecAluOpRRRR::VfmsacVV + | VecAluOpRRRR::VfnmsacVV => VecOpCategory::OPFVV, + VecAluOpRRRR::VfmaccVF + | VecAluOpRRRR::VfnmaccVF + | VecAluOpRRRR::VfmsacVF + | VecAluOpRRRR::VfnmsacVF => VecOpCategory::OPFVF, + } + } + + // vs1 is the only variable source, vs2 is fixed. 
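+    // (vs2 is always a vector register; only vs1's register class varies with the
+    // operand category.)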
+ pub fn vs1_regclass(&self) -> RegClass { + match self.category() { + VecOpCategory::OPMVV | VecOpCategory::OPFVV => RegClass::Vector, + VecOpCategory::OPMVX => RegClass::Int, + VecOpCategory::OPFVF => RegClass::Float, + _ => unreachable!(), + } + } +} + +impl fmt::Display for VecAluOpRRRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - 2); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRRImm5::VslideupVI => 0b001110, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRRImm5::VslideupVI => VecOpCategory::OPIVI, + } + } + + pub fn imm_is_unsigned(&self) -> bool { + match self { + VecAluOpRRRImm5::VslideupVI => true, + } + } + + /// Some instructions do not allow the source and destination registers to overlap. + pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRRImm5::VslideupVI => true, + } + } +} + +impl fmt::Display for VecAluOpRRRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - 2); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRR::VaddVV + | VecAluOpRRR::VaddVX + | VecAluOpRRR::VfaddVV + | VecAluOpRRR::VfaddVF => 0b000000, + VecAluOpRRR::VsubVV + | VecAluOpRRR::VsubVX + | VecAluOpRRR::VfsubVV + | VecAluOpRRR::VfsubVF => 0b000010, + VecAluOpRRR::VrsubVX => 0b000011, + VecAluOpRRR::VmulVV | VecAluOpRRR::VmulVX => 0b100101, + VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhVX => 0b100111, + VecAluOpRRR::VmulhuVV + | VecAluOpRRR::VmulhuVX + | VecAluOpRRR::VfmulVV + | VecAluOpRRR::VfmulVF => 0b100100, + VecAluOpRRR::VsmulVV | VecAluOpRRR::VsmulVX => 0b100111, + VecAluOpRRR::VsllVV | VecAluOpRRR::VsllVX => 0b100101, + VecAluOpRRR::VsrlVV | VecAluOpRRR::VsrlVX => 0b101000, + VecAluOpRRR::VsraVV | VecAluOpRRR::VsraVX => 0b101001, + VecAluOpRRR::VandVV | VecAluOpRRR::VandVX => 0b001001, + VecAluOpRRR::VorVV | VecAluOpRRR::VorVX => 0b001010, + VecAluOpRRR::VxorVV | VecAluOpRRR::VxorVX => 0b001011, + VecAluOpRRR::VminuVV | VecAluOpRRR::VminuVX | VecAluOpRRR::VredminuVS => 0b000100, + VecAluOpRRR::VminVV | VecAluOpRRR::VminVX => 0b000101, + VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxuVX | VecAluOpRRR::VredmaxuVS => 0b000110, + VecAluOpRRR::VmaxVV | VecAluOpRRR::VmaxVX => 0b000111, + VecAluOpRRR::VslidedownVX => 0b001111, + VecAluOpRRR::VfrsubVF => 0b100111, + VecAluOpRRR::VmergeVVM + | VecAluOpRRR::VmergeVXM + | VecAluOpRRR::VfmergeVFM + | VecAluOpRRR::VcompressVM => 0b010111, + VecAluOpRRR::VfdivVV + | VecAluOpRRR::VfdivVF + | VecAluOpRRR::VsadduVV + | VecAluOpRRR::VsadduVX => 0b100000, + VecAluOpRRR::VfrdivVF | VecAluOpRRR::VsaddVV | VecAluOpRRR::VsaddVX => 0b100001, + VecAluOpRRR::VfminVV => 0b000100, + VecAluOpRRR::VfmaxVV => 0b000110, + VecAluOpRRR::VssubuVV | VecAluOpRRR::VssubuVX => 0b100010, + VecAluOpRRR::VssubVV | 
VecAluOpRRR::VssubVX => 0b100011, + VecAluOpRRR::VfsgnjVV | VecAluOpRRR::VfsgnjVF => 0b001000, + VecAluOpRRR::VfsgnjnVV => 0b001001, + VecAluOpRRR::VfsgnjxVV => 0b001010, + VecAluOpRRR::VrgatherVV | VecAluOpRRR::VrgatherVX => 0b001100, + VecAluOpRRR::VwadduVV | VecAluOpRRR::VwadduVX => 0b110000, + VecAluOpRRR::VwaddVV | VecAluOpRRR::VwaddVX => 0b110001, + VecAluOpRRR::VwsubuVV | VecAluOpRRR::VwsubuVX => 0b110010, + VecAluOpRRR::VwsubVV | VecAluOpRRR::VwsubVX => 0b110011, + VecAluOpRRR::VwadduWV | VecAluOpRRR::VwadduWX => 0b110100, + VecAluOpRRR::VwaddWV | VecAluOpRRR::VwaddWX => 0b110101, + VecAluOpRRR::VwsubuWV | VecAluOpRRR::VwsubuWX => 0b110110, + VecAluOpRRR::VwsubWV | VecAluOpRRR::VwsubWX => 0b110111, + VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmseqVX + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfeqVF => 0b011000, + VecAluOpRRR::VmsneVV + | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmfleVV + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmandMM => 0b011001, + VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltuVX | VecAluOpRRR::VmorMM => 0b011010, + VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfltVF => 0b011011, + VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfneVF => 0b011100, + VecAluOpRRR::VmsleVV + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmnandMM => 0b011101, + VecAluOpRRR::VmsgtuVX | VecAluOpRRR::VmnorMM => 0b011110, + VecAluOpRRR::VmsgtVX | VecAluOpRRR::VmfgeVF => 0b011111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRR::VaddVV + | VecAluOpRRR::VsaddVV + | VecAluOpRRR::VsadduVV + | VecAluOpRRR::VsubVV + | VecAluOpRRR::VssubVV + | VecAluOpRRR::VssubuVV + | VecAluOpRRR::VsmulVV + | VecAluOpRRR::VsllVV + | VecAluOpRRR::VsrlVV + | VecAluOpRRR::VsraVV + | VecAluOpRRR::VandVV + | VecAluOpRRR::VorVV + | VecAluOpRRR::VxorVV + | VecAluOpRRR::VminuVV + | VecAluOpRRR::VminVV + | VecAluOpRRR::VmaxuVV + | VecAluOpRRR::VmaxVV + | VecAluOpRRR::VmergeVVM + | VecAluOpRRR::VrgatherVV + | VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmsneVV + | VecAluOpRRR::VmsltuVV + | VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleVV => VecOpCategory::OPIVV, + VecAluOpRRR::VwaddVV + | VecAluOpRRR::VwaddWV + | VecAluOpRRR::VwadduVV + | VecAluOpRRR::VwadduWV + | VecAluOpRRR::VwsubVV + | VecAluOpRRR::VwsubWV + | VecAluOpRRR::VwsubuVV + | VecAluOpRRR::VwsubuWV + | VecAluOpRRR::VmulVV + | VecAluOpRRR::VmulhVV + | VecAluOpRRR::VmulhuVV + | VecAluOpRRR::VredmaxuVS + | VecAluOpRRR::VredminuVS + | VecAluOpRRR::VcompressVM + | VecAluOpRRR::VmandMM + | VecAluOpRRR::VmorMM + | VecAluOpRRR::VmnandMM + | VecAluOpRRR::VmnorMM => VecOpCategory::OPMVV, + VecAluOpRRR::VwaddVX + | VecAluOpRRR::VwadduVX + | VecAluOpRRR::VwadduWX + | VecAluOpRRR::VwaddWX + | VecAluOpRRR::VwsubVX + | VecAluOpRRR::VwsubuVX + | VecAluOpRRR::VwsubuWX + | VecAluOpRRR::VwsubWX + | VecAluOpRRR::VmulVX + | VecAluOpRRR::VmulhVX + | VecAluOpRRR::VmulhuVX => VecOpCategory::OPMVX, + VecAluOpRRR::VaddVX + | VecAluOpRRR::VsaddVX + | VecAluOpRRR::VsadduVX + | VecAluOpRRR::VsubVX + | VecAluOpRRR::VssubVX + | VecAluOpRRR::VssubuVX + | VecAluOpRRR::VrsubVX + | VecAluOpRRR::VsmulVX + | VecAluOpRRR::VsllVX + | VecAluOpRRR::VsrlVX + | VecAluOpRRR::VsraVX + | VecAluOpRRR::VandVX + | VecAluOpRRR::VorVX + | VecAluOpRRR::VxorVX + | VecAluOpRRR::VminuVX + | VecAluOpRRR::VminVX + | VecAluOpRRR::VmaxuVX + | VecAluOpRRR::VmaxVX + | VecAluOpRRR::VslidedownVX + | VecAluOpRRR::VmergeVXM + | VecAluOpRRR::VrgatherVX + | VecAluOpRRR::VmseqVX 
+ | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmsltuVX + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmsgtuVX + | VecAluOpRRR::VmsgtVX => VecOpCategory::OPIVX, + VecAluOpRRR::VfaddVV + | VecAluOpRRR::VfsubVV + | VecAluOpRRR::VfmulVV + | VecAluOpRRR::VfdivVV + | VecAluOpRRR::VfmaxVV + | VecAluOpRRR::VfminVV + | VecAluOpRRR::VfsgnjVV + | VecAluOpRRR::VfsgnjnVV + | VecAluOpRRR::VfsgnjxVV + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfleVV => VecOpCategory::OPFVV, + VecAluOpRRR::VfaddVF + | VecAluOpRRR::VfsubVF + | VecAluOpRRR::VfrsubVF + | VecAluOpRRR::VfmulVF + | VecAluOpRRR::VfdivVF + | VecAluOpRRR::VfrdivVF + | VecAluOpRRR::VfmergeVFM + | VecAluOpRRR::VfsgnjVF + | VecAluOpRRR::VmfeqVF + | VecAluOpRRR::VmfneVF + | VecAluOpRRR::VmfltVF + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmfgeVF => VecOpCategory::OPFVF, + } + } + + // vs1 is the only variable source, vs2 is fixed. + pub fn vs1_regclass(&self) -> RegClass { + match self.category() { + VecOpCategory::OPIVV | VecOpCategory::OPFVV | VecOpCategory::OPMVV => RegClass::Vector, + VecOpCategory::OPIVX | VecOpCategory::OPMVX => RegClass::Int, + VecOpCategory::OPFVF => RegClass::Float, + _ => unreachable!(), + } + } + + /// Some instructions do not allow the source and destination registers to overlap. + pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRR::VrgatherVV + | VecAluOpRRR::VrgatherVX + | VecAluOpRRR::VcompressVM + | VecAluOpRRR::VwadduVV + | VecAluOpRRR::VwadduVX + | VecAluOpRRR::VwaddVV + | VecAluOpRRR::VwaddVX + | VecAluOpRRR::VwadduWV + | VecAluOpRRR::VwadduWX + | VecAluOpRRR::VwaddWV + | VecAluOpRRR::VwaddWX + | VecAluOpRRR::VwsubuVV + | VecAluOpRRR::VwsubuVX + | VecAluOpRRR::VwsubVV + | VecAluOpRRR::VwsubVX + | VecAluOpRRR::VwsubuWV + | VecAluOpRRR::VwsubuWX + | VecAluOpRRR::VwsubWV + | VecAluOpRRR::VwsubWX => true, + _ => false, + } + } +} + +impl fmt::Display for VecAluOpRRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let suffix_length = match self { + VecAluOpRRR::VmergeVVM | VecAluOpRRR::VmergeVXM | VecAluOpRRR::VfmergeVFM => 3, + _ => 2, + }; + + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - suffix_length); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRImm5::VaddVI => 0b000000, + VecAluOpRRImm5::VrsubVI => 0b000011, + VecAluOpRRImm5::VsllVI => 0b100101, + VecAluOpRRImm5::VsrlVI => 0b101000, + VecAluOpRRImm5::VsraVI => 0b101001, + VecAluOpRRImm5::VandVI => 0b001001, + VecAluOpRRImm5::VorVI => 0b001010, + VecAluOpRRImm5::VxorVI => 0b001011, + VecAluOpRRImm5::VslidedownVI => 0b001111, + VecAluOpRRImm5::VssrlVI => 0b101010, + VecAluOpRRImm5::VmergeVIM => 0b010111, + VecAluOpRRImm5::VsadduVI => 0b100000, + VecAluOpRRImm5::VsaddVI => 0b100001, + VecAluOpRRImm5::VrgatherVI => 0b001100, + VecAluOpRRImm5::VmvrV => 0b100111, + VecAluOpRRImm5::VnclipWI => 0b101111, + VecAluOpRRImm5::VnclipuWI => 0b101110, + VecAluOpRRImm5::VmseqVI => 0b011000, + VecAluOpRRImm5::VmsneVI => 0b011001, + VecAluOpRRImm5::VmsleuVI => 0b011100, + VecAluOpRRImm5::VmsleVI => 0b011101, + VecAluOpRRImm5::VmsgtuVI => 0b011110, + VecAluOpRRImm5::VmsgtVI => 
0b011111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRImm5::VaddVI + | VecAluOpRRImm5::VrsubVI + | VecAluOpRRImm5::VsllVI + | VecAluOpRRImm5::VsrlVI + | VecAluOpRRImm5::VsraVI + | VecAluOpRRImm5::VandVI + | VecAluOpRRImm5::VorVI + | VecAluOpRRImm5::VxorVI + | VecAluOpRRImm5::VssrlVI + | VecAluOpRRImm5::VslidedownVI + | VecAluOpRRImm5::VmergeVIM + | VecAluOpRRImm5::VsadduVI + | VecAluOpRRImm5::VsaddVI + | VecAluOpRRImm5::VrgatherVI + | VecAluOpRRImm5::VmvrV + | VecAluOpRRImm5::VnclipWI + | VecAluOpRRImm5::VnclipuWI + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => VecOpCategory::OPIVI, + } + } + + pub fn imm_is_unsigned(&self) -> bool { + match self { + VecAluOpRRImm5::VsllVI + | VecAluOpRRImm5::VsrlVI + | VecAluOpRRImm5::VssrlVI + | VecAluOpRRImm5::VsraVI + | VecAluOpRRImm5::VslidedownVI + | VecAluOpRRImm5::VrgatherVI + | VecAluOpRRImm5::VmvrV + | VecAluOpRRImm5::VnclipWI + | VecAluOpRRImm5::VnclipuWI => true, + VecAluOpRRImm5::VaddVI + | VecAluOpRRImm5::VrsubVI + | VecAluOpRRImm5::VandVI + | VecAluOpRRImm5::VorVI + | VecAluOpRRImm5::VxorVI + | VecAluOpRRImm5::VmergeVIM + | VecAluOpRRImm5::VsadduVI + | VecAluOpRRImm5::VsaddVI + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => false, + } + } + + /// Some instructions do not allow the source and destination registers to overlap. + pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRImm5::VrgatherVI => true, + _ => false, + } + } +} + +impl fmt::Display for VecAluOpRRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let suffix_length = match self { + VecAluOpRRImm5::VmergeVIM => 3, + _ => 2, + }; + + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - suffix_length); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => { + 0b010000 + } + VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => 0b010010, + VecAluOpRR::VfsqrtV => 0b010011, + VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0b010111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRR::VmvSX => VecOpCategory::OPMVX, + VecAluOpRR::VmvXS + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => VecOpCategory::OPMVV, + VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => VecOpCategory::OPFVF, + VecAluOpRR::VfmvFS | VecAluOpRR::VfsqrtV => VecOpCategory::OPFVV, + VecAluOpRR::VmvVV => VecOpCategory::OPIVV, + VecAluOpRR::VmvVX => VecOpCategory::OPIVX, + } + } + + /// Returns the auxiliary encoding field for the instruction, if any. 
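+    /// The value is placed in whichever of the vs1/vs2 fields is not occupied by the
+    /// real source register (see `vs_is_vs2_encoded` below).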
+ pub fn aux_encoding(&self) -> u32 { + match self { + // VRXUNARY0 + VecAluOpRR::VmvSX => 0b00000, + // VWXUNARY0 + VecAluOpRR::VmvXS => 0b00000, + // VRFUNARY0 + VecAluOpRR::VfmvSF => 0b00000, + // VWFUNARY0 + VecAluOpRR::VfmvFS => 0b00000, + // VFUNARY1 + VecAluOpRR::VfsqrtV => 0b00000, + // VXUNARY0 + VecAluOpRR::VzextVF8 => 0b00010, + VecAluOpRR::VsextVF8 => 0b00011, + VecAluOpRR::VzextVF4 => 0b00100, + VecAluOpRR::VsextVF4 => 0b00101, + VecAluOpRR::VzextVF2 => 0b00110, + VecAluOpRR::VsextVF2 => 0b00111, + // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: + // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. + VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0, + } + } + + /// Most of these opcodes have the source register encoded in the VS2 field and + /// the `aux_encoding` field in VS1. However some special snowflakes have it the + /// other way around. As far as I can tell only vmv.v.* are backwards. + pub fn vs_is_vs2_encoded(&self) -> bool { + match self { + VecAluOpRR::VmvXS + | VecAluOpRR::VfmvFS + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => true, + VecAluOpRR::VmvSX + | VecAluOpRR::VfmvSF + | VecAluOpRR::VmvVV + | VecAluOpRR::VmvVX + | VecAluOpRR::VfmvVF => false, + } + } + + pub fn dst_regclass(&self) -> RegClass { + match self { + VecAluOpRR::VfmvSF + | VecAluOpRR::VmvSX + | VecAluOpRR::VmvVV + | VecAluOpRR::VmvVX + | VecAluOpRR::VfmvVF + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => RegClass::Vector, + VecAluOpRR::VmvXS => RegClass::Int, + VecAluOpRR::VfmvFS => RegClass::Float, + } + } + + pub fn src_regclass(&self) -> RegClass { + match self { + VecAluOpRR::VmvXS + | VecAluOpRR::VfmvFS + | VecAluOpRR::VmvVV + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => RegClass::Vector, + VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float, + VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int, + } + } + + /// Some instructions do not allow the source and destination registers to overlap. 
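+    /// For `VecAluOpRR` this applies to the sign/zero-extension ops, whose destination
+    /// elements are wider than their source elements.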
+ pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => true, + _ => false, + } + } +} + +impl fmt::Display for VecAluOpRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + VecAluOpRR::VmvSX => "vmv.s.x", + VecAluOpRR::VmvXS => "vmv.x.s", + VecAluOpRR::VfmvSF => "vfmv.s.f", + VecAluOpRR::VfmvFS => "vfmv.f.s", + VecAluOpRR::VfsqrtV => "vfsqrt.v", + VecAluOpRR::VzextVF2 => "vzext.vf2", + VecAluOpRR::VzextVF4 => "vzext.vf4", + VecAluOpRR::VzextVF8 => "vzext.vf8", + VecAluOpRR::VsextVF2 => "vsext.vf2", + VecAluOpRR::VsextVF4 => "vsext.vf4", + VecAluOpRR::VsextVF8 => "vsext.vf8", + VecAluOpRR::VmvVV => "vmv.v.v", + VecAluOpRR::VmvVX => "vmv.v.x", + VecAluOpRR::VfmvVF => "vfmv.v.f", + }) + } +} + +impl VecAluOpRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRImm5::VmvVI => 0b010111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRImm5::VmvVI => VecOpCategory::OPIVI, + } + } + + /// Returns the auxiliary encoding field for the instruction, if any. + pub fn aux_encoding(&self) -> u32 { + match self { + // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: + // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. + VecAluOpRImm5::VmvVI => 0, + } + } +} + +impl fmt::Display for VecAluOpRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + VecAluOpRImm5::VmvVI => "vmv.v.i", + }) + } +} + +impl VecAMode { + pub fn get_base_register(&self) -> Option { + match self { + VecAMode::UnitStride { base, .. } => base.get_base_register(), + } + } + + pub fn get_allocatable_register(&self) -> Option { + match self { + VecAMode::UnitStride { base, .. } => base.get_allocatable_register(), + } + } + + pub(crate) fn with_allocs(self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + VecAMode::UnitStride { base } => VecAMode::UnitStride { + base: base.with_allocs(allocs), + }, + } + } + + pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 { + match self { + VecAMode::UnitStride { base, .. } => base.get_offset_with_state(state), + } + } + + /// `mop` field, described in Table 7 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn mop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b00, + } + } + + /// `lumop` field, described in Table 9 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn lumop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b00000, + } + } + + /// `sumop` field, described in Table 10 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn sumop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b00000, + } + } + + /// The `nf[2:0]` field encodes the number of fields in each segment. 
For regular vector loads and + /// stores, nf=0, indicating that a single value is moved between a vector register group and memory + /// at each element position. Larger values in the nf field are used to access multiple contiguous + /// fields within a segment as described in Section 7.8 Vector Load/Store Segment Instructions. + /// + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn nf(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b000, + } + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst_vector.isle b/cranelift/codegen/src/isa/zkasm/inst_vector.isle new file mode 100644 index 000000000000..cadf4911f989 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst_vector.isle @@ -0,0 +1,1773 @@ +;; Represents the possible widths of an element when used in an operation. +(type VecElementWidth (enum + (E8) + (E16) + (E32) + (E64) +)) + +;; Vector Register Group Multiplier (LMUL) +;; +;; The LMUL setting specifies how we should group registers together. LMUL can +;; also be a fractional value, reducing the number of bits used in a single +;; vector register. Fractional LMUL is used to increase the number of effective +;; usable vector register groups when operating on mixed-width values. +(type VecLmul (enum + (LmulF8) + (LmulF4) + (LmulF2) + (Lmul1) + (Lmul2) + (Lmul4) + (Lmul8) +)) + +;; Tail Mode +;; +;; The tail mode specifies how the tail elements of a vector register are handled. +(type VecTailMode (enum + ;; Tail Agnostic means that the tail elements are left in an undefined state. + (Agnostic) + ;; Tail Undisturbed means that the tail elements are left in their original values. + (Undisturbed) +)) + +;; Mask Mode +;; +;; The mask mode specifies how the masked elements of a vector register are handled. +(type VecMaskMode (enum + ;; Mask Agnostic means that the masked out elements are left in an undefined state. + (Agnostic) + ;; Mask Undisturbed means that the masked out elements are left in their original values. + (Undisturbed) +)) + +;; Application Vector Length (AVL) +;; +;; This setting specifies the number of elements that are going to be processed +;; in a single instruction. Note: We may end up processing fewer elements than +;; the AVL setting, if they don't fit in a single register. +(type VecAvl (enum + ;; Static AVL emits a `vsetivli` that uses a constant value + (Static (size UImm5)) + ;; TODO: Add a dynamic, register based AVL mode when we are able to properly test it +)) + +(type VType (primitive VType)) +(type VState (primitive VState)) + + +;; Vector Opcode Category +;; +;; These categories are used to determine the type of operands that are allowed in the +;; instruction. +(type VecOpCategory (enum + (OPIVV) + (OPFVV) + (OPMVV) + (OPIVI) + (OPIVX) + (OPFVF) + (OPMVX) + (OPCFG) +)) + +;; Vector Opcode Masking +;; +;; When masked, the instruction will only operate on the elements that are dictated by +;; the mask register. Currently this is always fixed to v0. 
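+;; `Enabled` carries the mask register as an explicit operand; `Disabled` selects the
+;; unmasked form of the instruction (the `vm` bit set to 1).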
+(type VecOpMasking (enum + (Enabled (reg Reg)) + (Disabled) +)) + +(decl pure masked (VReg) VecOpMasking) +(rule (masked reg) (VecOpMasking.Enabled reg)) + +(decl pure unmasked () VecOpMasking) +(rule (unmasked) (VecOpMasking.Disabled)) + +;; Register to Register ALU Ops +(type VecAluOpRRR (enum + ;; Vector-Vector Opcodes + (VaddVV) + (VsaddVV) + (VsadduVV) + (VwaddVV) + (VwaddWV) + (VwadduVV) + (VwadduWV) + (VsubVV) + (VwsubVV) + (VwsubWV) + (VwsubuVV) + (VwsubuWV) + (VssubVV) + (VssubuVV) + (VmulVV) + (VmulhVV) + (VmulhuVV) + (VsmulVV) + (VsllVV) + (VsrlVV) + (VsraVV) + (VandVV) + (VorVV) + (VxorVV) + (VmaxVV) + (VmaxuVV) + (VminVV) + (VminuVV) + (VfaddVV) + (VfsubVV) + (VfmulVV) + (VfdivVV) + (VfminVV) + (VfmaxVV) + (VfsgnjVV) + (VfsgnjnVV) + (VfsgnjxVV) + (VmergeVVM) + (VredmaxuVS) + (VredminuVS) + (VrgatherVV) + (VcompressVM) + (VmseqVV) + (VmsneVV) + (VmsltuVV) + (VmsltVV) + (VmsleuVV) + (VmsleVV) + (VmfeqVV) + (VmfneVV) + (VmfltVV) + (VmfleVV) + (VmandMM) + (VmorMM) + (VmnandMM) + (VmnorMM) + + + ;; Vector-Scalar Opcodes + (VaddVX) + (VsaddVX) + (VsadduVX) + (VwaddVX) + (VwaddWX) + (VwadduVX) + (VwadduWX) + (VsubVX) + (VrsubVX) + (VwsubVX) + (VwsubWX) + (VwsubuVX) + (VwsubuWX) + (VssubVX) + (VssubuVX) + (VmulVX) + (VmulhVX) + (VmulhuVX) + (VsmulVX) + (VsllVX) + (VsrlVX) + (VsraVX) + (VandVX) + (VorVX) + (VxorVX) + (VmaxVX) + (VmaxuVX) + (VminVX) + (VminuVX) + (VslidedownVX) + (VfaddVF) + (VfsubVF) + (VfrsubVF) + (VfmulVF) + (VfdivVF) + (VfsgnjVF) + (VfrdivVF) + (VmergeVXM) + (VfmergeVFM) + (VrgatherVX) + (VmseqVX) + (VmsneVX) + (VmsltuVX) + (VmsltVX) + (VmsleuVX) + (VmsleVX) + (VmsgtuVX) + (VmsgtVX) + (VmfeqVF) + (VmfneVF) + (VmfltVF) + (VmfleVF) + (VmfgtVF) + (VmfgeVF) +)) + + + +;; Register-Imm ALU Ops that modify the destination register +(type VecAluOpRRRImm5 (enum + (VslideupVI) +)) + +;; Register-Register ALU Ops that modify the destination register +(type VecAluOpRRRR (enum + ;; Vector-Vector Opcodes + (VmaccVV) + (VnmsacVV) + (VfmaccVV) + (VfnmaccVV) + (VfmsacVV) + (VfnmsacVV) + + ;; Vector-Scalar Opcodes + (VmaccVX) + (VnmsacVX) + (VfmaccVF) + (VfnmaccVF) + (VfmsacVF) + (VfnmsacVF) +)) + +;; Register-Imm ALU Ops +(type VecAluOpRRImm5 (enum + ;; Regular VI Opcodes + (VaddVI) + (VsaddVI) + (VsadduVI) + (VrsubVI) + (VsllVI) + (VsrlVI) + (VsraVI) + (VandVI) + (VorVI) + (VxorVI) + (VssrlVI) + (VslidedownVI) + (VmergeVIM) + (VrgatherVI) + ;; This opcode represents multiple instructions `vmv1r`/`vmv2r`/`vmv4r`/etc... + ;; The immediate field specifies how many registers should be copied. + (VmvrV) + (VnclipWI) + (VnclipuWI) + (VmseqVI) + (VmsneVI) + (VmsleuVI) + (VmsleVI) + (VmsgtuVI) + (VmsgtVI) +)) + +;; Imm only ALU Ops +(type VecAluOpRImm5 (enum + (VmvVI) +)) + +;; These are all of the special cases that have weird encodings. They are all +;; single source, single destination instructions, and usually use one of +;; the two source registers as auxiliary encoding space. +(type VecAluOpRR (enum + (VmvSX) + (VmvXS) + (VfmvSF) + (VfmvFS) + ;; vmv.v* is special in that vs2 must be v0 (and is ignored) otherwise the instruction is illegal. + (VmvVV) + (VmvVX) + (VfmvVF) + (VfsqrtV) + (VsextVF2) + (VsextVF4) + (VsextVF8) + (VzextVF2) + (VzextVF4) + (VzextVF8) +)) + +;; Returns the canonical destination type for a VecAluOpRRImm5. 
+(decl pure vec_alu_rr_dst_type (VecAluOpRR) Type) +(extern constructor vec_alu_rr_dst_type vec_alu_rr_dst_type) + + +;; Vector Addressing Mode +(type VecAMode (enum + ;; Vector unit-stride operations access elements stored contiguously in memory + ;; starting from the base effective address. + (UnitStride + (base AMode)) + ;; TODO: Constant Stride + ;; TODO: Indexed Operations +)) + + +;; Builds a static VState matching a SIMD type. +;; The VState is guaranteed to be static with AVL set to the number of lanes. +;; Element size is set to the size of the type. +;; LMUL is set to 1. +;; Tail mode is set to agnostic. +;; Mask mode is set to agnostic. +(decl pure vstate_from_type (Type) VState) +(extern constructor vstate_from_type vstate_from_type) +(convert Type VState vstate_from_type) + +;; Alters the LMUL of a VState to mf2 +(decl pure vstate_mf2 (VState) VState) +(extern constructor vstate_mf2 vstate_mf2) + +;; Extracts an element width from a SIMD type. +(decl pure element_width_from_type (Type) VecElementWidth) +(rule (element_width_from_type ty) + (if-let $I8 (lane_type ty)) + (VecElementWidth.E8)) +(rule (element_width_from_type ty) + (if-let $I16 (lane_type ty)) + (VecElementWidth.E16)) +(rule (element_width_from_type ty) + (if-let $I32 (lane_type ty)) + (VecElementWidth.E32)) +(rule (element_width_from_type ty) + (if-let $F32 (lane_type ty)) + (VecElementWidth.E32)) +(rule (element_width_from_type ty) + (if-let $I64 (lane_type ty)) + (VecElementWidth.E64)) +(rule (element_width_from_type ty) + (if-let $F64 (lane_type ty)) + (VecElementWidth.E64)) + +(decl pure min_vec_reg_size () u64) +(extern constructor min_vec_reg_size min_vec_reg_size) + +;; An extractor that matches any type that is known to fit in a single vector +;; register. +(decl ty_vec_fits_in_register (Type) Type) +(extern extractor ty_vec_fits_in_register ty_vec_fits_in_register) + +;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; As noted in the RISC-V Vector Extension Specification, rs2 is the first +;; source register and rs1 is the second source register. This is the opposite +;; of the usual RISC-V register order. +;; See Section 10.1 of the RISC-V Vector Extension Specification. + + +;; Helper for emitting `MInst.VecAluRRRR` instructions. +;; These instructions modify the destination register. +(decl vec_alu_rrrr (VecAluOpRRRR VReg VReg Reg VecOpMasking VState) VReg) +(rule (vec_alu_rrrr op vd_src vs2 vs1 mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRRR op vd vd_src vs2 vs1 mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRRImm5` instructions. +;; These instructions modify the destination register. +(decl vec_alu_rrr_imm5 (VecAluOpRRRImm5 VReg VReg Imm5 VecOpMasking VState) VReg) +(rule (vec_alu_rrr_imm5 op vd_src vs2 imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRRImm5 op vd vd_src vs2 imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRRImm5` instructions where the immediate +;; is zero extended instead of sign extended. +(decl vec_alu_rrr_uimm5 (VecAluOpRRRImm5 VReg VReg UImm5 VecOpMasking VState) VReg) +(rule (vec_alu_rrr_uimm5 op vd_src vs2 imm mask vstate) + (vec_alu_rrr_imm5 op vd_src vs2 (uimm5_bitcast_to_imm5 imm) mask vstate)) + +;; Helper for emitting `MInst.VecAluRRR` instructions. 
+(decl vec_alu_rrr (VecAluOpRRR Reg Reg VecOpMasking VState) Reg) +(rule (vec_alu_rrr op vs2 vs1 mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRR op vd vs2 vs1 mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions. +(decl vec_alu_rr_imm5 (VecAluOpRRImm5 Reg Imm5 VecOpMasking VState) Reg) +(rule (vec_alu_rr_imm5 op vs2 imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRImm5 op vd vs2 imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions where the immediate +;; is zero extended instead of sign extended. +(decl vec_alu_rr_uimm5 (VecAluOpRRImm5 Reg UImm5 VecOpMasking VState) Reg) +(rule (vec_alu_rr_uimm5 op vs2 imm mask vstate) + (vec_alu_rr_imm5 op vs2 (uimm5_bitcast_to_imm5 imm) mask vstate)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions that use the Imm5 as +;; auxiliary encoding space. +(decl vec_alu_rr (VecAluOpRR Reg VecOpMasking VState) Reg) +(rule (vec_alu_rr op vs mask vstate) + (let ((vd WritableReg (temp_writable_reg (vec_alu_rr_dst_type op))) + (_ Unit (emit (MInst.VecAluRR op vd vs mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRImm5` instructions. +(decl vec_alu_r_imm5 (VecAluOpRImm5 Imm5 VecOpMasking VState) Reg) +(rule (vec_alu_r_imm5 op imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRImm5 op vd imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecLoad` instructions. +(decl vec_load (VecElementWidth VecAMode MemFlags VecOpMasking VState) Reg) +(rule (vec_load eew from flags mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecLoad eew vd from flags mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecStore` instructions. +(decl vec_store (VecElementWidth VecAMode VReg MemFlags VecOpMasking VState) InstOutput) +(rule (vec_store eew to from flags mask vstate) + (side_effect + (SideEffectNoResult.Inst (MInst.VecStore eew to from flags mask vstate)))) + +;; Helper for emitting the `vadd.vv` instruction. +(decl rv_vadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vadd.vx` instruction. +(decl rv_vadd_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vadd_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VaddVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vadd.vi` instruction. +(decl rv_vadd_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vadd_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VaddVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vsadd.vv` instruction. +(decl rv_vsadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsadd.vx` instruction. +(decl rv_vsadd_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsadd_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsaddVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsadd.vi` instruction. +(decl rv_vsadd_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vsadd_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VsaddVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vsaddu.vv` instruction. 
+(decl rv_vsaddu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vsaddu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VsadduVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vsaddu.vx` instruction.
+(decl rv_vsaddu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vsaddu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VsadduVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vsaddu.vi` instruction.
+(decl rv_vsaddu_vi (VReg Imm5 VecOpMasking VState) VReg)
+(rule (rv_vsaddu_vi vs2 imm mask vstate)
+  (vec_alu_rr_imm5 (VecAluOpRRImm5.VsadduVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vwadd.vv` instruction.
+;;
+;; Widening integer add, 2*SEW = SEW + SEW
+(decl rv_vwadd_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwadd_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwaddVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwadd.vx` instruction.
+;;
+;; Widening integer add, 2*SEW = SEW + SEW
+(decl rv_vwadd_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwadd_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwaddVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwadd.wv` instruction.
+;;
+;; Widening integer add, 2*SEW = 2*SEW + SEW
+(decl rv_vwadd_wv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwadd_wv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwaddWV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwadd.wx` instruction.
+;;
+;; Widening integer add, 2*SEW = 2*SEW + SEW
+(decl rv_vwadd_wx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwadd_wx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwaddWX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwaddu.vv` instruction.
+;;
+;; Widening unsigned integer add, 2*SEW = SEW + SEW
+(decl rv_vwaddu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwaddu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwadduVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwaddu.vx` instruction.
+;;
+;; Widening unsigned integer add, 2*SEW = SEW + SEW
+(decl rv_vwaddu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwaddu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwadduVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwaddu.wv` instruction.
+;;
+;; Widening unsigned integer add, 2*SEW = 2*SEW + SEW
+(decl rv_vwaddu_wv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwaddu_wv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwadduWV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwaddu.wx` instruction.
+;;
+;; Widening unsigned integer add, 2*SEW = 2*SEW + SEW
+(decl rv_vwaddu_wx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwaddu_wx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwadduWX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vsub.vv` instruction.
+(decl rv_vsub_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vsub_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VsubVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vsub.vx` instruction.
+(decl rv_vsub_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vsub_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VsubVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vrsub.vx` instruction.
+(decl rv_vrsub_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vrsub_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsub.vv` instruction.
+;;
+;; Widening integer sub, 2*SEW = SEW - SEW
+(decl rv_vwsub_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwsub_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsub.vx` instruction.
+;;
+;; Widening integer sub, 2*SEW = SEW - SEW
+(decl rv_vwsub_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwsub_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsub.wv` instruction.
+;;
+;; Widening integer sub, 2*SEW = 2*SEW - SEW
+(decl rv_vwsub_wv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwsub_wv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubWV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsub.wx` instruction.
+;;
+;; Widening integer sub, 2*SEW = 2*SEW - SEW
+(decl rv_vwsub_wx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwsub_wx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubWX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsubu.vv` instruction.
+;;
+;; Widening unsigned integer sub, 2*SEW = SEW - SEW
+(decl rv_vwsubu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwsubu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubuVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsubu.vx` instruction.
+;;
+;; Widening unsigned integer sub, 2*SEW = SEW - SEW
+(decl rv_vwsubu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwsubu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubuVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsubu.wv` instruction.
+;;
+;; Widening unsigned integer sub, 2*SEW = 2*SEW - SEW
+(decl rv_vwsubu_wv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwsubu_wv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubuWV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsubu.wx` instruction.
+;;
+;; Widening unsigned integer sub, 2*SEW = 2*SEW - SEW
+(decl rv_vwsubu_wx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwsubu_wx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubuWX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vssub.vv` instruction.
+(decl rv_vssub_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vssub_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VssubVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vssub.vx` instruction.
+(decl rv_vssub_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vssub_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VssubVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vssubu.vv` instruction.
+(decl rv_vssubu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vssubu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VssubuVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vssubu.vx` instruction.
+(decl rv_vssubu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vssubu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VssubuVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vneg.v` pseudo-instruction.
+(decl rv_vneg_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vneg_v vs2 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 (zero_reg) mask vstate))
+
+;; Helper for emitting the `vrsub.vi` instruction.
+(decl rv_vrsub_vi (VReg Imm5 VecOpMasking VState) VReg)
+(rule (rv_vrsub_vi vs2 imm mask vstate)
+  (vec_alu_rr_imm5 (VecAluOpRRImm5.VrsubVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vmul.vv` instruction.
+(decl rv_vmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmul.vx` instruction. +(decl rv_vmul_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmul_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulh.vv` instruction. +(decl rv_vmulh_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmulh_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulh.vx` instruction. +(decl rv_vmulh_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmulh_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulhu.vv` instruction. +(decl rv_vmulhu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmulhu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulhu.vx` instruction. +(decl rv_vmulhu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmulhu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsmul.vv` instruction. +;; +;; Signed saturating and rounding fractional multiply +;; # vd[i] = clip(roundoff_signed(vs2[i]*vs1[i], SEW-1)) +(decl rv_vsmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsmul.vx` instruction. +;; +;; Signed saturating and rounding fractional multiply +;; # vd[i] = clip(roundoff_signed(vs2[i]*x[rs1], SEW-1)) +(decl rv_vsmul_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsmul_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsmulVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmacc.vv` instruction. +;; +;; Integer multiply-add, overwrite addend +;; # vd[i] = +(vs1[i] * vs2[i]) + vd[i] +(decl rv_vmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmacc.vx` instruction. +;; +;; Integer multiply-add, overwrite addend +;; # vd[i] = +(x[rs1] * vs2[i]) + vd[i] +(decl rv_vmacc_vx (VReg VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmacc_vx vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VmaccVX) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vnmsac.vv` instruction. +;; +;; Integer multiply-sub, overwrite minuend +;; # vd[i] = -(vs1[i] * vs2[i]) + vd[i] +(decl rv_vnmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vnmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VnmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vnmsac.vx` instruction. +;; +;; Integer multiply-sub, overwrite minuend +;; # vd[i] = -(x[rs1] * vs2[i]) + vd[i] +(decl rv_vnmsac_vx (VReg VReg XReg VecOpMasking VState) VReg) +(rule (rv_vnmsac_vx vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VnmsacVX) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `sll.vv` instruction. +(decl rv_vsll_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsll_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsllVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `sll.vx` instruction. 
+(decl rv_vsll_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsll_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsllVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsll.vi` instruction. +(decl rv_vsll_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsll_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsllVI) vs2 imm mask vstate)) + +;; Helper for emitting the `srl.vv` instruction. +(decl rv_vsrl_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsrl_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsrlVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `srl.vx` instruction. +(decl rv_vsrl_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsrl_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsrlVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsrl.vi` instruction. +(decl rv_vsrl_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsrl_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsrlVI) vs2 imm mask vstate)) + +;; Helper for emitting the `sra.vv` instruction. +(decl rv_vsra_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsra_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsraVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `sra.vx` instruction. +(decl rv_vsra_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsra_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsraVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsra.vi` instruction. +(decl rv_vsra_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsra_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsraVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vand.vv` instruction. +(decl rv_vand_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vand_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VandVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vand.vx` instruction. +(decl rv_vand_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vand_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VandVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vand.vi` instruction. +(decl rv_vand_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vand_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VandVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vor.vv` instruction. +(decl rv_vor_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vor_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VorVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vor.vx` instruction. +(decl rv_vor_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vor_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VorVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vor.vi` instruction. +(decl rv_vor_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vor_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VorVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vxor.vv` instruction. +(decl rv_vxor_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vxor_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VxorVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vxor.vx` instruction. +(decl rv_vxor_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vxor_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VxorVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vxor.vi` instruction. +(decl rv_vxor_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vxor_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VxorVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vssrl.vi` instruction. 
+;; +;; vd[i] = (unsigned(vs2[i]) >> imm) + r +;; +;; `r` here is the rounding mode currently selected. +(decl rv_vssrl_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vssrl_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VssrlVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vnot.v` instruction. +;; This is just a mnemonic for `vxor.vi vd, vs, -1` +(decl rv_vnot_v (VReg VecOpMasking VState) VReg) +(rule (rv_vnot_v vs2 mask vstate) + (if-let neg1 (imm5_from_i8 -1)) + (rv_vxor_vi vs2 neg1 mask vstate)) + +;; Helper for emitting the `vmax.vv` instruction. +(decl rv_vmax_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmax_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmax.vx` instruction. +(decl rv_vmax_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmax_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmin.vv` instruction. +(decl rv_vmin_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmin_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmin.vx` instruction. +(decl rv_vmin_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmin_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmaxu.vv` instruction. +(decl rv_vmaxu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmaxu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmaxu.vx` instruction. +(decl rv_vmaxu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmaxu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vminu.vv` instruction. +(decl rv_vminu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vminu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vminu.vx` instruction. +(decl rv_vminu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vminu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfadd.vv` instruction. +(decl rv_vfadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfadd.vf` instruction. +(decl rv_vfadd_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfadd_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfaddVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsub.vv` instruction. +(decl rv_vfsub_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfsub_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsubVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsub.vf` instruction. +(decl rv_vfsub_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfsub_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsubVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfrsub.vf` instruction. +(decl rv_vfrsub_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfrsub_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfrsubVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmul.vv` instruction. +(decl rv_vfmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmul.vf` instruction. 
+(decl rv_vfmul_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmul_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmulVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmacc.vv` instruction. +;; +;; FP multiply-accumulate, overwrites addend +;; # vd[i] = +(vs1[i] * vs2[i]) + vd[i] +(decl rv_vfmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmacc.vf` instruction. +;; +;; FP multiply-accumulate, overwrites addend +;; # vd[i] = +(f[rs1] * vs2[i]) + vd[i] +(decl rv_vfmacc_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmacc_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmaccVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmacc.vv` instruction. +;; +;; FP negate-(multiply-accumulate), overwrites subtrahend +;; # vd[i] = -(vs1[i] * vs2[i]) - vd[i] +(decl rv_vfnmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfnmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmacc.vf` instruction. +;; +;; FP negate-(multiply-accumulate), overwrites subtrahend +;; # vd[i] = -(f[rs1] * vs2[i]) - vd[i] +(decl rv_vfnmacc_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfnmacc_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmaccVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmsac.vv` instruction. +;; +;; FP multiply-subtract-accumulator, overwrites subtrahend +;; # vd[i] = +(vs1[i] * vs2[i]) - vd[i] +(decl rv_vfmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmsac.vf` instruction. +;; +;; FP multiply-subtract-accumulator, overwrites subtrahend +;; # vd[i] = +(f[rs1] * vs2[i]) - vd[i] +(decl rv_vfmsac_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmsac_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmsacVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmsac.vv` instruction. +;; +;; FP negate-(multiply-subtract-accumulator), overwrites minuend +;; # vd[i] = -(vs1[i] * vs2[i]) + vd[i] +(decl rv_vfnmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfnmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmsac.vf` instruction. +;; +;; FP negate-(multiply-subtract-accumulator), overwrites minuend +;; # vd[i] = -(f[rs1] * vs2[i]) + vd[i] +(decl rv_vfnmsac_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfnmsac_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmsacVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfdiv.vv` instruction. +(decl rv_vfdiv_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfdiv_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfdivVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfdiv.vf` instruction. +(decl rv_vfdiv_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfdiv_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfdivVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfrdiv.vf` instruction. +(decl rv_vfrdiv_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfrdiv_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfrdivVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmin.vv` instruction. 
+(decl rv_vfmin_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfmin_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfminVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfmax.vv` instruction.
+(decl rv_vfmax_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfmax_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfmaxVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfsgnj.vv` ("Floating Point Sign Injection") instruction.
+;; The output of this instruction is `vs2` with the sign bit from `vs1`.
+(decl rv_vfsgnj_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnj_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfsgnj.vf` ("Floating Point Sign Injection") instruction.
+(decl rv_vfsgnj_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnj_vf vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjVF) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfsgnjn.vv` ("Floating Point Sign Injection Negated") instruction.
+;; The output of this instruction is `vs2` with the negated sign bit from `vs1`.
+(decl rv_vfsgnjn_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnjn_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjnVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfneg.v` instruction.
+;; This instruction is a mnemonic for `vfsgnjn.vv vd, vs, vs`.
+(decl rv_vfneg_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vfneg_v vs mask vstate) (rv_vfsgnjn_vv vs vs mask vstate))
+
+;; Helper for emitting the `vfsgnjx.vv` ("Floating Point Sign Injection Exclusive") instruction.
+;; The output of this instruction is `vs2` with the XOR of the sign bits from `vs2` and `vs1`.
+;; When `vs2 == vs1` this implements `fabs`.
+(decl rv_vfsgnjx_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnjx_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjxVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfabs.v` instruction.
+;; This instruction is a mnemonic for `vfsgnjx.vv vd, vs, vs`.
+(decl rv_vfabs_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vfabs_v vs mask vstate) (rv_vfsgnjx_vv vs vs mask vstate))
+
+;; Helper for emitting the `vfsqrt.v` instruction.
+;; This instruction computes the element-wise square root of the source vector.
+(decl rv_vfsqrt_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vfsqrt_v vs mask vstate)
+  (vec_alu_rr (VecAluOpRR.VfsqrtV) vs mask vstate))
+
+;; Helper for emitting the `vslidedown.vx` instruction.
+;; `vslidedown` moves all elements in the vector down by n elements.
+;; What ends up in the topmost elements is determined by the tail policy.
+(decl rv_vslidedown_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vslidedown_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VslidedownVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vslidedown.vi` instruction.
+;; Unlike other `vi` instructions the immediate is zero extended.
+(decl rv_vslidedown_vi (VReg UImm5 VecOpMasking VState) VReg)
+(rule (rv_vslidedown_vi vs2 imm mask vstate)
+  (vec_alu_rr_uimm5 (VecAluOpRRImm5.VslidedownVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vslideup.vi` instruction.
+;; Unlike other `vi` instructions the immediate is zero extended.
+;; This is implemented as a two-source-operand instruction, since it only
+;; partially modifies the destination register.
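+;; For example, per the RISC-V vector spec, `vslideup.vi vd, vs2, 2` writes
+;; vs2[i] into vd[i+2] and leaves vd[0] and vd[1] untouched, which is why the
+;; old value of `vd` must be passed in as an additional source.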
+(decl rv_vslideup_vvi (VReg VReg UImm5 VecOpMasking VState) VReg)
+(rule (rv_vslideup_vvi vd vs2 imm mask vstate)
+  (vec_alu_rrr_uimm5 (VecAluOpRRRImm5.VslideupVI) vd vs2 imm mask vstate))
+
+;; Helper for emitting the `vmv.x.s` instruction.
+;; This instruction copies the first element of the source vector to the destination X register.
+;; Masked versions of this instruction are not supported.
+(decl rv_vmv_xs (VReg VState) XReg)
+(rule (rv_vmv_xs vs vstate)
+  (vec_alu_rr (VecAluOpRR.VmvXS) vs (unmasked) vstate))
+
+;; Helper for emitting the `vfmv.f.s` instruction.
+;; This instruction copies the first element of the source vector to the destination F register.
+;; Masked versions of this instruction are not supported.
+(decl rv_vfmv_fs (VReg VState) FReg)
+(rule (rv_vfmv_fs vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfmvFS) vs (unmasked) vstate))
+
+;; Helper for emitting the `vmv.s.x` instruction.
+;; This instruction copies the source X register into the first element of the destination vector.
+;; Masked versions of this instruction are not supported.
+(decl rv_vmv_sx (XReg VState) VReg)
+(rule (rv_vmv_sx vs vstate)
+  (vec_alu_rr (VecAluOpRR.VmvSX) vs (unmasked) vstate))
+
+;; Helper for emitting the `vfmv.s.f` instruction.
+;; This instruction copies the source F register into the first element of the destination vector.
+;; Masked versions of this instruction are not supported.
+(decl rv_vfmv_sf (FReg VState) VReg)
+(rule (rv_vfmv_sf vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfmvSF) vs (unmasked) vstate))
+
+;; Helper for emitting the `vmv.v.x` instruction.
+;; This instruction splats the X register into all elements of the destination vector.
+;; Masked versions of this instruction are called `vmerge`.
+(decl rv_vmv_vx (XReg VState) VReg)
+(rule (rv_vmv_vx vs vstate)
+  (vec_alu_rr (VecAluOpRR.VmvVX) vs (unmasked) vstate))
+
+;; Helper for emitting the `vfmv.v.f` instruction.
+;; This instruction splats the F register into all elements of the destination vector.
+;; Masked versions of this instruction are called `vmerge`.
+(decl rv_vfmv_vf (FReg VState) VReg)
+(rule (rv_vfmv_vf vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfmvVF) vs (unmasked) vstate))
+
+;; Helper for emitting the `vmv.v.i` instruction.
+;; This instruction splats the immediate value into all elements of the destination vector.
+;; Masked versions of this instruction are called `vmerge`.
+(decl rv_vmv_vi (Imm5 VState) VReg)
+(rule (rv_vmv_vi imm vstate)
+  (vec_alu_r_imm5 (VecAluOpRImm5.VmvVI) imm (unmasked) vstate))
+
+;; Helper for emitting the `vmerge.vvm` instruction.
+;; This instruction merges the elements of the two source vectors into the destination vector
+;; based on a mask. Elements are taken from the first source vector if the mask bit is clear,
+;; and from the second source vector if the mask bit is set. This instruction is always masked.
+;;
+;; vd[i] = v0.mask[i] ? vs1[i] : vs2[i]
+(decl rv_vmerge_vvm (VReg VReg VReg VState) VReg)
+(rule (rv_vmerge_vvm vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmergeVVM) vs2 vs1 (masked mask) vstate))
+
+;; Helper for emitting the `vmerge.vxm` instruction.
+;; Elements are taken from the first source vector if the mask bit is clear, and from the X
+;; register if the mask bit is set. This instruction is always masked.
+;;
+;; vd[i] = v0.mask[i] ? x[rs1] : vs2[i]
+(decl rv_vmerge_vxm (VReg XReg VReg VState) VReg)
+(rule (rv_vmerge_vxm vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmergeVXM) vs2 vs1 (masked mask) vstate))
+
+;; Helper for emitting the `vfmerge.vfm` instruction.
+;; Elements are taken from the first source vector if the mask bit is clear, and from the F +;; register if the mask bit is set. This instruction is always masked. +;; +;; vd[i] = v0.mask[i] ? f[rs1] : vs2[i] +(decl rv_vfmerge_vfm (VReg FReg VReg VState) VReg) +(rule (rv_vfmerge_vfm vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmergeVFM) vs2 vs1 (masked mask) vstate)) + +;; Helper for emitting the `vmerge.vim` instruction. +;; Elements are taken from the first source vector if the mask bit is clear, and from the +;; immediate value if the mask bit is set. This instruction is always masked. +;; +;; vd[i] = v0.mask[i] ? imm : vs2[i] +(decl rv_vmerge_vim (VReg Imm5 VReg VState) VReg) +(rule (rv_vmerge_vim vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmergeVIM) vs2 imm (masked mask) vstate)) + + +;; Helper for emitting the `vredminu.vs` instruction. +;; +;; vd[0] = minu( vs1[0] , vs2[*] ) +(decl rv_vredminu_vs (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vredminu_vs vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VredminuVS) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vredmaxu.vs` instruction. +;; +;; vd[0] = maxu( vs1[0] , vs2[*] ) +(decl rv_vredmaxu_vs (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vredmaxu_vs vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VredmaxuVS) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vv` instruction. +;; +;; vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; +(decl rv_vrgather_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vrgather_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrgatherVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vx` instruction. +;; +;; vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] +(decl rv_vrgather_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vrgather_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrgatherVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vi` instruction. +(decl rv_vrgather_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vrgather_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VrgatherVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vcompress.vm` instruction. +;; +;; The vector compress instruction allows elements selected by a vector mask +;; register from a source vector register group to be packed into contiguous +;; elements at the start of the destination vector register group. +;; +;; The mask register is specified through vs1 +(decl rv_vcompress_vm (VReg VReg VState) VReg) +(rule (rv_vcompress_vm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VcompressVM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmseq.vv` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vx` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vi` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmseq_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmseqVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsne.vv` (Vector Mask Set If Not Equal) instruction. 
+(decl rv_vmsne_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmsne_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsneVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsne.vx` (Vector Mask Set If Not Equal) instruction.
+(decl rv_vmsne_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vmsne_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsneVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsne.vi` (Vector Mask Set If Not Equal) instruction.
+(decl rv_vmsne_vi (VReg Imm5 VecOpMasking VState) VReg)
+(rule (rv_vmsne_vi vs2 imm mask vstate)
+  (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsneVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vmsltu.vv` (Vector Mask Set If Less Than, Unsigned) instruction.
+(decl rv_vmsltu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmsltu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsltuVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsltu.vx` (Vector Mask Set If Less Than, Unsigned) instruction.
+(decl rv_vmsltu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vmsltu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsltuVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmslt.vv` (Vector Mask Set If Less Than) instruction.
+(decl rv_vmslt_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmslt_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsltVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmslt.vx` (Vector Mask Set If Less Than) instruction.
+(decl rv_vmslt_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vmslt_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsltVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsleu.vv` (Vector Mask Set If Less Than or Equal, Unsigned) instruction.
+(decl rv_vmsleu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmsleu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsleuVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsleu.vx` (Vector Mask Set If Less Than or Equal, Unsigned) instruction.
+(decl rv_vmsleu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vmsleu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsleuVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsleu.vi` (Vector Mask Set If Less Than or Equal, Unsigned) instruction.
+(decl rv_vmsleu_vi (VReg Imm5 VecOpMasking VState) VReg)
+(rule (rv_vmsleu_vi vs2 imm mask vstate)
+  (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleuVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vmsle.vv` (Vector Mask Set If Less Than or Equal) instruction.
+(decl rv_vmsle_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmsle_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsleVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsle.vx` (Vector Mask Set If Less Than or Equal) instruction.
+(decl rv_vmsle_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vmsle_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsleVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsle.vi` (Vector Mask Set If Less Than or Equal) instruction.
+(decl rv_vmsle_vi (VReg Imm5 VecOpMasking VState) VReg)
+(rule (rv_vmsle_vi vs2 imm mask vstate)
+  (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vmsgtu.vv` (Vector Mask Set If Greater Than, Unsigned) instruction.
+;; This is an alias for `vmsltu.vv` with the operands inverted.
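+;; (The base vector ISA only provides `vmsgtu.vx`/`vmsgtu.vi` encodings, so the
+;; vector-vector form is synthesized by swapping the operands of `vmsltu.vv`.)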
+(decl rv_vmsgtu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vv vs2 vs1 mask vstate) (rv_vmsltu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgtu.vx` (Vector Mask Set If Greater Than, Unsigned) instruction. +(decl rv_vmsgtu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgtu.vi` (Vector Mask Set If Greater Than, Unsigned) instruction. +(decl rv_vmsgtu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtuVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgt.vv` (Vector Mask Set If Greater Than) instruction. +;; This is an alias for `vmslt.vv` with the operands inverted. +(decl rv_vmsgt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vv vs2 vs1 mask vstate) (rv_vmslt_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgt.vx` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgt.vi` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgt_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgeu.vv` (Vector Mask Set If Greater Than or Equal, Unsigned) instruction. +;; This is an alias for `vmsleu.vv` with the operands inverted. +(decl rv_vmsgeu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgeu_vv vs2 vs1 mask vstate) (rv_vmsleu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsge.vv` (Vector Mask Set If Greater Than or Equal) instruction. +;; This is an alias for `vmsle.vv` with the operands inverted. +(decl rv_vmsge_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsge_vv vs2 vs1 mask vstate) (rv_vmsle_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfeq.vv` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfeq.vf` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vv` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vf` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vv` (Vector Mask Set If Float Less Than) instruction. +(decl rv_vmflt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmflt_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfltVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vf` (Vector Mask Set If Float Less Than) instruction. 
+(decl rv_vmflt_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmflt_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfltVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfle.vv` (Vector Mask Set If Float Less Than Or Equal) instruction. +(decl rv_vmfle_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfle_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfleVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfle.vf` (Vector Mask Set If Float Less Than Or Equal) instruction. +(decl rv_vmfle_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfle_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfleVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfgt.vv` (Vector Mask Set If Float Greater Than) instruction. +;; This is an alias for `vmflt.vv` with the operands inverted. +(decl rv_vmfgt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfgt_vv vs2 vs1 mask vstate) (rv_vmflt_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfgt.vf` (Vector Mask Set If Float Greater Than) instruction. +(decl rv_vmfgt_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfgt_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfgtVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfge.vv` (Vector Mask Set If Float Greater Than Or Equal) instruction. +;; This is an alias for `vmfle.vv` with the operands inverted. +(decl rv_vmfge_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfge_vv vs2 vs1 mask vstate) (rv_vmfle_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfge.vf` (Vector Mask Set If Float Greater Than Or Equal) instruction. +(decl rv_vmfge_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfge_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfgeVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vzext.vf2` instruction. +;; Zero-extend SEW/2 source to SEW destination +(decl rv_vzext_vf2 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf2 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF2) vs mask vstate)) + +;; Helper for emitting the `vzext.vf4` instruction. +;; Zero-extend SEW/4 source to SEW destination +(decl rv_vzext_vf4 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf4 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF4) vs mask vstate)) + +;; Helper for emitting the `vzext.vf8` instruction. +;; Zero-extend SEW/8 source to SEW destination +(decl rv_vzext_vf8 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf8 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF8) vs mask vstate)) + +;; Helper for emitting the `vsext.vf2` instruction. +;; Sign-extend SEW/2 source to SEW destination +(decl rv_vsext_vf2 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf2 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF2) vs mask vstate)) + +;; Helper for emitting the `vsext.vf4` instruction. +;; Sign-extend SEW/4 source to SEW destination +(decl rv_vsext_vf4 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf4 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF4) vs mask vstate)) + +;; Helper for emitting the `vsext.vf8` instruction. +;; Sign-extend SEW/8 source to SEW destination +(decl rv_vsext_vf8 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf8 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF8) vs mask vstate)) + +;; Helper for emitting the `vnclip.wi` instruction. 
+;; +;; vd[i] = clip(roundoff_signed(vs2[i], uimm)) +(decl rv_vnclip_wi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vnclip_wi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipWI) vs2 imm mask vstate)) + +;; Helper for emitting the `vnclipu.wi` instruction. +;; +;; vd[i] = clip(roundoff_unsigned(vs2[i], uimm)) +(decl rv_vnclipu_wi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vnclipu_wi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipuWI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmand.mm` (Mask Bitwise AND) instruction. +;; +;; vd.mask[i] = vs2.mask[i] && vs1.mask[i] +(decl rv_vmand_mm (VReg VReg VState) VReg) +(rule (rv_vmand_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmandMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmor.mm` (Mask Bitwise OR) instruction. +;; +;; vd.mask[i] = vs2.mask[i] || vs1.mask[i] +(decl rv_vmor_mm (VReg VReg VState) VReg) +(rule (rv_vmor_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmorMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmnand.mm` (Mask Bitwise NAND) instruction. +;; +;; vd.mask[i] = !(vs2.mask[i] && vs1.mask[i]) +(decl rv_vmnand_mm (VReg VReg VState) VReg) +(rule (rv_vmnand_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmnandMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmnot.m` (Mask Bitwise NOT) instruction. +;; This is an alias for `vmnand.mm vd, vs, vs` +;; +;; vd.mask[i] = !vs.mask[i] +(decl rv_vmnot_m (VReg VState) VReg) +(rule (rv_vmnot_m vs vstate) (rv_vmnand_mm vs vs vstate)) + +;; Helper for emitting the `vmnor.mm` (Mask Bitwise NOR) instruction. +;; +;; vd.mask[i] = !(vs2.mask[i] || vs1.mask[i]) +(decl rv_vmnor_mm (VReg VReg VState) VReg) +(rule (rv_vmnor_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmnorMM) vs2 vs1 (unmasked) vstate)) + +;;;; Multi-Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_extractlane (Type VReg u8) Reg) + +;; When extracting lane 0 for floats, we can use `vfmv.f.s` directly. +(rule 3 (gen_extractlane (ty_vec_fits_in_register ty) src 0) + (if (ty_vector_float ty)) + (rv_vfmv_fs src ty)) + +;; When extracting lane 0 for integers, we can use `vmv.x.s` directly. +(rule 2 (gen_extractlane (ty_vec_fits_in_register ty) src 0) + (if (ty_vector_not_float ty)) + (rv_vmv_xs src ty)) + +;; In the general case, we must first use a `vslidedown` to place the correct lane +;; in index 0, and then use the appropriate `vmv` instruction. +;; If the index fits into a 5-bit immediate, we can emit a `vslidedown.vi`. +(rule 1 (gen_extractlane (ty_vec_fits_in_register ty) src (uimm5_from_u8 idx)) + (gen_extractlane ty (rv_vslidedown_vi src idx (unmasked) ty) 0)) + +;; Otherwise lower it into an X register. +(rule 0 (gen_extractlane (ty_vec_fits_in_register ty) src idx) + (gen_extractlane ty (rv_vslidedown_vx src (imm $I64 idx) (unmasked) ty) 0)) + + +;; Build a vector mask from a u64 +;; TODO(#6571): We should merge this with the `vconst` rules, and take advantage of +;; the other existing `vconst` rules. +(decl gen_vec_mask (u64) VReg) + +;; When the immediate fits in a 5-bit immediate, we can use `vmv.v.i` directly. +(rule 1 (gen_vec_mask (imm5_from_u64 imm)) + (rv_vmv_vi imm (vstate_from_type $I64X2))) + +;; Materialize the mask into an X register, and move it into the bottom of +;; the vector register. +(rule 0 (gen_vec_mask mask) + (rv_vmv_sx (imm $I64 mask) (vstate_from_type $I64X2))) + + +;; Loads a `VCodeConstant` value into a vector register. 
For some special `VCodeConstant`s +;; we can use a dedicated instruction, otherwise we load the value from the pool. +;; +;; Type is the preferred type to use when loading the constant. +(decl gen_constant (Type VCodeConstant) VReg) + +;; The fallback case is to load the constant from the pool. +(rule (gen_constant ty n) + (vec_load + (element_width_from_type ty) + (VecAMode.UnitStride (gen_const_amode n)) + (mem_flags_trusted) + (unmasked) + ty)) + + +;; Emits a vslidedown instruction that moves half the lanes down. +(decl gen_slidedown_half (Type VReg) VReg) + +;; If the lane count can fit in a 5-bit immediate, we can use `vslidedown.vi`. +(rule 1 (gen_slidedown_half (ty_vec_fits_in_register ty) src) + (if-let (uimm5_from_u64 amt) (u64_udiv (ty_lane_count ty) 2)) + (rv_vslidedown_vi src amt (unmasked) ty)) + +;; Otherwise lower it into an X register. +(rule 0 (gen_slidedown_half (ty_vec_fits_in_register ty) src) + (if-let amt (u64_udiv (ty_lane_count ty) 2)) + (rv_vslidedown_vx src (imm $I64 amt) (unmasked) ty)) + + +;; Expands a mask into SEW wide lanes. Enabled lanes are set to all ones, disabled +;; lanes are set to all zeros. +(decl gen_expand_mask (Type VReg) VReg) +(rule (gen_expand_mask ty mask) + (if-let zero (imm5_from_i8 0)) + (if-let neg1 (imm5_from_i8 -1)) + (rv_vmerge_vim (rv_vmv_vi zero ty) neg1 mask ty)) + + +;; Builds a vector mask corresponding to the IntCC operation. +;; TODO: We are still missing some rules here for immediates. See #6623 +(decl gen_icmp_mask (Type IntCC Value Value) VReg) + +;; IntCC.Equal + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x y) + (rv_vmseq_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (splat y)) + (rv_vmseq_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (splat x) y) + (rv_vmseq_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (replicated_imm5 y)) + (rv_vmseq_vi x y (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (replicated_imm5 x) y) + (rv_vmseq_vi y x (unmasked) ty)) + +;; IntCC.NotEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x y) + (rv_vmsne_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (splat y)) + (rv_vmsne_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (splat x) y) + (rv_vmsne_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (replicated_imm5 y)) + (rv_vmsne_vi x y (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (replicated_imm5 x) y) + (rv_vmsne_vi y x (unmasked) ty)) + +;; IntCC.UnsignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x y) + (rv_vmsltu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x (splat y)) + (rv_vmsltu_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) (splat x) y) + (rv_vmsgtu_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) (replicated_imm5 x) y) + (rv_vmsgtu_vi y x (unmasked) ty)) + +;; IntCC.SignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x y) + (rv_vmslt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x 
(splat y)) + (rv_vmslt_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) (splat x) y) + (rv_vmsgt_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) (replicated_imm5 x) y) + (rv_vmsgt_vi y x (unmasked) ty)) + +;; IntCC.UnsignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x y) + (rv_vmsleu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (splat y)) + (rv_vmsleu_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (replicated_imm5 y)) + (rv_vmsleu_vi x y (unmasked) ty)) + +;; IntCC.SignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x y) + (rv_vmsle_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (splat y)) + (rv_vmsle_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (replicated_imm5 y)) + (rv_vmsle_vi x y (unmasked) ty)) + +;; IntCC.UnsignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x y) + (rv_vmsgtu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (splat y)) + (rv_vmsgtu_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) (splat x) y) + (rv_vmsltu_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (replicated_imm5 y)) + (rv_vmsgtu_vi x y (unmasked) ty)) + +;; IntCC.SignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x y) + (rv_vmsgt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (splat y)) + (rv_vmsgt_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) (splat x) y) + (rv_vmslt_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (replicated_imm5 y)) + (rv_vmsgt_vi x y (unmasked) ty)) + +;; IntCC.UnsignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) x y) + (rv_vmsgeu_vv x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) (splat x) y) + (rv_vmsleu_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) (replicated_imm5 x) y) + (rv_vmsleu_vi y x (unmasked) ty)) + +;; IntCC.SignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) x y) + (rv_vmsge_vv x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) (splat x) y) + (rv_vmsle_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) (replicated_imm5 x) y) + (rv_vmsle_vi y x (unmasked) ty)) + + + +;; Builds a vector mask corresponding to the FloatCC operation. 
+(decl gen_fcmp_mask (Type FloatCC Value Value) VReg)
+
+;; FloatCC.Equal
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x y)
+  (rv_vmfeq_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x (splat y))
+  (rv_vmfeq_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) (splat x) y)
+  (rv_vmfeq_vf y x (unmasked) ty))
+
+;; FloatCC.NotEqual
+;; Note: This is UnorderedNotEqual. It is the only unordered comparison that is not named as such.
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x y)
+  (rv_vmfne_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x (splat y))
+  (rv_vmfne_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) (splat x) y)
+  (rv_vmfne_vf y x (unmasked) ty))
+
+;; FloatCC.LessThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x y)
+  (rv_vmflt_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x (splat y))
+  (rv_vmflt_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) (splat x) y)
+  (rv_vmfgt_vf y x (unmasked) ty))
+
+;; FloatCC.LessThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x y)
+  (rv_vmfle_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x (splat y))
+  (rv_vmfle_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) (splat x) y)
+  (rv_vmfge_vf y x (unmasked) ty))
+
+;; FloatCC.GreaterThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x y)
+  (rv_vmfgt_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x (splat y))
+  (rv_vmfgt_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) (splat x) y)
+  (rv_vmflt_vf y x (unmasked) ty))
+
+;; FloatCC.GreaterThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x y)
+  (rv_vmfge_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x (splat y))
+  (rv_vmfge_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) (splat x) y)
+  (rv_vmfle_vf y x (unmasked) ty))
+
+;; FloatCC.Ordered
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Ordered) x y)
+  (rv_vmand_mm
+    (gen_fcmp_mask ty (FloatCC.Equal) x x)
+    (gen_fcmp_mask ty (FloatCC.Equal) y y)
+    ty))
+
+;; FloatCC.Unordered
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Unordered) x y)
+  (rv_vmor_mm
+    (gen_fcmp_mask ty (FloatCC.NotEqual) x x)
+    (gen_fcmp_mask ty (FloatCC.NotEqual) y y)
+    ty))
+
+;; FloatCC.OrderedNotEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.OrderedNotEqual) x y)
+  (rv_vmor_mm
+    (gen_fcmp_mask ty (FloatCC.LessThan) x y)
+    (gen_fcmp_mask ty (FloatCC.LessThan) y x)
+    ty))
+
+;; FloatCC.UnorderedOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrEqual) x y)
+  (rv_vmnor_mm
+    (gen_fcmp_mask ty (FloatCC.LessThan) x y)
+    (gen_fcmp_mask ty (FloatCC.LessThan) y x)
+    ty))
+
+;; FloatCC.UnorderedOrGreaterThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThan) x y)
+  (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThanOrEqual) x y) ty))
+
+;; FloatCC.UnorderedOrGreaterThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThanOrEqual) x y)
+  (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThan) x y) ty))
+
+;; FloatCC.UnorderedOrLessThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThan) x y)
+  (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThanOrEqual) x y) ty))
+
+;; FloatCC.UnorderedOrLessThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThanOrEqual) x y)
+  (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThan) x y) ty))
diff --git a/cranelift/codegen/src/isa/zkasm/lower.isle b/cranelift/codegen/src/isa/zkasm/lower.isle
new file mode 100644
index 000000000000..ad9bd74abc68
--- /dev/null
+++ b/cranelift/codegen/src/isa/zkasm/lower.isle
@@ -0,0 +1,1877 @@
+;; zkasm instruction selection and CLIF-to-MachInst lowering.
+
+;; The main lowering constructor term: takes a clif `Inst` and returns the
+;; register(s) within which the lowered instruction's result values live.
+(decl partial lower (Inst) InstOutput)
+
+;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty (iconst (u64_from_imm64 n))))
+  (imm ty n))
+
+;; ;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type (ty_vec_fits_in_register ty) (vconst n)))
+  (gen_constant ty (const_to_vconst n)))
+
+;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (f32const (u32_from_ieee32 n)))
+  (imm $F32 n))
+
+;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (f64const (u64_from_ieee64 n)))
+  (imm $F64 n))
+
+;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty (null)))
+  (imm ty 0))
+
+
+;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Base case, simply adding things in registers.
+(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x y)))
+  (rv_add x y))
+
+(rule 1 (lower (iadd (imm32_from_value x) (imm32_from_value y)))
+  (zk_add x y))
+
+;; Fused Multiply Accumulate Rules `vmacc`
+;;
+;; I don't think we can use `vmadd`/`vmnsub` here, since they modify the
+;; multiplication register instead of the addition one. The pattern they
+;; would match is otherwise exactly the same.
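+;;
+;; Note (per the RISC-V V spec): `vmacc.vv vd, vs1, vs2` computes
+;; vd[i] = +(vs1[i] * vs2[i]) + vd[i], so in the rules below the addend operand
+;; of the `iadd` is routed into the accumulator (`vd`) slot of the helper while
+;; the two factors of the `imul` become `vs2`/`vs1`.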
+ +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul y z)))) + (rv_vmacc_vv x y z (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul y (splat z))))) + (rv_vmacc_vx x y z (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul (splat y) z)))) + (rv_vmacc_vx x z y (unmasked) ty)) + +(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul x y) z))) + (rv_vmacc_vv z x y (unmasked) ty)) + +(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul x (splat y)) z))) + (rv_vmacc_vx z x y (unmasked) ty)) + +(rule 15 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul (splat x) y) z))) + (rv_vmacc_vx z y x (unmasked) ty)) + +;; Fused Multiply Subtract Rules `vnmsac` + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul y z))))) + (rv_vnmsac_vv x y z (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul y (splat z)))))) + (rv_vnmsac_vx x y z (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul (splat y) z))))) + (rv_vnmsac_vx x z y (unmasked) ty)) + +(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul x y)) z))) + (rv_vnmsac_vv z x y (unmasked) ty)) + +(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul x (splat y))) z))) + (rv_vnmsac_vx z x y (unmasked) ty)) + +(rule 15 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul (splat x) y)) z))) + (rv_vnmsac_vx z y x (unmasked) ty)) + +;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;; +(rule + (lower (has_type (fits_in_64 ty) (uadd_overflow_trap x y tc))) + (let ((res ValueRegs (lower_uadd_overflow x y ty)) + (_ InstOutput (gen_trapif (value_regs_get res 1) tc))) + (value_regs_get res 0))) + + +;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Base case, simply subtracting things in registers. 
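+;; For the narrow cases below, `subw` computes the 32-bit difference and
+;; sign-extends it into the 64-bit register; the bits above the type width of an
+;; I8/I16 result are a don't-care for consumers. A rough scalar sketch of the
+;; $I32 case (illustrative only):
+;;
+;;   r = ((x as i32).wrapping_sub(y as i32)) as i64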
+ +(rule (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y))) + (rv_sub x y)) + +(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (isub x y))) + (rv_subw x y)) + +(rule 2 (lower (has_type $I128 (isub x y))) + (i128_sub x y)) + +;; SIMD Vectors +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (isub x y))) + (rv_vsub_vv x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (isub x (splat y)))) + (rv_vsub_vx x y (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (isub x (splat (sextend y @ (value_type sext_ty)))))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) sext_ty)) + (rv_vwsub_wx x y (unmasked) (vstate_mf2 half_ty))) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (isub x (splat (uextend y @ (value_type uext_ty)))))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) uext_ty)) + (rv_vwsubu_wx x y (unmasked) (vstate_mf2 half_ty))) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (isub (splat x) y))) + (rv_vrsub_vx y x (unmasked) ty)) + +(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (isub (replicated_imm5 x) y))) + (rv_vrsub_vi y x (unmasked) ty)) + + +;; Signed Widening Low Subtractions + +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (swiden_low y @ (value_type in_ty))))) + (rv_vwsub_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_low x @ (value_type in_ty)) + (swiden_low y)))) + (rv_vwsub_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_low x @ (value_type in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwsub_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Signed Widening High Subtractions +;; These are the same as the low widenings, but we first slide down the inputs. + +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (swiden_high y @ (value_type in_ty))))) + (rv_vwsub_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_high x @ (value_type in_ty)) + (swiden_high y)))) + (rv_vwsub_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_high x @ (value_type in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwsub_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening Low Subtractions + +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (uwiden_low y @ (value_type in_ty))))) + (rv_vwsubu_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_low x @ (value_type in_ty)) + (uwiden_low y)))) + (rv_vwsubu_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_low x @ (value_type in_ty)) + (splat (uextend y @ (value_type uext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwsubu_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening High Subtractions +;; These are the same as the low widenings, but we first slide down the inputs. 
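+;; Concretely (an illustrative sketch): for I16X8 inputs, `uwiden_high` refers to
+;; lanes 4..7. `gen_slidedown_half` slides a vector down by half its lane count so
+;; those lanes land in positions 0..3, and the widening subtract then produces one
+;; 32-bit lane per input pair, roughly
+;;
+;;   out[i] = (x[i + 4] as u32).wrapping_sub(y[i + 4] as u32)    for i in 0..4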
+ +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (uwiden_high y @ (value_type in_ty))))) + (rv_vwsubu_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_high x @ (value_type in_ty)) + (uwiden_high y)))) + (rv_vwsubu_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_high x @ (value_type in_ty)) + (splat (uextend y @ (value_type uext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwsubu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Signed Widening Mixed High/Low Subtractions + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_low x @ (value_type in_ty)) + (swiden_high y)))) + (rv_vwsub_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_high x @ (value_type in_ty)) + (swiden_low y)))) + (rv_vwsub_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening Mixed High/Low Subtractions + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_low x @ (value_type in_ty)) + (uwiden_high y)))) + (rv_vwsubu_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_high x @ (value_type in_ty)) + (uwiden_low y)))) + (rv_vwsubu_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + + +;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_int ty) (ineg val))) + (neg ty val)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (ineg x))) + (rv_vneg_v x (unmasked) ty)) + + +;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y))) + (rv_mul x y)) + +(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (imul x y))) + (rv_mulw x y)) + +;; for I128 +(rule 2 (lower (has_type $I128 (imul x y))) + (let + ((x_regs ValueRegs x) + (x_lo XReg (value_regs_get x_regs 0)) + (x_hi XReg (value_regs_get x_regs 1)) + + ;; Get the high/low registers for `y`. 
+ (y_regs ValueRegs y) + (y_lo XReg (value_regs_get y_regs 0)) + (y_hi XReg (value_regs_get y_regs 1)) + + ;; 128bit mul formula: + ;; dst_lo = x_lo * y_lo + ;; dst_hi = mulhu(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo) + ;; + ;; We can convert the above formula into the following + ;; mulhu dst_hi, x_lo, y_lo + ;; madd dst_hi, x_lo, y_hi, dst_hi + ;; madd dst_hi, x_hi, y_lo, dst_hi + ;; madd dst_lo, x_lo, y_lo, zero + (dst_hi1 XReg (rv_mulhu x_lo y_lo)) + (dst_hi2 XReg (madd x_lo y_hi dst_hi1)) + (dst_hi XReg (madd x_hi y_lo dst_hi2)) + (dst_lo XReg (madd x_lo y_lo (zero_reg)))) + (value_regs dst_lo dst_hi))) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (imul x y))) + (rv_vmul_vv x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (imul (splat x) y))) + (rv_vmul_vx y x (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (imul x (splat y)))) + (rv_vmul_vx x y (unmasked) ty)) + + +(rule 6 (lower (imul (imm32_from_value x) (imm32_from_value y))) + (zk_mul x y)) + + +;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y))) + (lower_smlhi ty (sext x ty $I64) (sext y ty $I64))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x y))) + (rv_vmulh_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (smulhi (splat x) y))) + (rv_vmulh_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x (splat y)))) + (rv_vmulh_vx x y (unmasked) ty)) + +;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (umulhi x y))) + (lower_umlhi ty (zext x ty $I64) (zext y ty $I64))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x y))) + (rv_vmulhu_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (umulhi (splat x) y))) + (rv_vmulhu_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x (splat y)))) + (rv_vmulhu_vx x y (unmasked) ty)) + +;;;; Rules for `div` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule -1 (lower (has_type (fits_in_32 ty) (udiv x y))) + (let + ((y2 XReg (zext y ty $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_divuw (zext x ty $I64) y2))) + +(rule -1 (lower (has_type (fits_in_32 ty) (sdiv x y))) + (let + ((a XReg (sext x ty $I64)) + (b XReg (sext y ty $I64)) + (_ InstOutput (gen_div_overflow a b ty)) + (_ InstOutput (gen_div_by_zero b))) + (rv_divw a b))) + +(rule (lower (has_type $I64 (sdiv x y))) + (let + ((_ InstOutput (gen_div_overflow x y $I64)) + (_ InstOutput (gen_div_by_zero y)) ) + (rv_div x y))) + +(rule (lower (has_type $I64 (udiv x y))) + (let + ((_ InstOutput (gen_div_by_zero y))) + (rv_divu x y))) + +;;;; Rules for `rem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule -1 (lower (has_type (fits_in_16 ty) (urem x y))) + (let + ((y2 XReg (zext y ty $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remuw (zext x ty $I64) y2))) + +(rule -1 (lower (has_type (fits_in_16 ty) (srem x y))) + (let + ((y2 XReg (sext y ty $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remw (sext x ty $I64) y2))) + +(rule (lower (has_type $I32 (srem x y))) + (let + ((y2 XReg (sext y $I32 $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remw x y2))) + +(rule (lower (has_type $I32 (urem x y))) + (let + ((y2 XReg (zext y $I32 $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remuw 
x y2))) + +(rule (lower (has_type $I64 (srem x y))) + (let + ((_ InstOutput (gen_div_by_zero y))) + (rv_rem x y))) + +(rule (lower (has_type $I64 (urem x y))) + (let + ((_ InstOutput (gen_div_by_zero y))) + (rv_remu x y))) + +;;;; Rules for `and` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int ty) (band x y))) + (gen_and ty x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (band x (imm12_from_value y)))) + (rv_andi x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (band (imm12_from_value x) y))) + (rv_andi y x)) + +(rule 3 (lower (has_type (ty_scalar_float ty) (band x y))) + (lower_float_binary (AluOPRRR.And) x y ty)) + +;; Specialized lowerings for `(band x (bnot y))` which is additionally produced +;; by Cranelift's `band_not` instruction that is legalized into the simpler +;; forms early on. + +(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (band x (bnot y)))) + (if-let $true (has_zbb)) + (rv_andn x y)) + +(rule 5 (lower (has_type (fits_in_64 (ty_int ty)) (band (bnot y) x))) + (if-let $true (has_zbb)) + (rv_andn x y)) + +(rule 6 (lower (has_type $I128 (band x (bnot y)))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_andn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_andn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 7 (lower (has_type $I128 (band (bnot y) x))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_andn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_andn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (band x y))) + (rv_vand_vv x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (band x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vand_vx x y (unmasked) ty)) + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (band (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vand_vx y x (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (band x (replicated_imm5 y)))) + (rv_vand_vi x y (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (band (replicated_imm5 x) y))) + (rv_vand_vi y x (unmasked) ty)) + + +;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int ty) (bor x y))) + (gen_or ty x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (imm12_from_value y)))) + (rv_ori x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bor (imm12_from_value x) y))) + (rv_ori y x)) + +(rule 3 (lower (has_type (ty_scalar_float ty) (bor x y))) + (lower_float_binary (AluOPRRR.Or) x y ty)) + +;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced +;; by Cranelift's `bor_not` instruction that is legalized into the simpler +;; forms early on. 
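+;; With `Zbb`, `orn rd, rs1, rs2` computes rs1 | !rs2 in a single instruction, so
+;; the rules below boil down to (a rough scalar sketch):
+;;
+;;   r = x | !y
+;;
+;; and for $I128 the same operation is simply applied to the low and high halves.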
+ +(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (bnot y)))) + (if-let $true (has_zbb)) + (rv_orn x y)) + +(rule 5 (lower (has_type (fits_in_64 (ty_int ty)) (bor (bnot y) x))) + (if-let $true (has_zbb)) + (rv_orn x y)) + +(rule 6 (lower (has_type $I128 (bor x (bnot y)))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_orn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_orn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 7 (lower (has_type $I128 (bor (bnot y) x))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_orn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_orn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (bor x y))) + (rv_vor_vv x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (bor x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vor_vx x y (unmasked) ty)) + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (bor (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vor_vx y x (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (bor x (replicated_imm5 y)))) + (rv_vor_vi x y (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (bor (replicated_imm5 x) y))) + (rv_vor_vi y x (unmasked) ty)) + + +;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y))) + (rv_xor x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x (imm12_from_value y)))) + (rv_xori x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bxor (imm12_from_value x) y))) + (rv_xori y x)) + +(rule 3 (lower (has_type $I128 (bxor x y))) + (lower_b128_binary (AluOPRRR.Xor) x y)) + +(rule 4 (lower (has_type (ty_scalar_float ty) (bxor x y))) + (lower_float_binary (AluOPRRR.Xor) x y ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (bxor x y))) + (rv_vxor_vv x y (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (bxor x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vxor_vx x y (unmasked) ty)) + +(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (bxor (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vxor_vx y x (unmasked) ty)) + +(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (bxor x (replicated_imm5 y)))) + (rv_vxor_vi x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (bxor (replicated_imm5 x) y))) + (rv_vxor_vi y x (unmasked) ty)) + + +;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar ty) (bnot x))) + (gen_bnot ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (bnot x))) + (rv_vnot_v x (unmasked) ty)) + +;;;; Rules for `bit_reverse` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 (ty_int ty)) (bitrev x))) + (lower_bit_reverse x ty)) + +(rule 1 (lower (has_type $I128 (bitrev x))) + (let ((val ValueRegs x) + (lo_rev XReg (lower_bit_reverse (value_regs_get val 0) $I64)) + (hi_rev XReg (lower_bit_reverse (value_regs_get val 1) $I64))) + (value_regs hi_rev lo_rev))) + +;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bswap x))) + (gen_bswap ty x)) + +(rule 2 (lower (has_type $I128 (bswap x))) + (value_regs + (gen_bswap $I64 
(value_regs_get x 1)) + (gen_bswap $I64 (value_regs_get x 0)))) + + +;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (ctz x))) + (lower_ctz ty x)) + +(rule 1 (lower (has_type $I128 (ctz x))) + (lower_ctz_128 x)) + +;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (clz x))) + (lower_clz ty x)) + +(rule 1 (lower (has_type $I128 (clz x))) + (lower_clz_i128 x)) + +;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (cls x))) + (lower_cls ty x)) + +(rule 1 (lower (has_type $I128 (cls x))) + (lower_cls_i128 x)) + +;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type out_ty (uextend val @ (value_type in_ty)))) + (extend val (ExtendOp.Zero) in_ty out_ty)) + +;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type out_ty (sextend val @ (value_type in_ty)))) + (extend val (ExtendOp.Signed) in_ty out_ty)) + +;; The instructions below are present in RV64I and sign-extend the result to 64 bits. + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (iadd x y))))) + (rv_addw x y)) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (isub x y))))) + (rv_subw x y)) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ishl x y))))) + (rv_sllw x (value_regs_get y 0))) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ushr x y))))) + (rv_srlw x (value_regs_get y 0))) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (sshr x y))))) + (rv_sraw x (value_regs_get y 0))) + + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (iadd x (imm12_from_value y)))))) + (rv_addiw x y)) + +(rule 3 (lower (has_type $I64 (sextend (has_type $I32 (iadd (imm12_from_value x) y))))) + (rv_addiw y x)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ishl x (imm12_from_value y)))))) + (rv_slliw x y)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ushr x (imm12_from_value y)))))) + (rv_srliw x y)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (sshr x (imm12_from_value y)))))) + (rv_sraiw x y)) + +;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (popcnt x))) + (lower_popcnt x ty)) + +(rule 1 (lower (has_type $I128 (popcnt x))) + (lower_popcnt_i128 x)) + +;; Popcount using multiply. +;; This is popcount64c() from +;; http://en.wikipedia.org/wiki/Hamming_weight +;; +;; Here's the C version for 32 bits: +;; x = x - ((x>> 1) & 0x55555555); +;; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); +;; x = ((x + (x >> 4)) & 0x0F0F0F0F); +;; return (x * 0x01010101) >> 24; // Here 24 is the type width - 8. +;; +;; TODO: LLVM generates a much better implementation for I8X16. See: https://godbolt.org/z/qr6vf9Gr3 +;; For the other types it seems to be largely the same. 
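+;; For reference, a 64-bit Rust rendering of the same bit trick (illustrative only;
+;; the vector rule below applies it per lane, using lane-width masks and a final
+;; shift of lane_bits - 8):
+;;
+;;   fn popcount64(mut x: u64) -> u64 {
+;;       x -= (x >> 1) & 0x5555_5555_5555_5555;
+;;       x = (x & 0x3333_3333_3333_3333) + ((x >> 2) & 0x3333_3333_3333_3333);
+;;       x = (x + (x >> 4)) & 0x0f0f_0f0f_0f0f_0f0f;
+;;       x.wrapping_mul(0x0101_0101_0101_0101) >> 56 // 56 = 64 - 8
+;;   }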
+(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (popcnt x))) + (if-let one (u64_to_uimm5 1)) + (if-let two (u64_to_uimm5 2)) + (if-let four (u64_to_uimm5 4)) + + (let (;; x = x - ((x >> 1) & 0x55555555); + (mask_55 XReg (imm (lane_type ty) (u64_and 0x5555555555555555 (ty_mask (lane_type ty))))) + (count2_shr VReg (rv_vsrl_vi x one (unmasked) ty)) + (count2_and VReg (rv_vand_vx count2_shr mask_55 (unmasked) ty)) + (count2 VReg (rv_vsub_vv x count2_and (unmasked) ty)) + + ;; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + (mask_33 XReg (imm (lane_type ty) (u64_and 0x3333333333333333 (ty_mask (lane_type ty))))) + (count4_shr VReg (rv_vsrl_vi count2 two (unmasked) ty)) + (count4_and VReg (rv_vand_vx count4_shr mask_33 (unmasked) ty)) + (count4_lhs VReg (rv_vand_vx count2 mask_33 (unmasked) ty)) + (count4 VReg (rv_vadd_vv count4_lhs count4_and (unmasked) ty)) + + ;; x = (x + (x >> 4)) & 0x0F0F0F0F; + (mask_0f XReg (imm (lane_type ty) (u64_and 0x0f0f0f0f0f0f0f0f (ty_mask (lane_type ty))))) + (count8_shr VReg (rv_vsrl_vi count4 four (unmasked) ty)) + (count8_add VReg (rv_vadd_vv count4 count8_shr (unmasked) ty)) + (count8 VReg (rv_vand_vx count8_add mask_0f (unmasked) ty)) + + ;; (x * 0x01010101) >> (type width - 8) + (mask_01 XReg (imm (lane_type ty) (u64_and 0x0101010101010101 (ty_mask (lane_type ty))))) + (mul VReg (rv_vmul_vx count8 mask_01 (unmasked) ty)) + (shift XReg (imm $I64 (u64_sub (ty_bits (lane_type ty)) 8))) + (res VReg (rv_vsrl_vx mul shift (unmasked) ty))) + res)) + +;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount +(rule 0 (lower (has_type (ty_int (ty_8_or_16 ty)) (ishl x y))) + (if-let mask (u64_to_imm12 (shift_mask ty))) + (rv_sllw x (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `sll` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (ishl x y))) + (rv_sllw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (ishl x y))) + (rv_sll x (value_regs_get y 0))) + +;; If the shift amount is known, we can mask it and encode it in the instruction. +(rule 2 (lower (has_type (int_fits_in_32 ty) (ishl x (maybe_uextend (imm12_from_value y))))) + (rv_slliw x (imm12_and y (shift_mask ty)))) + +;; We technically don't need to mask the shift amount here. The instruction +;; does the right thing. But it's neater when pretty printing it. +(rule 3 (lower (has_type ty @ $I64 (ishl x (maybe_uextend (imm12_from_value y))))) + (rv_slli x (imm12_and y (shift_mask ty)))) + +;; With `Zba` we have a shift that zero extends the LHS argument. +(rule 4 (lower (has_type $I64 (ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y))))) + (if-let $true (has_zba)) + (rv_slliuw x y)) + +;; I128 cases +(rule 4 (lower (has_type $I128 (ishl x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; + (low XReg (rv_sll (value_regs_get x 0) shamt)) + ;; high part. 
+ (high_part1 XReg (rv_srl (value_regs_get x 0) len_sub_shamt)) + (high_part2 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part1)) + ;; + (high_part3 XReg (rv_sll (value_regs_get x 1) shamt)) + (high XReg (rv_or high_part2 high_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high)))) + +;; SIMD Cases +;; We don't need to mask anything since it is done by the instruction according to SEW. + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (ishl x y))) + (rv_vsll_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (ishl x (maybe_uextend (uimm5_from_value y))))) + (rv_vsll_vi x y (unmasked) ty)) + +;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be +;; zero extended. +(rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x y))) + (if-let mask (u64_to_imm12 (shift_mask ty))) + (rv_srlw (zext x ty $I64) (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `srl` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (ushr x y))) + (rv_srlw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (ushr x y))) + (rv_srl x (value_regs_get y 0))) + +;; When the RHS is known we can just encode it in the instruction. +(rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srliw (zext x ty $I64) (imm12_and y (shift_mask ty)))) + +(rule 3 (lower (has_type $I32 (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srliw x y)) + +(rule 3 (lower (has_type $I64 (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srli x y)) + +(rule 3 (lower (has_type $I128 (ushr x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; low part. + (low_part1 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + (low_part2 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part1)) + ;; + (low_part3 XReg (rv_srl (value_regs_get x 0) shamt)) + (low XReg (rv_or low_part2 low_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + ;; + (high XReg (rv_srl (value_regs_get x 1) shamt)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) high)))) + +;; SIMD Cases +;; We don't need to mask or extend anything since it is done by the instruction according to SEW. + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (ushr x y))) + (rv_vsrl_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (ushr x (maybe_uextend (uimm5_from_value y))))) + (rv_vsrl_vi x y (unmasked) ty)) + +;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be +;; zero extended. 
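+;; (As with the other shifts, the shift amount is taken modulo the type width,
+;; which is what `shift_mask` implements: e.g. shifting an i16 by 20 really
+;; shifts by 20 & 15 = 4.)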
+(rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x y))) + (if-let mask (u64_to_imm12 (shift_mask ty))) + (rv_sraw (sext x ty $I64) (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `sra` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (sshr x y))) + (rv_sraw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (sshr x y))) + (rv_sra x (value_regs_get y 0))) + +;; When the RHS is known we can just encode it in the instruction. +(rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_sraiw (sext x ty $I64) (imm12_and y (shift_mask ty)))) + +(rule 3 (lower (has_type $I32 (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_sraiw x y)) + +(rule 3 (lower (has_type $I64 (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_srai x y)) + +(rule 3 (lower (has_type $I128 (sshr x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; low part. + (low_part1 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + (low_part2 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part1)) + ;; + (low_part3 XReg (rv_srl (value_regs_get x 0) shamt)) + (low XReg (rv_or low_part2 low_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + ;; + (high XReg (rv_sra (value_regs_get x 1) shamt)) + ;; + (const_neg_1 XReg (load_imm12 -1)) + ;; + (high_replacement XReg (gen_select_reg (IntCC.SignedLessThan) (value_regs_get x 1) (zero_reg) const_neg_1 (zero_reg))) + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high_replacement high)))) + +;; SIMD Cases +;; We don't need to mask or extend anything since it is done by the instruction according to SEW. 
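+;; (The vector shift instructions themselves use only the low log2(SEW) bits of
+;; the scalar shift amount, e.g. the low 5 bits for 32-bit lanes, so a shift of
+;; 33 on an I32X4 value behaves like a shift of 1.)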
+ +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (sshr x y))) + (rv_vsra_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (sshr x (maybe_uextend (uimm5_from_value y))))) + (rv_vsra_vi x y (unmasked) ty)) + + +;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (rotl x y))) + (lower_rotl ty (zext x ty $I64) (value_regs_get y 0))) + +(rule 1 (lower (has_type $I128 (rotl x y))) + (lower_i128_rotl x y)) + +;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (rotr x y))) + (lower_rotr ty (zext x ty $I64) (value_regs_get y 0))) + +(rule 1 (lower (has_type $I128 (rotr x y))) + (lower_i128_rotr x y)) + + +;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fabs x))) + (rv_fabs ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fabs x))) + (rv_vfabs_v x (unmasked) ty)) + +;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fneg x))) + (rv_fneg ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fneg x))) + (rv_vfneg_v x (unmasked) ty)) + +;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fcopysign x y))) + (rv_fsgnj ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fcopysign x y))) + (rv_vfsgnj_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fcopysign x (splat y)))) + (rv_vfsgnj_vf x y (unmasked) ty)) + +;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fma x y z))) + (rv_fmadd ty x y z)) + +;; (fma x y z) computes x * y + z +;; vfmacc computes vd[i] = +(vs1[i] * vs2[i]) + vd[i] +;; We need to reverse the order of the arguments + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fma x y z))) + (rv_vfmacc_vv z y x (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y z))) + (rv_vfmacc_vf z y x (unmasked) ty)) + +;; vfmsac computes vd[i] = +(vs1[i] * vs2[i]) - vd[i] + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fma x y (fneg z)))) + (rv_vfmsac_vv z y x (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y (fneg z)))) + (rv_vfmsac_vf z y x (unmasked) ty)) + +;; vfnmacc computes vd[i] = -(vs1[i] * vs2[i]) - vd[i] + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y (fneg z)))) + (rv_vfnmacc_vv z y x (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y (fneg z)))) + (rv_vfnmacc_vf z y x (unmasked) ty)) + +;; vfnmsac computes vd[i] = -(vs1[i] * vs2[i]) + vd[i] + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y z))) + (rv_vfnmsac_vv z y x (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y z))) + (rv_vfnmsac_vf z y x (unmasked) ty)) + + +;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (sqrt x))) + (rv_fsqrt ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sqrt x))) + (rv_vfsqrt_v x (unmasked) ty)) + +;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule -1 + ;; + (lower + (has_type (valid_atomic_transaction 
ty) (atomic_rmw flags op addr x))) + (gen_atomic (get_atomic_rmw_op ty op) addr x (atomic_amo))) + +;;; for I8 and I16 +(rule 1 + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags op addr x))) + (gen_atomic_rmw_loop op ty addr x)) + +;;;special for I8 and I16 max min etc. +;;;because I need uextend or sextend the value. +(rule 2 + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $true) addr x))) + (gen_atomic_rmw_loop op ty addr (sext x ty $I64))) + + +(rule 2 + ;; + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $false) addr x))) + ;; + (gen_atomic_rmw_loop op ty addr (zext x ty $I64))) + +;;;;; Rules for `AtomicRmwOp.Sub` +(rule + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr x))) + (let + ((tmp WritableReg (temp_writable_reg ty)) + (x2 Reg (rv_neg x))) + (gen_atomic (get_atomic_rmw_op ty (AtomicRmwOp.Add)) addr x2 (atomic_amo)))) + +(decl gen_atomic_rmw_loop (AtomicRmwOp Type XReg XReg) XReg) +(rule + (gen_atomic_rmw_loop op ty addr x) + (let + ((dst WritableXReg (temp_writable_xreg)) + (t0 WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AtomicRmwLoop (gen_atomic_offset addr ty) op dst ty (gen_atomic_p addr ty) x t0)))) + (writable_reg_to_reg dst))) + +;;;;; Rules for `AtomicRmwOp.Nand` +(rule + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Nand) addr x))) + (gen_atomic_rmw_loop (AtomicRmwOp.Nand) ty addr x)) + +(decl is_atomic_rmw_max_etc (AtomicRmwOp bool) AtomicRmwOp) +(extern extractor is_atomic_rmw_max_etc is_atomic_rmw_max_etc) + +;;;;; Rules for `atomic load`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type (valid_atomic_transaction ty) (atomic_load flags p))) + (gen_atomic_load p ty)) + + +;;;;; Rules for `atomic store`;;;;;;;;;;;;;;;;; +(rule + (lower (atomic_store flags src @ (value_type (valid_atomic_transaction ty)) p)) + (gen_atomic_store p ty src)) + +(decl gen_atomic_offset (XReg Type) XReg) +(rule 1 (gen_atomic_offset p (fits_in_16 ty)) + (rv_slli (rv_andi p (imm12_const 3)) (imm12_const 3))) + +(rule (gen_atomic_offset p _) + (zero_reg)) + +(decl gen_atomic_p (XReg Type) XReg) +(rule 1 (gen_atomic_p p (fits_in_16 ty)) + (rv_andi p (imm12_const -4))) + +(rule (gen_atomic_p p _) + p) + + +;;;;; Rules for `atomic cas`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type (valid_atomic_transaction ty) (atomic_cas flags p e x))) + (let + ((t0 WritableReg (temp_writable_reg ty)) + (dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.AtomicCas (gen_atomic_offset p ty) t0 dst (zext e ty $I64) (gen_atomic_p p ty) x ty)))) + (writable_reg_to_reg dst))) + +;;;;; Rules for `ireduce`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type ty (ireduce x))) + (value_regs_get x 0)) + +;;;;; Rules for `fpromote`;;;;;;;;;;;;;;;;; +(rule (lower (fpromote x)) + (rv_fcvtds x)) + +;;;;; Rules for `fdemote`;;;;;;;;;;;;;;;;;; +(rule (lower (fdemote x)) + (rv_fcvtsd x)) + + +;;;;; Rules for for float arithmetic + + +;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fadd x y))) + (rv_fadd ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fadd x y))) + (rv_vfadd_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fadd x (splat y)))) + (rv_vfadd_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fadd (splat x) y))) + (rv_vfadd_vf y x 
(unmasked) ty)) + + +;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fsub x y))) + (rv_fsub ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fsub x y))) + (rv_vfsub_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fsub x (splat y)))) + (rv_vfsub_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fsub (splat x) y))) + (rv_vfrsub_vf y x (unmasked) ty)) + +;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fmul x y))) + (rv_fmul ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmul x y))) + (rv_vfmul_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fmul x (splat y)))) + (rv_vfmul_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fmul (splat x) y))) + (rv_vfmul_vf y x (unmasked) ty)) + + +;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fdiv x y))) + (rv_fdiv ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x y))) + (rv_vfdiv_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x (splat y)))) + (rv_vfdiv_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fdiv (splat x) y))) + (rv_vfrdiv_vf y x (unmasked) ty)) + +;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fmin x y))) + (gen_float_select (FloatSelectOP.Min) x y ty)) + +;; vfmin does almost the right thing, but it does not handle NaN's correctly. +;; We should return a NaN if any of the inputs is a NaN, but vfmin returns the +;; number input instead. +;; +;; TODO: We can improve this by using a masked `fmin` instruction that modifies +;; the canonical nan register. That way we could avoid the `vmerge.vv` instruction. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmin x y))) + (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) + (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) + (vec_nan VReg (rv_vmv_vx nan ty)) + (min VReg (rv_vfmin_vv x y (unmasked) ty))) + (rv_vmerge_vvm vec_nan min is_not_nan ty))) + +;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fmax x y))) + (gen_float_select (FloatSelectOP.Max) x y ty)) + +;; vfmax does almost the right thing, but it does not handle NaN's correctly. +;; We should return a NaN if any of the inputs is a NaN, but vfmax returns the +;; number input instead. +;; +;; TODO: We can improve this by using a masked `fmax` instruction that modifies +;; the canonical nan register. That way we could avoid the `vmerge.vv` instruction. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmax x y))) + (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) + (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) + (vec_nan VReg (rv_vmv_vx nan ty)) + (max VReg (rv_vfmax_vv x y (unmasked) ty))) + (rv_vmerge_vvm vec_nan max is_not_nan ty))) + +;;;;; Rules for `stack_addr`;;;;;;;;; +(rule + (lower (stack_addr ss offset)) + (gen_stack_addr ss offset)) + +;;;;; Rules for `is_null`;;;;;;;;; + +;; Null references are represented by the constant value `0`. 
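+;; A rough scalar sketch of the two reference checks below (illustrative only):
+;;
+;;   is_null(v)    <=> v == 0     (lowered to `seqz v`)
+;;   is_invalid(v) <=> v == -1    (lowered to `seqz (addi v, 1)`)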
+(rule (lower (is_null v)) + (rv_seqz v)) + +;;;;; Rules for `is_invalid`;;;;;;;;; + +;; Invalid references are represented by the constant value `-1`. +(rule (lower (is_invalid v)) + (rv_seqz (rv_addi v (imm12_const 1)))) + +;;;;; Rules for `select`;;;;;;;;; +(rule + (lower (has_type ty (select c @ (value_type cty) x y))) + (gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c (ExtendOp.Zero))) x y)) + +(rule 1 + (lower (has_type (fits_in_64 ty) (select (icmp cc a b @ (value_type (fits_in_64 in_ty))) x y))) + (let ((a XReg (truthy_to_reg in_ty (normalize_cmp_value in_ty a (intcc_to_extend_op cc)))) + (b XReg (truthy_to_reg in_ty (normalize_cmp_value in_ty b (intcc_to_extend_op cc))))) + (gen_select_reg cc a b x y))) + +;;;;; Rules for `bitselect`;;;;;;;;; + +;; Do a (c & x) | (~c & y) operation. +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (bitselect c x y))) + (let ((tmp_x XReg (rv_and c x)) + (c_inverse XReg (rv_not c)) + (tmp_y XReg (rv_and c_inverse y))) + (rv_or tmp_x tmp_y))) + +;; For vectors, we also do the same operation. +;; We can technically use any type in the bitwise operations, but prefer +;; using the type of the inputs so that we avoid emitting unnecessary +;; `vsetvl` instructions. It's likeley that the vector unit is already +;; configured for that type. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (bitselect c x y))) + (let ((tmp_x VReg (rv_vand_vv c x (unmasked) ty)) + (c_inverse VReg (rv_vnot_v c (unmasked) ty)) + (tmp_y VReg (rv_vand_vv c_inverse y (unmasked) ty))) + (rv_vor_vv tmp_x tmp_y (unmasked) ty))) + +;; Special case for bitselects with cmp's as an input. +;; +;; This allows us to skip the mask expansion step and use the more efficient +;; vmerge.vvm instruction. +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (icmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b) x y))) + (let ((mask VReg (gen_icmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (fcmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b) x y))) + (let ((mask VReg (gen_fcmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (bitcast _ (fcmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b)) x y))) + (let ((mask VReg (gen_fcmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (bitcast _ (icmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b)) x y))) + (let ((mask VReg (gen_icmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + + +;;;;; Rules for `isplit`;;;;;;;;; +(rule + (lower (isplit x)) + (let + ((t1 XReg (value_regs_get x 0)) + (t2 XReg (value_regs_get x 1))) + (output_pair t1 t2))) + +;;;;; Rules for `iconcat`;;;;;;;;; +(rule + (lower (has_type $I128 (iconcat x y))) + (let + ((t1 XReg x) + (t2 XReg y)) + (value_regs t1 t2))) + + +;;;;; Rules for `smax`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) (smax x y))) + (gen_int_select ty (IntSelectOP.Smax) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smax x y))) + (rv_vmax_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (smax x (splat y)))) + (rv_vmax_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (smax (splat x) y))) + (rv_vmax_vx y x (unmasked) ty)) + +;;;;; Rules for `smin`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) 
(smin x y))) + (gen_int_select ty (IntSelectOP.Smin) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smin x y))) + (rv_vmin_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (smin x (splat y)))) + (rv_vmin_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (smin (splat x) y))) + (rv_vmin_vx y x (unmasked) ty)) + +;;;;; Rules for `umax`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) (umax x y))) + (gen_int_select ty (IntSelectOP.Umax) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umax x y))) + (rv_vmaxu_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (umax x (splat y)))) + (rv_vmaxu_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (umax (splat x) y))) + (rv_vmaxu_vx y x (unmasked) ty)) + +;;;;; Rules for `umin`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) (umin x y))) + (gen_int_select ty (IntSelectOP.Umin) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umin x y))) + (rv_vminu_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (umin x (splat y)))) + (rv_vminu_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (umin (splat x) y))) + (rv_vminu_vx y x (unmasked) ty)) + + +;;;;; Rules for `debugtrap`;;;;;;;;; +(rule + (lower (debugtrap)) + (side_effect (SideEffectNoResult.Inst (MInst.EBreak)))) + +;;;;; Rules for `fence`;;;;;;;;; +(rule + (lower (fence)) + (side_effect (SideEffectNoResult.Inst (MInst.Fence 15 15)))) + +;;;;; Rules for `trap`;;;;;;;;; +(rule + (lower (trap code)) + (udf code)) + +;;;;; Rules for `resumable_trap`;;;;;;;;; +(rule + (lower (resumable_trap code)) + (udf code)) + +;;;;; Rules for `uload8`;;;;;;;;; +(rule + (lower (uload8 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $false 8) flags $I64)) +;;;;; Rules for `sload8`;;;;;;;;; +(rule + (lower (sload8 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $true 8) flags $I64)) +;;;;; Rules for `uload16`;;;;;;;;; +(rule + (lower (uload16 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $false 16) flags $I64)) + +;;;;; Rules for `iload16`;;;;;;;;; +(rule + (lower (sload16 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $true 16) flags $I64)) + +;;;;; Rules for `uload32`;;;;;;;;; +(rule + (lower (uload32 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $false 32) flags $I64)) + +;;;;; Rules for `iload32`;;;;;;;;; +(rule + (lower (sload32 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $true 32) flags $I64)) + +(rule + (lower (has_type ty (load flags p @ (value_type (ty_addr32 _)) offset))) + (gen_load p offset (load_op ty) flags ty) +) +;;;; for I128 +(rule 1 + (lower (has_type $I128 (load flags p @ (value_type (ty_addr64 _)) offset))) + (gen_load_128 p offset flags)) + +(rule 2 + (lower (has_type (ty_vec_fits_in_register ty) (load flags p @ (value_type (ty_addr64 _)) offset))) + (let ((eew VecElementWidth (element_width_from_type ty))) + (vec_load eew (VecAMode.UnitStride (gen_amode p offset $I64)) flags (unmasked) ty))) + +;;;;; Rules for Load + Extend Combos ;;;;;;;;; + +;; These rules cover the special loads that load a 64bit value and do some 
sort of extension. +;; We don't have any special instructions to do this, so just load the 64 bits as a vector, and +;; do a SEW/2 extension. This only reads half width elements from the source vector register +;; extends it, and writes the back the full register. + +(decl gen_load64_extend (Type ExtendOp MemFlags XReg Offset32) VReg) + +(rule (gen_load64_extend ty (ExtendOp.Signed) flags addr offset) + (let ((eew VecElementWidth (element_width_from_type $I64)) + (load_state VState (vstate_from_type $I64)) + (loaded VReg (vec_load eew (VecAMode.UnitStride (gen_amode addr offset $I64)) flags (unmasked) load_state))) + (rv_vsext_vf2 loaded (unmasked) ty))) + +(rule (gen_load64_extend ty (ExtendOp.Zero) flags addr offset) + (let ((eew VecElementWidth (element_width_from_type $I64)) + (load_state VState (vstate_from_type $I64)) + (loaded VReg (vec_load eew (VecAMode.UnitStride (gen_amode addr offset $I64)) flags (unmasked) load_state))) + (rv_vzext_vf2 loaded (unmasked) ty))) + +;;;;; Rules for `uload8x8`;;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I16X8) (uload8x8 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags addr offset)) + +;;;;; Rules for `uload16x4`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I32X4) (uload16x4 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags addr offset)) + +;;;;; Rules for `uload32x2`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I64X2) (uload32x2 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags addr offset)) + +;;;;; Rules for `sload8x8`;;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I16X8) (sload8x8 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags addr offset)) + +;;;;; Rules for `sload16x4`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I32X4) (sload16x4 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags addr offset)) + +;;;;; Rules for `sload32x2`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I64X2) (sload32x2 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags addr offset)) + +;;;;; Rules for `istore8`;;;;;;;;; +(rule + (lower (istore8 flags x p @ (value_type (ty_addr64 _)) offset)) + (gen_store p offset (StoreOP.Sb) flags x)) +;;;;; Rules for `istore16`;;;;;;;;; +(rule + (lower (istore16 flags x p @ (value_type (ty_addr64 _)) offset)) + (gen_store p offset (StoreOP.Sh) flags x)) + +;;;;; Rules for `istore32`;;;;;;;;; +(rule + (lower (istore32 flags x p @ (value_type (ty_addr64 _)) offset)) + (gen_store p offset (StoreOP.Sw) flags x)) + +;;;;; Rules for `store`;;;;;;;;; +(rule + (lower (store flags x @ (value_type ty) p @ (value_type (ty_addr32 _)) offset)) + (gen_store p offset (store_op ty) flags x)) + +;;; special for I128 +(rule 1 + (lower (store flags x @ (value_type $I128 ) p @ (value_type (ty_addr64 _)) offset)) + (gen_store_128 p offset flags x)) + +(rule 2 + (lower (store flags x @ (value_type (ty_vec_fits_in_register ty)) p @ (value_type (ty_addr64 _)) offset)) + (let ((eew VecElementWidth (element_width_from_type ty))) + (vec_store eew (VecAMode.UnitStride (gen_amode p offset $I64)) x flags (unmasked) ty))) + +(decl gen_icmp (IntCC ValueRegs ValueRegs Type) XReg) +(rule + (gen_icmp cc x y ty) + (let + ((result WritableXReg (temp_writable_xreg)) + (_ Unit (emit 
(MInst.Icmp cc result x y ty)))) + result)) + +;;;;; Rules for `icmp`;;;;;;;;; +(rule 0 (lower (icmp cc x @ (value_type (ty_int ty)) y)) + (lower_icmp cc x y ty)) + +(rule 1 (lower (icmp cc x @ (value_type (ty_vec_fits_in_register ty)) y)) + (gen_expand_mask ty (gen_icmp_mask ty cc x y))) + + +;;;;; Rules for `fcmp`;;;;;;;;; +(rule 0 (lower (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) + (cmp_value (emit_fcmp cc ty x y))) + +(rule 1 (lower (fcmp cc x @ (value_type (ty_vec_fits_in_register ty)) y)) + (gen_expand_mask ty (gen_fcmp_mask ty cc x y))) + +;;;;; Rules for `func_addr`;;;;;;;;; +(rule + (lower (func_addr (func_ref_data _ name _))) + (load_ext_name name 0)) + +;;;;; Rules for `fcvt_to_uint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_uint v @ (value_type from)))) + (gen_fcvt_int $false v $false from to)) + +;;;;; Rules for `fcvt_to_sint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_sint v @ (value_type from)))) + (gen_fcvt_int $false v $true from to)) + +;;;;; Rules for `fcvt_to_sint_sat`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_sint_sat v @ (value_type from)))) + (gen_fcvt_int $true v $true from to)) + +;;;;; Rules for `fcvt_to_uint_sat`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_uint_sat v @ (value_type from)))) + (gen_fcvt_int $true v $false from to)) + +;;;;; Rules for `fcvt_from_sint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_from_sint v @ (value_type from_ty)))) + (let ((float_op FpuOPRR (int_convert_2_float_op from_ty $true to)) + (value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Signed)))) + (fpu_rr float_op to value))) + +;;;;; Rules for `fcvt_from_uint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_from_uint v @ (value_type from_ty)))) + (let ((float_op FpuOPRR (int_convert_2_float_op from_ty $false to)) + (value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Zero)))) + (fpu_rr float_op to value))) + +;;;;; Rules for `symbol_value`;;;;;;;;; +(rule + (lower (symbol_value (symbol_value_data name _ offset))) + (load_ext_name name offset) +) +;;;;; Rules for `bitcast`;;;;;;;;; +(rule + (lower (has_type out_ty (bitcast _ v @ (value_type in_ty)))) + (gen_bitcast v in_ty out_ty)) + +;;;;; Rules for `ceil`;;;;;;;;; +(rule + (lower (has_type ty (ceil x))) + (gen_float_round (FloatRoundOP.Ceil) x ty) +) + +;;;;; Rules for `floor`;;;;;;;;; +(rule + (lower (has_type ty (floor x))) + (gen_float_round (FloatRoundOP.Floor) x ty)) +;;;;; Rules for `trunc`;;;;;;;;; +(rule + (lower (has_type ty (trunc x))) + (gen_float_round (FloatRoundOP.Trunc) x ty)) + +;;;;; Rules for `nearest`;;;;;;;;; +(rule + (lower (has_type ty (nearest x))) + (gen_float_round (FloatRoundOP.Nearest) x ty)) + + +;;;;; Rules for `select_spectre_guard`;;;;;;;;; + +;; SelectSpectreGuard is equivalent to Select, but we should not use a branch based +;; lowering for it. Instead we use a conditional move based lowering. +;; +;; We don't have cmov's in RISC-V either, but we can emulate those using bitwise +;; operations, which is what we do below. +(rule (lower (has_type ty (select_spectre_guard cmp @ (value_type cmp_ty) x @ (value_type arg_ty) y))) + (let (;; Build a mask that is 0 or -1 depending on the input comparision value. + ;; `lower_bmask` handles normalizing the input. + (mask ValueRegs (lower_bmask arg_ty cmp_ty cmp)) + ;; Using the mask above we can select either `x` or `y` by + ;; performing a bitwise `and` on both sides and then merging them + ;; together. We know that only the bits of one of the sides will be selected. 
+ ;; TODO: We can use `andn` here if we have `Zbb` + (lhs ValueRegs (gen_and arg_ty x mask)) + (rhs ValueRegs (gen_and arg_ty y (gen_bnot arg_ty mask)))) + (gen_or arg_ty lhs rhs))) + +;;;;; Rules for `bmask`;;;;;;;;; +(rule + (lower (has_type oty (bmask x @ (value_type ity)))) + (lower_bmask oty ity x)) + +;; N.B.: the Ret itself is generated by the ABI. +(rule (lower (return args)) + (lower_return args)) + +;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;;; + +(rule (lower (get_frame_pointer)) + (gen_mov_from_preg (fp_reg))) + +(rule (lower (get_stack_pointer)) + (gen_mov_from_preg (sp_reg))) + +(rule (lower (get_return_address)) + (load_ra)) + +;;; Rules for `iabs` ;;;;;;;;;;;;; + +;; I64 and lower +;; Generate the following code: +;; sext.{b,h,w} a0, a0 +;; neg a1, a0 +;; max a0, a0, a1 +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iabs x))) + (let ((extended XReg (sext x ty $I64)) + (negated XReg (rv_neg extended))) + (max $I64 extended negated))) + +;; For vectors we generate the same code, but with vector instructions +;; we can skip the sign extension, since the vector unit will only process +;; Element Sized chunks. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (iabs x))) + (let ((negated VReg (rv_vneg_v x (unmasked) ty))) + (rv_vmax_vv x negated (unmasked) ty))) + +;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (call (func_ref_data sig_ref extname dist) inputs)) + (gen_call sig_ref extname dist inputs)) + +(rule (lower (call_indirect sig_ref val inputs)) + (gen_call_indirect sig_ref val inputs)) + +;;;; Rules for `return_call` and `return_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (return_call (func_ref_data sig_ref extname dist) args)) + (gen_return_call sig_ref extname dist args)) + +(rule (lower (return_call_indirect sig_ref callee args)) + (gen_return_call_indirect sig_ref callee args)) + + +;;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (extractlane x @ (value_type ty) (u8_from_uimm8 idx))) + (gen_extractlane ty x idx)) + +;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We can insert a lane by using a masked splat from an X register. +;; Build a mask that is only enabled in the lane we want to insert. +;; Then use a masked splat (vmerge) to insert the value. +(rule 0 (lower (insertlane vec @ (value_type (ty_vec_fits_in_register ty)) + val @ (value_type (ty_int _)) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vmerge_vxm vec val mask ty))) + +;; Similar to above, but using the float variants of the instructions. +(rule 1 (lower (insertlane vec @ (value_type (ty_vec_fits_in_register ty)) + val @ (value_type (ty_scalar_float _)) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vfmerge_vfm vec val mask ty))) + +;; If we are inserting from an Imm5 const we can use the immediate +;; variant of vmerge. 
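+;; For example: inserting into lane 3 builds the one-hot mask 1 << 3 = 0b1000, so
+;; the masked merge replaces only lane 3 of `vec` and passes every other lane
+;; through unchanged.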
+(rule 2 (lower (insertlane vec @ (value_type (ty_vec_fits_in_register ty)) + (iconst (u64_from_imm64 (imm5_from_u64 imm))) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vmerge_vim vec imm mask ty))) + +;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type ty (splat n @ (value_type (ty_scalar_float _))))) + (rv_vfmv_vf n ty)) + +(rule 1 (lower (has_type ty (splat n @ (value_type (ty_int_ref_scalar_64 _))))) + (rv_vmv_vx n ty)) + +(rule 2 (lower (has_type ty (splat (iconst (u64_from_imm64 (imm5_from_u64 imm)))))) + (rv_vmv_vi imm ty)) + +;; TODO: We can splat out more patterns by using for example a vmv.v.i i8x16 for +;; a i64x2 const with a compatible bit pattern. The AArch64 Backend does something +;; similar in its splat rules. +;; TODO: Look through bitcasts when splatting out registers. We can use +;; `vmv.v.x` in a `(splat.f32x4 (bitcast.f32 val))`. And vice versa for integers. + +;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x y))) + (rv_vsaddu_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x (splat y)))) + (rv_vsaddu_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat (splat x) y))) + (rv_vsaddu_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x (replicated_imm5 y)))) + (rv_vsaddu_vi x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat (replicated_imm5 x) y))) + (rv_vsaddu_vi y x (unmasked) ty)) + +;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x y))) + (rv_vsadd_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x (splat y)))) + (rv_vsadd_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat (splat x) y))) + (rv_vsadd_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x (replicated_imm5 y)))) + (rv_vsadd_vi x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat (replicated_imm5 x) y))) + (rv_vsadd_vi y x (unmasked) ty)) + +;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (usub_sat x y))) + (rv_vssubu_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (usub_sat x (splat y)))) + (rv_vssubu_vx x y (unmasked) ty)) + +;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (ssub_sat x y))) + (rv_vssub_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (ssub_sat x (splat y)))) + (rv_vssub_vx x y (unmasked) ty)) + +;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Here we do a Vector Reduce operation. Get the unsigned minimum value of any +;; lane in the vector. The fixed input to the reduce operation is a 1. +;; This way, if any lane is 0, the result will be 0. Otherwise, the result will +;; be a 1. +;; The reduce operation leaves the result in the lowest lane, we then move it +;; into the destination X register. 
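+;; For example, with an input of [3, 0, 7, 1] the reduction computes umin(1, 3, 0, 7, 1) = 0, while +;; for an all-non-zero input the seeded 1 keeps the unsigned minimum at exactly 1.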
+(rule (lower (vall_true x @ (value_type (ty_vec_fits_in_register ty)))) + (if-let one (imm5_from_i8 1)) + ;; We don't need to broadcast the immediate into all lanes, only into lane 0. + ;; I did it this way since it uses one less instruction than with a vmv.s.x. + (let ((fixed VReg (rv_vmv_vi one ty)) + (min VReg (rv_vredminu_vs x fixed (unmasked) ty))) + (rv_vmv_xs min ty))) + + +;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Here we do a Vector Reduce operation. Get the unsigned maximum value of the +;; input vector register. Move the max to an X register, and do a `snez` on it +;; to ensure its either 1 or 0. +(rule (lower (vany_true x @ (value_type (ty_vec_fits_in_register ty)))) + (let ((max VReg (rv_vredmaxu_vs x x (unmasked) ty)) + (x_max XReg (rv_vmv_xs max ty))) + (rv_snez x_max))) + + +;;;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; To check if the MSB of a lane is set, we do a `vmslt` with zero, this sets +;; the mask bit to 1 if the value is negative (MSB 1) and 0 if not. We can then +;; just move that mask to an X Register. +;; +;; We must ensure that the move to the X register has a SEW with enough bits +;; to hold the full mask. Additionally, in some cases (e.g. i64x2) we are going +;; to read some tail bits. These are undefined, so we need to further mask them +;; off. +(rule (lower (vhigh_bits x @ (value_type (ty_vec_fits_in_register ty)))) + (let ((mask VReg (rv_vmslt_vx x (zero_reg) (unmasked) ty)) + ;; Here we only need I64X1, but emit an AVL of 2 since it + ;; saves one vector state change in the case of I64X2. + ;; + ;; TODO: For types that have more lanes than element bits, we can + ;; use the original type as a VState and avoid a state change. + (x_mask XReg (rv_vmv_xs mask (vstate_from_type $I64X2)))) + (gen_andi x_mask (ty_lane_mask ty)))) + +;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (swizzle x y))) + (rv_vrgather_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (swizzle x (splat y)))) + (rv_vrgather_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (swizzle x (replicated_uimm5 y)))) + (rv_vrgather_vi x y (unmasked) ty)) + +;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Use a vrgather to load all 0-15 lanes from x. And then modify the mask to load all +;; 16-31 lanes from y. Finally, use a vor to combine the two vectors. +;; +;; vrgather will insert a 0 for lanes that are out of bounds, so we can let it load +;; negative and out of bounds indexes. +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I8X16) (shuffle x y (vconst_from_immediate mask)))) + (if-let neg16 (imm5_from_i8 -16)) + (let ((x_mask VReg (gen_constant ty mask)) + (x_lanes VReg (rv_vrgather_vv x x_mask (unmasked) ty)) + (y_mask VReg (rv_vadd_vi x_mask neg16 (unmasked) ty)) + (y_lanes VReg (rv_vrgather_vv y y_mask (unmasked) ty))) + (rv_vor_vv x_lanes y_lanes (unmasked) ty))) + +;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Slide down half the vector, and do a signed extension. 
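+;; For example, a `swiden_high` from i8x16 to i16x8 slides the upper 8 lanes down into lanes 0..7 and +;; then sign-extends each of them to 16 bits.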
+(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_high x @ (value_type in_ty)))) + (rv_vsext_vf2 (gen_slidedown_half in_ty x) (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_high (swiden_high x @ (value_type in_ty))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vsext_vf4 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_high (swiden_high (swiden_high x @ (value_type in_ty)))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vsext_vf8 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Slide down half the vector, and do a zero extension. +(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_high x @ (value_type in_ty)))) + (rv_vzext_vf2 (gen_slidedown_half in_ty x) (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_high (uwiden_high x @ (value_type in_ty))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vzext_vf4 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_high (uwiden_high (uwiden_high x @ (value_type in_ty)))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vzext_vf8 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +;;;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_low x))) + (rv_vsext_vf2 x (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_low (swiden_low x)))) + (rv_vsext_vf4 x (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_low (swiden_low (swiden_low x))))) + (rv_vsext_vf8 x (unmasked) out_ty)) + +;;;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_low x))) + (rv_vzext_vf2 x (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_low (uwiden_low x)))) + (rv_vzext_vf4 x (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_low (uwiden_low (uwiden_low x))))) + (rv_vzext_vf8 x (unmasked) out_ty)) + +;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We don't have a dedicated instruction for this, rearrange the register elements +;; and use a vadd. +;; +;; We do this by building two masks, one for the even elements and one for the odd +;; elements. Using vcompress we can extract the elements and group them together. +;; +;; This is likely not the optimal way of doing this. LLVM does this using a bunch +;; of vrgathers (See: https://godbolt.org/z/jq8Wj8WG4), that doesen't seem to be +;; too much better than this. +;; +;; However V8 does something better. They use 2 vcompresses using LMUL2, that means +;; that they can do the whole thing in 3 instructions (2 vcompress + vadd). We don't +;; support LMUL > 1, so we can't do that. 
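+;; As an illustration for i32x4 inputs x = [x0, x1, x2, x3] and y = [y0, y1, y2, y3]: the odd mask +;; compresses out [x0, x2] and [y0, y2], the even mask compresses out [x1, x3] and [y1, y3], and +;; after the two slideups the final vadd produces [x0+x1, x2+x3, y0+y1, y2+y3].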
+(rule (lower (has_type (ty_vec_fits_in_register ty) (iadd_pairwise x y))) + (if-let half_size (u64_to_uimm5 (u64_udiv (ty_lane_count ty) 2))) + (let ((odd_mask VReg (gen_vec_mask 0x5555555555555555)) + (lhs_lo VReg (rv_vcompress_vm x odd_mask ty)) + (lhs_hi VReg (rv_vcompress_vm y odd_mask ty)) + (lhs VReg (rv_vslideup_vvi lhs_lo lhs_hi half_size (unmasked) ty)) + + (even_mask VReg (gen_vec_mask 0xAAAAAAAAAAAAAAAA)) + (rhs_lo VReg (rv_vcompress_vm x even_mask ty)) + (rhs_hi VReg (rv_vcompress_vm y even_mask ty)) + (rhs VReg (rv_vslideup_vvi rhs_lo rhs_hi half_size (unmasked) ty))) + (rv_vadd_vv lhs rhs (unmasked) ty))) + +;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `avg_round` computes the unsigned average with rounding: a := (x + y + 1) // 2 +;; +;; See Section "2–5 Average of Two Integers" of the Hacker's Delight book +;; +;; The floor average of two integers without overflow can be computed as: +;; t = (x & y) + ((x ^ y) >> 1) +;; +;; The right shift should be a logical shift if the integers are unsigned. +;; +;; We are however interested in the ceiling average (x + y + 1). For that +;; we use a special rounding mode in the right shift instruction. +;; +;; For the right shift instruction we use `vssrl` which is a Scaling Shift +;; Right Logical instruction using the `vxrm` fixed-point rouding mode. The +;; default rounding mode is `rnu` (round-to-nearest-up (add +0.5 LSB)). +;; Which is coincidentally the rounding mode we want for `avg_round`. +(rule (lower (has_type (ty_vec_fits_in_register ty) (avg_round x y))) + (if-let one (u64_to_uimm5 1)) + (let ((lhs VReg (rv_vand_vv x y (unmasked) ty)) + (xor VReg (rv_vxor_vv x y (unmasked) ty)) + (rhs VReg (rv_vssrl_vi xor one (unmasked) ty))) + (rv_vadd_vv lhs rhs (unmasked) ty))) + +;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (scalar_to_vector x))) + (if (ty_vector_not_float ty)) + (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) + (mask VReg (gen_vec_mask 1))) + (rv_vmerge_vxm zero x mask ty))) + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (scalar_to_vector x))) + (if (ty_vector_float ty)) + (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) + (elem VReg (rv_vfmv_sf x ty)) + (mask VReg (gen_vec_mask 1))) + (rv_vmerge_vvm zero elem mask ty))) + +;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (sqmul_round_sat x y))) + (rv_vsmul_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sqmul_round_sat x (splat y)))) + (rv_vsmul_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (sqmul_round_sat (splat x) y))) + (rv_vsmul_vx y x (unmasked) ty)) + +;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_vec_fits_in_register out_ty) (snarrow x @ (value_type in_ty) y))) + (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_clip VReg (rv_vnclip_wi x zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclip_wi y zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) + +;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_vec_fits_in_register out_ty) (uunarrow x @ (value_type in_ty) y))) + (if-let 
lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_clip VReg (rv_vnclipu_wi x zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclipu_wi y zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) + +;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We don't have an instruction that saturates a signed source into an unsigned destination. +;; To correct for this we just remove negative values using `vmax` and then use the normal +;; unsigned to unsigned narrowing instruction. + +(rule (lower (has_type (ty_vec_fits_in_register out_ty) (unarrow x @ (value_type in_ty) y))) + (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_pos VReg (rv_vmax_vx x (zero_reg) (unmasked) in_ty)) + (y_pos VReg (rv_vmax_vx y (zero_reg) (unmasked) in_ty)) + (x_clip VReg (rv_vnclipu_wi x_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclipu_wi y_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) diff --git a/cranelift/codegen/src/isa/zkasm/lower.rs b/cranelift/codegen/src/isa/zkasm/lower.rs new file mode 100644 index 000000000000..384fba864596 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/lower.rs @@ -0,0 +1,33 @@ +//! Lowering rules for zkasm. +use crate::ir::Inst as IRInst; +use crate::isa::zkasm::inst::*; +use crate::isa::zkasm::Riscv64Backend; +use crate::machinst::lower::*; +use crate::machinst::*; +pub mod isle; + +//============================================================================= +// Lowering-backend trait implementation. + +impl LowerBackend for Riscv64Backend { + type MInst = Inst; + + fn lower(&self, ctx: &mut Lower, ir_inst: IRInst) -> Option { + isle::lower(ctx, self, ir_inst) + } + + fn lower_branch( + &self, + ctx: &mut Lower, + ir_inst: IRInst, + targets: &[MachLabel], + ) -> Option<()> { + isle::lower_branch(ctx, self, ir_inst, targets) + } + + fn maybe_pinned_reg(&self) -> Option { + // The pinned register is a register kept out of register allocation so that it + // can hold a dedicated value; the zkasm backend does not support this yet. + None + } +} diff --git a/cranelift/codegen/src/isa/zkasm/lower/isle.rs b/cranelift/codegen/src/isa/zkasm/lower/isle.rs new file mode 100644 index 000000000000..cd8cab87b477 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/lower/isle.rs @@ -0,0 +1,623 @@ +//! ISLE integration glue code for zkasm lowering. + +// Pull in the ISLE generated code. +#[allow(unused)] +pub mod generated_code; +use generated_code::{Context, ExtendOp, MInst}; + +// Types that the generated ISLE code uses via `use super::*`.
+use self::generated_code::{VecAluOpRR, VecLmul}; +use super::{writable_zero_reg, zero_reg}; +use crate::isa::zkasm::abi::Riscv64ABICallSite; +use crate::isa::zkasm::lower::args::{FReg, VReg, WritableFReg, WritableVReg, WritableXReg, XReg}; +use crate::isa::zkasm::Riscv64Backend; +use crate::machinst::Reg; +use crate::machinst::{isle::*, MachInst, SmallInstVec}; +use crate::machinst::{VCodeConstant, VCodeConstantData}; +use crate::{ + ir::{ + immediates::*, types::*, AtomicRmwOp, BlockCall, ExternalName, Inst, InstructionData, + MemFlags, StackSlot, TrapCode, Value, ValueList, + }, + isa::zkasm::inst::*, + machinst::{ArgPair, InstOutput, Lower}, +}; +use crate::{isa, isle_common_prelude_methods, isle_lower_prelude_methods}; +use regalloc2::PReg; +use std::boxed::Box; +use std::convert::TryFrom; +use std::vec::Vec; + +type BoxCallInfo = Box; +type BoxCallIndInfo = Box; +type BoxReturnCallInfo = Box; +type BoxExternalName = Box; +type VecMachLabel = Vec; +type VecArgPair = Vec; +use crate::machinst::valueregs; + +pub(crate) struct RV64IsleContext<'a, 'b, I, B> +where + I: VCodeInst, + B: LowerBackend, +{ + pub lower_ctx: &'a mut Lower<'b, I>, + pub backend: &'a B, + /// Precalucated value for the minimum vector register size. Will be 0 if + /// vectors are not supported. + min_vec_reg_size: u64, +} + +impl<'a, 'b> RV64IsleContext<'a, 'b, MInst, Riscv64Backend> { + isle_prelude_method_helpers!(Riscv64ABICallSite); + + fn new(lower_ctx: &'a mut Lower<'b, MInst>, backend: &'a Riscv64Backend) -> Self { + Self { + lower_ctx, + backend, + min_vec_reg_size: backend.isa_flags.min_vec_reg_size(), + } + } + + #[inline] + fn emit_list(&mut self, list: &SmallInstVec) { + for i in list { + self.lower_ctx.emit(i.clone()); + } + } +} + +impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> { + isle_lower_prelude_methods!(); + isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICallSite); + + fn gen_return_call( + &mut self, + callee_sig: SigRef, + callee: ExternalName, + distance: RelocDistance, + args: ValueSlice, + ) -> InstOutput { + let caller_conv = isa::CallConv::Tail; + debug_assert_eq!( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + caller_conv, + "Can only do `return_call`s from within a `tail` calling convention function" + ); + + let call_site = Riscv64ABICallSite::from_func( + self.lower_ctx.sigs(), + callee_sig, + &callee, + distance, + caller_conv, + self.backend.flags().clone(), + ); + call_site.emit_return_call(self.lower_ctx, args); + + InstOutput::new() + } + + fn gen_return_call_indirect( + &mut self, + callee_sig: SigRef, + callee: Value, + args: ValueSlice, + ) -> InstOutput { + let caller_conv = isa::CallConv::Tail; + debug_assert_eq!( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + caller_conv, + "Can only do `return_call`s from within a `tail` calling convention function" + ); + + let callee = self.put_in_reg(callee); + + let call_site = Riscv64ABICallSite::from_ptr( + self.lower_ctx.sigs(), + callee_sig, + callee, + Opcode::ReturnCallIndirect, + caller_conv, + self.backend.flags().clone(), + ); + call_site.emit_return_call(self.lower_ctx, args); + + InstOutput::new() + } + + fn vreg_new(&mut self, r: Reg) -> VReg { + VReg::new(r).unwrap() + } + fn writable_vreg_new(&mut self, r: WritableReg) -> WritableVReg { + r.map(|wr| VReg::new(wr).unwrap()) + } + fn writable_vreg_to_vreg(&mut self, arg0: WritableVReg) -> VReg { + arg0.to_reg() + } + fn writable_vreg_to_writable_reg(&mut self, arg0: WritableVReg) -> WritableReg { + 
arg0.map(|vr| vr.to_reg()) + } + fn vreg_to_reg(&mut self, arg0: VReg) -> Reg { + *arg0 + } + fn xreg_new(&mut self, r: Reg) -> XReg { + XReg::new(r).unwrap() + } + fn writable_xreg_new(&mut self, r: WritableReg) -> WritableXReg { + r.map(|wr| XReg::new(wr).unwrap()) + } + fn writable_xreg_to_xreg(&mut self, arg0: WritableXReg) -> XReg { + arg0.to_reg() + } + fn writable_xreg_to_writable_reg(&mut self, arg0: WritableXReg) -> WritableReg { + arg0.map(|xr| xr.to_reg()) + } + fn xreg_to_reg(&mut self, arg0: XReg) -> Reg { + *arg0 + } + fn freg_new(&mut self, r: Reg) -> FReg { + FReg::new(r).unwrap() + } + fn writable_freg_new(&mut self, r: WritableReg) -> WritableFReg { + r.map(|wr| FReg::new(wr).unwrap()) + } + fn writable_freg_to_freg(&mut self, arg0: WritableFReg) -> FReg { + arg0.to_reg() + } + fn writable_freg_to_writable_reg(&mut self, arg0: WritableFReg) -> WritableReg { + arg0.map(|fr| fr.to_reg()) + } + fn freg_to_reg(&mut self, arg0: FReg) -> Reg { + *arg0 + } + + fn vec_writable_to_regs(&mut self, val: &VecWritableReg) -> ValueRegs { + match val.len() { + 1 => ValueRegs::one(val[0].to_reg()), + 2 => ValueRegs::two(val[0].to_reg(), val[1].to_reg()), + _ => unreachable!(), + } + } + fn intcc_to_extend_op(&mut self, cc: &IntCC) -> ExtendOp { + use IntCC::*; + match *cc { + Equal + | NotEqual + | UnsignedLessThan + | UnsignedGreaterThanOrEqual + | UnsignedGreaterThan + | UnsignedLessThanOrEqual => ExtendOp::Zero, + + SignedLessThan + | SignedGreaterThanOrEqual + | SignedGreaterThan + | SignedLessThanOrEqual => ExtendOp::Signed, + } + } + fn lower_cond_br( + &mut self, + cc: &IntCC, + a: ValueRegs, + targets: &VecMachLabel, + ty: Type, + ) -> Unit { + MInst::lower_br_icmp( + *cc, + a, + self.int_zero_reg(ty), + BranchTarget::Label(targets[0]), + BranchTarget::Label(targets[1]), + ty, + ) + .iter() + .for_each(|i| self.emit(i)); + } + fn lower_br_icmp( + &mut self, + cc: &IntCC, + a: ValueRegs, + b: ValueRegs, + targets: &VecMachLabel, + ty: Type, + ) -> Unit { + let test = generated_code::constructor_lower_icmp(self, cc, a, b, ty); + self.emit(&MInst::CondBr { + taken: BranchTarget::Label(targets[0]), + not_taken: BranchTarget::Label(targets[1]), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: test, + rs2: zero_reg(), + }, + }); + } + fn load_ra(&mut self) -> Reg { + if self.backend.flags.preserve_frame_pointers() { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::Load { + rd: tmp, + op: LoadOP::Ld, + flags: MemFlags::trusted(), + from: AMode::FPOffset(8, I64), + }); + tmp.to_reg() + } else { + link_reg() + } + } + fn int_zero_reg(&mut self, ty: Type) -> ValueRegs { + assert!(ty.is_int(), "{:?}", ty); + if ty.bits() == 128 { + ValueRegs::two(self.zero_reg(), self.zero_reg()) + } else { + ValueRegs::one(self.zero_reg()) + } + } + + fn vec_label_get(&mut self, val: &VecMachLabel, x: u8) -> MachLabel { + val[x as usize] + } + + fn label_to_br_target(&mut self, label: MachLabel) -> BranchTarget { + BranchTarget::Label(label) + } + + fn vec_writable_clone(&mut self, v: &VecWritableReg) -> VecWritableReg { + v.clone() + } + + fn imm12_and(&mut self, imm: Imm12, x: u64) -> Imm12 { + Imm12::from_bits(imm.as_i16() & (x as i16)) + } + + fn alloc_vec_writable(&mut self, ty: Type) -> VecWritableReg { + if ty.is_int() || ty == R32 || ty == R64 { + if ty.bits() <= 64 { + vec![self.temp_writable_reg(I64)] + } else { + vec![self.temp_writable_reg(I64), self.temp_writable_reg(I64)] + } + } else if ty.is_float() || ty.is_vector() { + vec![self.temp_writable_reg(ty)] + } else { + 
unimplemented!("ty:{:?}", ty) + } + } + + fn imm(&mut self, ty: Type, val: u64) -> Reg { + let tmp = self.temp_writable_reg(ty); + let alloc_tmp = &mut |ty| self.temp_writable_reg(ty); + let insts = match ty { + F32 => MInst::load_fp_constant32(tmp, val as u32, alloc_tmp), + F64 => MInst::load_fp_constant64(tmp, val, alloc_tmp), + _ => MInst::load_constant_u64(tmp, val, alloc_tmp), + }; + self.emit_list(&insts); + tmp.to_reg() + } + #[inline] + fn emit(&mut self, arg0: &MInst) -> Unit { + self.lower_ctx.emit(arg0.clone()); + } + #[inline] + fn imm12_from_u64(&mut self, arg0: u64) -> Option { + Imm12::maybe_from_u64(arg0) + } + + #[inline] + fn imm32_from_u64(&mut self, arg0: u64) -> Option { + Imm32::maybe_from_u64(arg0) + } + #[inline] + fn imm5_from_u64(&mut self, arg0: u64) -> Option { + Imm5::maybe_from_i8(i8::try_from(arg0 as i64).ok()?) + } + #[inline] + fn imm5_from_i8(&mut self, arg0: i8) -> Option { + Imm5::maybe_from_i8(arg0) + } + #[inline] + fn uimm5_bitcast_to_imm5(&mut self, arg0: UImm5) -> Imm5 { + Imm5::from_bits(arg0.bits() as u8) + } + #[inline] + fn uimm5_from_u8(&mut self, arg0: u8) -> Option { + UImm5::maybe_from_u8(arg0) + } + #[inline] + fn uimm5_from_u64(&mut self, arg0: u64) -> Option { + arg0.try_into().ok().and_then(UImm5::maybe_from_u8) + } + #[inline] + fn writable_zero_reg(&mut self) -> WritableReg { + writable_zero_reg() + } + #[inline] + fn neg_imm12(&mut self, arg0: Imm12) -> Imm12 { + -arg0 + } + #[inline] + fn zero_reg(&mut self) -> Reg { + zero_reg() + } + #[inline] + fn imm_from_bits(&mut self, val: u64) -> Imm12 { + Imm12::maybe_from_u64(val).unwrap() + } + #[inline] + fn imm_from_neg_bits(&mut self, val: i64) -> Imm12 { + Imm12::maybe_from_u64(val as u64).unwrap() + } + + fn gen_default_frm(&mut self) -> OptionFloatRoundingMode { + None + } + fn gen_select_reg(&mut self, cc: &IntCC, a: XReg, b: XReg, rs1: Reg, rs2: Reg) -> Reg { + let rd = self.temp_writable_reg(MInst::canonical_type_for_rc(rs1.class())); + self.emit(&MInst::SelectReg { + rd, + rs1, + rs2, + condition: IntegerCompare { + kind: *cc, + rs1: a.to_reg(), + rs2: b.to_reg(), + }, + }); + rd.to_reg() + } + fn load_u64_constant(&mut self, val: u64) -> Reg { + let rd = self.temp_writable_reg(I64); + MInst::load_constant_u64(rd, val, &mut |ty| self.temp_writable_reg(ty)) + .iter() + .for_each(|i| self.emit(i)); + rd.to_reg() + } + fn u8_as_i32(&mut self, x: u8) -> i32 { + x as i32 + } + + fn imm12_const(&mut self, val: i32) -> Imm12 { + if let Some(res) = Imm12::maybe_from_u64(val as u64) { + res + } else { + panic!("Unable to make an Imm12 value from {}", val) + } + } + fn imm12_const_add(&mut self, val: i32, add: i32) -> Imm12 { + Imm12::maybe_from_u64((val + add) as u64).unwrap() + } + + // + fn gen_shamt(&mut self, ty: Type, shamt: XReg) -> ValueRegs { + let ty_bits = if ty.bits() > 64 { 64 } else { ty.bits() }; + let shamt = { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: tmp, + rs: shamt.to_reg(), + imm12: Imm12::from_bits((ty_bits - 1) as i16), + }); + tmp.to_reg() + }; + let len_sub_shamt = { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::load_imm12(tmp, Imm12::from_bits(ty_bits as i16))); + let len_sub_shamt = self.temp_writable_reg(I64); + self.emit(&MInst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: len_sub_shamt, + rs1: tmp.to_reg(), + rs2: shamt, + }); + len_sub_shamt.to_reg() + }; + ValueRegs::two(shamt, len_sub_shamt) + } + + fn has_v(&mut self) -> bool { + self.backend.isa_flags.has_v() + } + + fn 
has_zbkb(&mut self) -> bool { + self.backend.isa_flags.has_zbkb() + } + + fn has_zba(&mut self) -> bool { + self.backend.isa_flags.has_zba() + } + + fn has_zbb(&mut self) -> bool { + self.backend.isa_flags.has_zbb() + } + + fn has_zbc(&mut self) -> bool { + self.backend.isa_flags.has_zbc() + } + + fn has_zbs(&mut self) -> bool { + self.backend.isa_flags.has_zbs() + } + + fn offset32_imm(&mut self, offset: i32) -> Offset32 { + Offset32::new(offset) + } + fn default_memflags(&mut self) -> MemFlags { + MemFlags::new() + } + + fn pack_float_rounding_mode(&mut self, f: &FRM) -> OptionFloatRoundingMode { + Some(*f) + } + + fn int_convert_2_float_op(&mut self, from: Type, is_signed: bool, to: Type) -> FpuOPRR { + FpuOPRR::int_convert_2_float_op(from, is_signed, to) + } + + fn gen_amode(&mut self, base: Reg, offset: Offset32, ty: Type) -> AMode { + AMode::RegOffset(base, i64::from(offset), ty) + } + + fn gen_const_amode(&mut self, c: VCodeConstant) -> AMode { + AMode::Const(c) + } + + fn valid_atomic_transaction(&mut self, ty: Type) -> Option { + if ty.is_int() && ty.bits() <= 64 { + Some(ty) + } else { + None + } + } + fn is_atomic_rmw_max_etc(&mut self, op: &AtomicRmwOp) -> Option<(AtomicRmwOp, bool)> { + let op = *op; + match op { + crate::ir::AtomicRmwOp::Umin => Some((op, false)), + crate::ir::AtomicRmwOp::Umax => Some((op, false)), + crate::ir::AtomicRmwOp::Smin => Some((op, true)), + crate::ir::AtomicRmwOp::Smax => Some((op, true)), + _ => None, + } + } + fn load_op(&mut self, ty: Type) -> LoadOP { + LoadOP::from_type(ty) + } + fn store_op(&mut self, ty: Type) -> StoreOP { + StoreOP::from_type(ty) + } + fn load_ext_name(&mut self, name: ExternalName, offset: i64) -> Reg { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::LoadExtName { + rd: tmp, + name: Box::new(name), + offset, + }); + tmp.to_reg() + } + + fn offset32_add(&mut self, a: Offset32, adden: i64) -> Offset32 { + a.try_add_i64(adden).expect("offset exceed range.") + } + + fn gen_stack_addr(&mut self, slot: StackSlot, offset: Offset32) -> Reg { + let result = self.temp_writable_reg(I64); + let i = self + .lower_ctx + .abi() + .sized_stackslot_addr(slot, i64::from(offset) as u32, result); + self.emit(&i); + result.to_reg() + } + fn atomic_amo(&mut self) -> AMO { + AMO::SeqCst + } + + fn lower_br_table(&mut self, index: Reg, targets: &VecMachLabel) -> Unit { + let tmp1 = self.temp_writable_reg(I64); + let tmp2 = self.temp_writable_reg(I64); + let targets: Vec = targets + .into_iter() + .copied() + .map(BranchTarget::Label) + .collect(); + self.emit(&MInst::BrTable { + index, + tmp1, + tmp2, + targets, + }); + } + + fn fp_reg(&mut self) -> PReg { + px_reg(8) + } + + fn sp_reg(&mut self) -> PReg { + px_reg(2) + } + + fn shift_int_to_most_significant(&mut self, v: XReg, ty: Type) -> XReg { + assert!(ty.is_int() && ty.bits() <= 64); + if ty == I64 { + return v; + } + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: v.to_reg(), + imm12: Imm12::from_bits((64 - ty.bits()) as i16), + }); + + self.xreg_new(tmp.to_reg()) + } + + #[inline] + fn int_compare(&mut self, kind: &IntCC, rs1: XReg, rs2: XReg) -> IntegerCompare { + IntegerCompare { + kind: *kind, + rs1: rs1.to_reg(), + rs2: rs2.to_reg(), + } + } + + #[inline] + fn vstate_from_type(&mut self, ty: Type) -> VState { + VState::from_type(ty) + } + + #[inline] + fn vstate_mf2(&mut self, vs: VState) -> VState { + VState { + vtype: VType { + lmul: VecLmul::LmulF2, + ..vs.vtype + }, + ..vs + } + } + + fn 
min_vec_reg_size(&mut self) -> u64 { + self.min_vec_reg_size + } + + #[inline] + fn ty_vec_fits_in_register(&mut self, ty: Type) -> Option { + if ty.is_vector() && (ty.bits() as u64) <= self.min_vec_reg_size() { + Some(ty) + } else { + None + } + } + + fn vec_alu_rr_dst_type(&mut self, op: &VecAluOpRR) -> Type { + MInst::canonical_type_for_rc(op.dst_regclass()) + } +} + +/// The main entry point for lowering with ISLE. +pub(crate) fn lower( + lower_ctx: &mut Lower, + backend: &Riscv64Backend, + inst: Inst, +) -> Option { + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. + let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); + generated_code::constructor_lower(&mut isle_ctx, inst) +} + +/// The main entry point for branch lowering with ISLE. +pub(crate) fn lower_branch( + lower_ctx: &mut Lower, + backend: &Riscv64Backend, + branch: Inst, + targets: &[MachLabel], +) -> Option<()> { + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. + let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); + generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec()) +} diff --git a/cranelift/codegen/src/isa/zkasm/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/zkasm/lower/isle/generated_code.rs new file mode 100644 index 000000000000..955a0a2b1171 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/lower/isle/generated_code.rs @@ -0,0 +1,9 @@ +// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of +// the generated ISLE source below because we include!() it. We must include!() it because its path +// depends on an environment variable; and also because of this, we can't do the `#[path = "..."] +// mod generated_code;` trick either. +#![allow(dead_code, unreachable_code, unreachable_patterns)] +#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)] +#![allow(irrefutable_let_patterns)] + +include!(concat!(env!("ISLE_DIR"), "/isle_zkasm.rs")); diff --git a/cranelift/codegen/src/isa/zkasm/mod.rs b/cranelift/codegen/src/isa/zkasm/mod.rs new file mode 100644 index 000000000000..7e19f7578d2e --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/mod.rs @@ -0,0 +1,228 @@ +//! risc-v 64-bit Instruction Set Architecture. + +use crate::dominator_tree::DominatorTree; +use crate::ir; +use crate::ir::{Function, Type}; +use crate::isa::zkasm::settings as riscv_settings; +use crate::isa::{Builder as IsaBuilder, FunctionAlignment, TargetIsa}; +use crate::machinst::{ + compile, CompiledCode, CompiledCodeStencil, MachInst, MachTextSectionBuilder, Reg, SigSet, + TextSectionBuilder, VCode, +}; +use crate::result::CodegenResult; +use crate::settings as shared_settings; +use alloc::{boxed::Box, vec::Vec}; +use core::fmt; +use cranelift_control::ControlPlane; +use regalloc2::MachineEnv; +use target_lexicon::{Architecture, Triple}; +mod abi; +pub(crate) mod inst; +mod lower; +mod settings; +#[cfg(feature = "unwind")] +use crate::isa::unwind::systemv; + +use inst::crate_reg_eviroment; + +use self::inst::EmitInfo; + +/// An zkasm backend. +pub struct Riscv64Backend { + triple: Triple, + flags: shared_settings::Flags, + isa_flags: riscv_settings::Flags, + mach_env: MachineEnv, +} + +impl Riscv64Backend { + /// Create a new zkasm backend with the given (shared) flags. 
+ pub fn new_with_flags( + triple: Triple, + flags: shared_settings::Flags, + isa_flags: riscv_settings::Flags, + ) -> Riscv64Backend { + let mach_env = crate_reg_eviroment(&flags); + Riscv64Backend { + triple, + flags, + isa_flags, + mach_env, + } + } + + /// This performs lowering to VCode, register-allocates the code, computes block layout and + /// finalizes branches. The result is ready for binary emission. + fn compile_vcode( + &self, + func: &Function, + domtree: &DominatorTree, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult<(VCode, regalloc2::Output)> { + let emit_info = EmitInfo::new(self.flags.clone(), self.isa_flags.clone()); + let sigs = SigSet::new::(func, &self.flags)?; + let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?; + compile::compile::(func, domtree, self, abi, emit_info, sigs, ctrl_plane) + } +} + +impl TargetIsa for Riscv64Backend { + fn compile_function( + &self, + func: &Function, + domtree: &DominatorTree, + want_disasm: bool, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult { + let (vcode, regalloc_result) = self.compile_vcode(func, domtree, ctrl_plane)?; + + let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); + let emit_result = vcode.emit(®alloc_result, want_disasm, &self.flags, ctrl_plane); + let frame_size = emit_result.frame_size; + let value_labels_ranges = emit_result.value_labels_ranges; + let buffer = emit_result.buffer; + let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; + let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; + + if let Some(disasm) = emit_result.disasm.as_ref() { + log::debug!("disassembly:\n{}", disasm); + } + + Ok(CompiledCodeStencil { + buffer, + frame_size, + vcode: emit_result.disasm, + value_labels_ranges, + sized_stackslot_offsets, + dynamic_stackslot_offsets, + bb_starts: emit_result.bb_offsets, + bb_edges: emit_result.bb_edges, + }) + } + + fn name(&self) -> &'static str { + "zkasm" + } + fn dynamic_vector_bytes(&self, _dynamic_ty: ir::Type) -> u32 { + 16 + } + + fn triple(&self) -> &Triple { + &self.triple + } + + fn flags(&self) -> &shared_settings::Flags { + &self.flags + } + + fn machine_env(&self) -> &MachineEnv { + &self.mach_env + } + + fn isa_flags(&self) -> Vec { + self.isa_flags.iter().collect() + } + + #[cfg(feature = "unwind")] + fn emit_unwind_info( + &self, + result: &CompiledCode, + kind: crate::machinst::UnwindInfoKind, + ) -> CodegenResult> { + use crate::isa::unwind::UnwindInfo; + use crate::machinst::UnwindInfoKind; + Ok(match kind { + UnwindInfoKind::SystemV => { + let mapper = self::inst::unwind::systemv::RegisterMapper; + Some(UnwindInfo::SystemV( + crate::isa::unwind::systemv::create_unwind_info_from_insts( + &result.buffer.unwind_info[..], + result.buffer.data().len(), + &mapper, + )?, + )) + } + UnwindInfoKind::Windows => None, + _ => None, + }) + } + + #[cfg(feature = "unwind")] + fn create_systemv_cie(&self) -> Option { + Some(inst::unwind::systemv::create_cie()) + } + + fn text_section_builder(&self, num_funcs: usize) -> Box { + Box::new(MachTextSectionBuilder::::new(num_funcs)) + } + + #[cfg(feature = "unwind")] + fn map_regalloc_reg_to_dwarf(&self, reg: Reg) -> Result { + inst::unwind::systemv::map_reg(reg).map(|reg| reg.0) + } + + fn function_alignment(&self) -> FunctionAlignment { + inst::Inst::function_alignment() + } + + #[cfg(feature = "disas")] + fn to_capstone(&self) -> Result { + use capstone::prelude::*; + let mut cs = Capstone::new() + .riscv() + .mode(arch::riscv::ArchMode::RiscV64) + .build()?; + // 
Similar to AArch64, RISC-V uses inline constants rather than a separate + // constant pool. We want to skip dissasembly over inline constants instead + // of stopping on invalid bytes. + cs.set_skipdata(true)?; + Ok(cs) + } + + fn has_native_fma(&self) -> bool { + true + } + + fn has_x86_blendv_lowering(&self, _: Type) -> bool { + false + } + + fn has_x86_pshufb_lowering(&self) -> bool { + false + } + + fn has_x86_pmulhrsw_lowering(&self) -> bool { + false + } + + fn has_x86_pmaddubsw_lowering(&self) -> bool { + false + } +} + +impl fmt::Display for Riscv64Backend { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("MachBackend") + .field("name", &self.name()) + .field("triple", &self.triple()) + .field("flags", &format!("{}", self.flags())) + .finish() + } +} + +/// Create a new `isa::Builder`. +pub fn isa_builder(triple: Triple) -> IsaBuilder { + match triple.architecture { + Architecture::Sparc => {} + _ => unreachable!(), + } + IsaBuilder { + triple, + setup: riscv_settings::builder(), + constructor: |triple, shared_flags, builder| { + let isa_flags = riscv_settings::Flags::new(&shared_flags, builder); + let backend = Riscv64Backend::new_with_flags(triple, shared_flags, isa_flags); + Ok(backend.wrapped()) + }, + } +} diff --git a/cranelift/codegen/src/isa/zkasm/settings.rs b/cranelift/codegen/src/isa/zkasm/settings.rs new file mode 100644 index 000000000000..a91e91e61938 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/settings.rs @@ -0,0 +1,8 @@ +//! zkasm Settings. + +use crate::settings::{self, detail, Builder, Value}; +use core::fmt; + +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +include!(concat!(env!("OUT_DIR"), "/settings-zkasm.rs")); diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index b537d9e10cb0..53fac6435810 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -507,6 +507,14 @@ macro_rules! isle_common_prelude_methods { } } + #[inline] + fn ty_addr32(&mut self, ty: Type) -> Option { + match ty { + I32 | R32 => Some(ty), + _ => None, + } + } + #[inline] fn u64_from_imm64(&mut self, imm: Imm64) -> u64 { imm.bits() as u64 diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index e08384c30ab2..1eda843fe50d 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -185,6 +185,7 @@ pub trait MachInst: Clone + Debug { /// block, if any. Note that the return value must not be subject to /// register allocation. 
fn gen_block_start( + _block_index: usize, _is_indirect_branch_target: bool, _is_forward_edge_cfi_enabled: bool, ) -> Option { diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 59c7328c3aa8..d5ebee420d2f 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -896,6 +896,7 @@ impl VCode { } if let Some(block_start) = I::gen_block_start( + block.index(), self.block_order.is_indirect_branch_target(block), is_forward_edge_cfi_enabled, ) { diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index dd3c186a6747..188f423c41e7 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -461,6 +461,9 @@ (decl ty_addr64 (Type) Type) (extern extractor ty_addr64 ty_addr64) +(decl ty_addr32 (Type) Type) +(extern extractor ty_addr32 ty_addr32) + ;; A pure constructor that matches everything except vectors with size 32X2. (decl pure partial not_vec32x2 (Type) Type) (extern constructor not_vec32x2 not_vec32x2) diff --git a/cranelift/data/add.zkasm b/cranelift/data/add.zkasm new file mode 100644 index 000000000000..9bc8f465b6b3 --- /dev/null +++ b/cranelift/data/add.zkasm @@ -0,0 +1,19 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 2 + 3 => A + 0 + 5 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/add_func.zkasm b/cranelift/data/add_func.zkasm new file mode 100644 index 000000000000..cddf7c328475 --- /dev/null +++ b/cranelift/data/add_func.zkasm @@ -0,0 +1,26 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 2 => A + 0 + 3 => B + zkPC + 2 => RR + :JMP(function_2) + 0 + 5 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +function_2: + $ => A :ADD + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/add_memory.zkasm b/cranelift/data/add_memory.zkasm new file mode 100644 index 000000000000..2040a74e15d8 --- /dev/null +++ b/cranelift/data/add_memory.zkasm @@ -0,0 +1,39 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 0 => B + 0 + 2 => D + $ => A :MLOAD(CTX) + $ => E :ADD + D :MSTORE(E) + 0 + 8 => B + 0 + 3 => E + $ => A :MLOAD(CTX) + $ => A :ADD + E :MSTORE(A) + 0 + 0 => B + $ => A :MLOAD(CTX) + $ => A :ADD + $ => A :MLOAD(A) + A => E + 0 + 8 => B + $ => A :MLOAD(CTX) + $ => A :ADD + $ => B :MLOAD(A) + E => A + $ => A :ADD + 0 + 5 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/counter.zkasm b/cranelift/data/counter.zkasm new file mode 100644 index 000000000000..a69561b7ebb0 --- /dev/null +++ b/cranelift/data/counter.zkasm @@ -0,0 +1,28 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 0 => A + :JMP(L1_1) +L1_1: + 0 + 1 => B + $ => A :ADD + 0 + 10 => B + $ => B :EQ + B :JMPNZ(L1_3) + :JMP(L1_1) +L1_3: + 0 + 10 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/fibonacci.zkasm 
b/cranelift/data/fibonacci.zkasm new file mode 100644 index 000000000000..3f6d73266c71 --- /dev/null +++ b/cranelift/data/fibonacci.zkasm @@ -0,0 +1,42 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + SP + 1 => SP + 0 + 0 => A + A => D + 0 + 0 => A + 0 + 1 => B + B :MSTORE(SP) + :JMP(L1_1) +L1_1: + $ => A :ADD + A => C + B :MSTORE(SP) + 0 + 1 => B + D => A + $ => A :ADD + 0 + 10 => B + $ => E :EQ + E :JMPNZ(L1_3) + C => B + A => D + $ => A :MLOAD(SP) + :JMP(L1_1) +L1_3: + 0 + 89 => B + C => A + B :ASSERT + SP - 1 => SP + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/fibonacci_recursive.zkasm b/cranelift/data/fibonacci_recursive.zkasm new file mode 100644 index 000000000000..90e0da518126 --- /dev/null +++ b/cranelift/data/fibonacci_recursive.zkasm @@ -0,0 +1,67 @@ +start: + zkPC + 2 => RR + :JMP(function_2) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + SP + 2 => SP + A :MSTORE(SP) + 0 + 0 => B + $ => E :EQ + E :JMPNZ(L1_5) + 0 + 1 => B + $ => A :MLOAD(SP) + $ => B :SUB + B => A + 0 + 0 => B + $ => A :EQ + A :JMPNZ(L1_3) + 0 + 1 => B + $ => A :MLOAD(SP) + $ => B :SUB + A :MSTORE(SP) + B => A + zkPC + 2 => RR + :JMP(function_1) + A :MSTORE(SP + 8) + 0 + 2 => B + $ => A :MLOAD(SP) + $ => A :SUB + zkPC + 2 => RR + :JMP(function_1) + A => B + $ => A :MLOAD(SP + 8) + $ => A :ADD + :JMP(L1_4) +L1_3: + 0 + 1 => A + :JMP(L1_4) +L1_4: + :JMP(L1_6) +L1_5: + 0 + 0 => A + :JMP(L1_6) +L1_6: + SP - 2 => SP + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +function_2: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 11 => A + zkPC + 2 => RR + :JMP(function_1) + 0 + 89 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/gen.sh b/cranelift/data/gen.sh new file mode 100755 index 000000000000..a6af61f42a61 --- /dev/null +++ b/cranelift/data/gen.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +cargo build +for name in add counter add_func add_memory fibonacci locals locals_simple fibonacci_recursive mul +do + echo $name; + ../target/debug/clif-util wasm --target sparc-unknown-unknown ../../zkwasm/data/$name.wat > data/$name.zkasm +done diff --git a/cranelift/data/locals.zkasm b/cranelift/data/locals.zkasm new file mode 100644 index 000000000000..9bc8f465b6b3 --- /dev/null +++ b/cranelift/data/locals.zkasm @@ -0,0 +1,19 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 2 + 3 => A + 0 + 5 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/locals_simple.zkasm b/cranelift/data/locals_simple.zkasm new file mode 100644 index 000000000000..dcbe3fb15b7a --- /dev/null +++ b/cranelift/data/locals_simple.zkasm @@ -0,0 +1,19 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 2 => A + 0 + 2 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index 3dd3ae2ccd2b..c0c00d4388da 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -10,6 +10,7 @@ use crate::disasm::print_all; use anyhow::{Context as _, 
Result}; use clap::Parser; +use cranelift_codegen::ir::ExternalName; use cranelift_codegen::print_errors::{pretty_error, pretty_verifier_error}; use cranelift_codegen::settings::FlagsOrIsa; use cranelift_codegen::timing; @@ -17,6 +18,7 @@ use cranelift_codegen::Context; use cranelift_entity::EntityRef; use cranelift_reader::parse_sets_and_triple; use cranelift_wasm::{translate_module, DummyEnvironment, FuncIndex}; +use std::collections::HashMap; use std::io::Read; use std::path::Path; use std::path::PathBuf; @@ -235,6 +237,15 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - vprintln!(options.verbose, ""); } + println!("start:"); + let start_func = dummy_environ + .info + .start_func + .expect("Must have a start function"); + println!(" zkPC + 2 => RR"); + println!(" :JMP(function_{})", start_func.index()); + println!(" :JMP(finalizeExecution)"); + let num_func_imports = dummy_environ.get_num_func_imports(); let mut total_module_code_size = 0; let mut context = Context::new(); @@ -243,6 +254,7 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - let mut saved_size = None; let func_index = num_func_imports + def_index.index(); + println!("\nfunction_{}:", func_index); let mut mem = vec![]; let (relocs, traps, stack_maps) = if options.check_translation { if let Err(errors) = context.verify(fisa) { @@ -254,6 +266,82 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - .compile_and_emit(isa, &mut mem, &mut Default::default()) .map_err(|err| anyhow::anyhow!("{}", pretty_error(&err.func, err.inner)))?; let code_info = compiled_code.code_info(); + let mut code_buffer = compiled_code.code_buffer().to_vec(); + let mut delta = 0i32; + for reloc in compiled_code.buffer.relocs() { + let start = (reloc.offset as i32 + delta) as usize; + let mut pos = start; + while code_buffer[pos] != b'\n' { + pos += 1; + delta -= 1; + } + + let code = if let ExternalName::User(name) = reloc.name { + let name = &func.params.user_named_funcs()[name]; + if name.index == 0 { + b" B :ASSERT".to_vec() + } else { + format!(" zkPC + 2 => RR\n :JMP(function_{})", name.index) + .as_bytes() + .to_vec() + } + } else { + b" UNKNOWN".to_vec() + }; + delta += code.len() as i32; + + code_buffer.splice(start..pos, code); + } + + if let Ok(code) = std::str::from_utf8(&code_buffer) { + let mut label_definition: HashMap = HashMap::new(); + let mut label_uses: HashMap> = HashMap::new(); + let mut lines = Vec::new(); + for (index, line) in code.lines().enumerate() { + let mut line = line.to_string(); + if line.starts_with(&"label_") { + let label_index: usize = line[6..line.len() - 1] + .parse() + .expect("Failed to parse label index"); + line = format!("L{func_index}_{label_index}:"); + label_definition.insert(label_index, index); + } else if line.contains(&"label_") { + let pos = line.find(&"label_").unwrap(); + let pos_end = pos + line[pos..].find(&")").unwrap(); + let label_index: usize = line[pos + 6..pos_end] + .parse() + .expect("Failed to parse label index"); + line.replace_range(pos..pos_end, &format!("L{func_index}_{label_index}")); + label_uses.entry(label_index).or_default().push(index); + } + lines.push(line); + } + + let mut lines_to_delete = Vec::new(); + for (label, label_line) in label_definition { + match label_uses.entry(label) { + std::collections::hash_map::Entry::Occupied(uses) => { + if uses.get().len() == 1 { + let use_line = uses.get()[0]; + if use_line + 1 == label_line { + lines_to_delete.push(use_line); + 
lines_to_delete.push(label_line); + } + } + } + std::collections::hash_map::Entry::Vacant(_) => { + lines_to_delete.push(label_line); + } + } + } + lines_to_delete.sort(); + lines_to_delete.reverse(); + for index in lines_to_delete { + lines.remove(index); + } + + println!("{}", lines.join("\n")); + } if options.print_size { println!( @@ -310,6 +398,13 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - context.clear(); } + let postamble = " +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) +"; + println!("{postamble}"); + if !options.check_translation && options.print_size { println!("Total module code size: {} bytes", total_module_code_size); let total_bytecode_size: usize = dummy_environ.func_bytecode_sizes.iter().sum();