From 2af4f6061d4c3636bbe580b5f70a9ab66db7595c Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Thu, 24 Aug 2023 12:34:07 +0300 Subject: [PATCH 01/68] zkasm: copy riscv64 backend as zkasm starting point does not build yet --- cranelift/codegen/Cargo.toml | 4 +- cranelift/codegen/build.rs | 15 +- cranelift/codegen/meta/src/isa/mod.rs | 7 +- cranelift/codegen/meta/src/isa/zkasm.rs | 101 + cranelift/codegen/src/isa/mod.rs | 6 +- cranelift/codegen/src/isa/zkasm/abi.rs | 986 +++++ cranelift/codegen/src/isa/zkasm/inst.isle | 2964 +++++++++++++++ cranelift/codegen/src/isa/zkasm/inst/args.rs | 1812 +++++++++ cranelift/codegen/src/isa/zkasm/inst/emit.rs | 3226 +++++++++++++++++ .../codegen/src/isa/zkasm/inst/emit_tests.rs | 2338 ++++++++++++ .../codegen/src/isa/zkasm/inst/encode.rs | 299 ++ cranelift/codegen/src/isa/zkasm/inst/imms.rs | 250 ++ cranelift/codegen/src/isa/zkasm/inst/mod.rs | 2140 +++++++++++ cranelift/codegen/src/isa/zkasm/inst/regs.rs | 223 ++ .../codegen/src/isa/zkasm/inst/unwind.rs | 2 + .../src/isa/zkasm/inst/unwind/systemv.rs | 174 + .../codegen/src/isa/zkasm/inst/vector.rs | 996 +++++ .../codegen/src/isa/zkasm/inst_vector.isle | 1773 +++++++++ cranelift/codegen/src/isa/zkasm/lower.isle | 2082 +++++++++++ cranelift/codegen/src/isa/zkasm/lower.rs | 33 + cranelift/codegen/src/isa/zkasm/lower/isle.rs | 620 ++++ .../isa/zkasm/lower/isle/generated_code.rs | 9 + cranelift/codegen/src/isa/zkasm/mod.rs | 228 ++ cranelift/codegen/src/isa/zkasm/settings.rs | 8 + 24 files changed, 20292 insertions(+), 4 deletions(-) create mode 100644 cranelift/codegen/meta/src/isa/zkasm.rs create mode 100644 cranelift/codegen/src/isa/zkasm/abi.rs create mode 100644 cranelift/codegen/src/isa/zkasm/inst.isle create mode 100644 cranelift/codegen/src/isa/zkasm/inst/args.rs create mode 100644 cranelift/codegen/src/isa/zkasm/inst/emit.rs create mode 100644 cranelift/codegen/src/isa/zkasm/inst/emit_tests.rs create mode 100644 cranelift/codegen/src/isa/zkasm/inst/encode.rs create mode 100644 cranelift/codegen/src/isa/zkasm/inst/imms.rs create mode 100644 cranelift/codegen/src/isa/zkasm/inst/mod.rs create mode 100644 cranelift/codegen/src/isa/zkasm/inst/regs.rs create mode 100644 cranelift/codegen/src/isa/zkasm/inst/unwind.rs create mode 100644 cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs create mode 100644 cranelift/codegen/src/isa/zkasm/inst/vector.rs create mode 100644 cranelift/codegen/src/isa/zkasm/inst_vector.isle create mode 100644 cranelift/codegen/src/isa/zkasm/lower.isle create mode 100644 cranelift/codegen/src/isa/zkasm/lower.rs create mode 100644 cranelift/codegen/src/isa/zkasm/lower/isle.rs create mode 100644 cranelift/codegen/src/isa/zkasm/lower/isle/generated_code.rs create mode 100644 cranelift/codegen/src/isa/zkasm/mod.rs create mode 100644 cranelift/codegen/src/isa/zkasm/settings.rs diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 15d2a1c6c108..5cbedb9a82de 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -73,6 +73,7 @@ x86 = [] arm64 = [] s390x = [] riscv64 = [] +zkasm = [] # Enable the ISA target for the host machine host-arch = [] @@ -81,7 +82,8 @@ all-arch = [ "x86", "arm64", "s390x", - "riscv64" + "riscv64", + "zkasm" ] # For dependent crates that want to serialize some parts of cranelift diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index 211b62177dd2..a634bd5209b2 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -33,7 +33,7 @@ fn main() { .cloned() .filter(|isa| { 
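        // Editorial note (not part of the original commit): with the `zkasm`
        // feature added to Cargo.toml above, this filter sees
        // `CARGO_FEATURE_ZKASM` in the environment and keeps `Isa::ZkAsm`, so
        // the meta crate emits its settings and the `isle_zkasm.rs`
        // compilation unit registered later in this file.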
let env_key = format!("CARGO_FEATURE_{}", isa.to_string().to_uppercase()); - env::var(env_key).is_ok() + dbg!(env::var(dbg!(env_key)).is_ok()) }) .collect::>(); @@ -200,6 +200,8 @@ fn get_isle_compilations( let src_isa_risc_v = make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("riscv64")); + let src_isa_zkasm = + make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("zkasm")); // This is a set of ISLE compilation units. // // The format of each entry is: @@ -280,6 +282,17 @@ fn get_isle_compilations( ], untracked_inputs: vec![clif_lower_isle.clone()], }, + IsleCompilation { + output: out_dir.join("isle_zkasm.rs"), + inputs: vec![ + prelude_isle.clone(), + prelude_lower_isle.clone(), + src_isa_zkasm.join("inst.isle"), + src_isa_zkasm.join("inst_vector.isle"), + src_isa_zkasm.join("lower.isle"), + ], + untracked_inputs: vec![clif_lower_isle.clone()], + }, ], }) } diff --git a/cranelift/codegen/meta/src/isa/mod.rs b/cranelift/codegen/meta/src/isa/mod.rs index ecda9b83d054..1440e07c0abd 100644 --- a/cranelift/codegen/meta/src/isa/mod.rs +++ b/cranelift/codegen/meta/src/isa/mod.rs @@ -6,6 +6,7 @@ mod arm64; mod riscv64; mod s390x; pub(crate) mod x86; +mod zkasm; /// Represents known ISA target. #[derive(PartialEq, Copy, Clone)] @@ -14,6 +15,7 @@ pub enum Isa { Arm64, S390x, Riscv64, + ZkAsm } impl Isa { @@ -29,6 +31,7 @@ impl Isa { pub fn from_arch(arch: &str) -> Option { match arch { "aarch64" => Some(Isa::Arm64), + "sparc" | "zkasm" => Some(Isa::ZkAsm), "s390x" => Some(Isa::S390x), x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86), "riscv64" | "riscv64gc" | "riscv64imac" => Some(Isa::Riscv64), @@ -38,7 +41,7 @@ impl Isa { /// Returns all supported isa targets. pub fn all() -> &'static [Isa] { - &[Isa::X86, Isa::Arm64, Isa::S390x, Isa::Riscv64] + &[Isa::X86, Isa::Arm64, Isa::S390x, Isa::Riscv64, Isa::ZkAsm] } } @@ -50,6 +53,7 @@ impl fmt::Display for Isa { Isa::Arm64 => write!(f, "arm64"), Isa::S390x => write!(f, "s390x"), Isa::Riscv64 => write!(f, "riscv64"), + Isa::ZkAsm => write!(f, "zkasm"), } } } @@ -61,6 +65,7 @@ pub(crate) fn define(isas: &[Isa]) -> Vec { Isa::Arm64 => arm64::define(), Isa::S390x => s390x::define(), Isa::Riscv64 => riscv64::define(), + Isa::ZkAsm => zkasm::define(), }) .collect() } diff --git a/cranelift/codegen/meta/src/isa/zkasm.rs b/cranelift/codegen/meta/src/isa/zkasm.rs new file mode 100644 index 000000000000..f080aaef3737 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/zkasm.rs @@ -0,0 +1,101 @@ +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::settings::SettingGroupBuilder; + +macro_rules! 
define_zvl_ext { + (DEF: $settings:expr, $size:expr) => {{ + let name = concat!("has_zvl", $size, "b"); + let desc = concat!("has extension Zvl", $size, "b?"); + let comment = concat!( + "Zvl", + $size, + "b: Vector register has a minimum of ", + $size, + " bits" + ); + $settings.add_bool(&name, &desc, &comment, false) + }}; + ($settings:expr, $size:expr $(, $implies:expr)*) => {{ + let has_feature = define_zvl_ext!(DEF: $settings, $size); + + let name = concat!("zvl", $size, "b"); + let desc = concat!("Has a vector register size of at least ", $size, " bits"); + + let preset = $settings.add_preset(&name, &desc, preset!(has_feature $( && $implies )*)); + (has_feature, preset) + }}; +} + +pub(crate) fn define() -> TargetIsa { + let mut setting = SettingGroupBuilder::new("riscv64"); + + let _has_m = setting.add_bool("has_m", "has extension M?", "", false); + let _has_a = setting.add_bool("has_a", "has extension A?", "", false); + let _has_f = setting.add_bool("has_f", "has extension F?", "", false); + let _has_d = setting.add_bool("has_d", "has extension D?", "", false); + let _has_v = setting.add_bool("has_v", "has extension V?", "", false); + let _has_c = setting.add_bool("has_c", "has extension C?", "", false); + let _has_zbkb = setting.add_bool( + "has_zbkb", + "has extension zbkb?", + "Zbkb: Bit-manipulation for Cryptography", + false, + ); + let _has_zba = setting.add_bool( + "has_zba", + "has extension zba?", + "Zba: Address Generation", + false, + ); + let _has_zbb = setting.add_bool( + "has_zbb", + "has extension zbb?", + "Zbb: Basic bit-manipulation", + false, + ); + let _has_zbc = setting.add_bool( + "has_zbc", + "has extension zbc?", + "Zbc: Carry-less multiplication", + false, + ); + let _has_zbs = setting.add_bool( + "has_zbs", + "has extension zbs?", + "Zbs: Single-bit instructions", + false, + ); + + let _has_zicsr = setting.add_bool( + "has_zicsr", + "has extension zicsr?", + "Zicsr: Control and Status Register (CSR) Instructions", + false, + ); + let _has_zifencei = setting.add_bool( + "has_zifencei", + "has extension zifencei?", + "Zifencei: Instruction-Fetch Fence", + false, + ); + + // Zvl*: Minimum Vector Length Standard Extensions + // These extension specifiy the minimum number of bits in a vector register. + // Since it is a minimum, Zvl64b implies Zvl32b, Zvl128b implies Zvl64b, etc. + // The V extension supports a maximum of 64K bits in a single register. 
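    // Editorial note (not part of the original commit): for one concrete
    // size, `define_zvl_ext!(setting, 64, zvl32b)` above expands to roughly
    //
    //     let has_zvl64b = setting.add_bool(
    //         "has_zvl64b",
    //         "has extension Zvl64b?",
    //         "Zvl64b: Vector register has a minimum of 64 bits",
    //         false,
    //     );
    //     let zvl64b = setting.add_preset(
    //         "zvl64b",
    //         "Has a vector register size of at least 64 bits",
    //         preset!(has_zvl64b && zvl32b),
    //     );
    //
    // so each preset enables its own flag and pulls in the next-smaller Zvl
    // preset, encoding the implication chain described in this comment.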
+ // + // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#181-zvl-minimum-vector-length-standard-extensions + let (_, zvl32b) = define_zvl_ext!(setting, 32); + let (_, zvl64b) = define_zvl_ext!(setting, 64, zvl32b); + let (_, zvl128b) = define_zvl_ext!(setting, 128, zvl64b); + let (_, zvl256b) = define_zvl_ext!(setting, 256, zvl128b); + let (_, zvl512b) = define_zvl_ext!(setting, 512, zvl256b); + let (_, zvl1024b) = define_zvl_ext!(setting, 1024, zvl512b); + let (_, zvl2048b) = define_zvl_ext!(setting, 2048, zvl1024b); + let (_, zvl4096b) = define_zvl_ext!(setting, 4096, zvl2048b); + let (_, zvl8192b) = define_zvl_ext!(setting, 8192, zvl4096b); + let (_, zvl16384b) = define_zvl_ext!(setting, 16384, zvl8192b); + let (_, zvl32768b) = define_zvl_ext!(setting, 32768, zvl16384b); + let (_, _zvl65536b) = define_zvl_ext!(setting, 65536, zvl32768b); + + TargetIsa::new("riscv64", setting.build()) +} diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 860fcbd24e14..5b2fffc57101 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -74,6 +74,9 @@ pub mod riscv64; #[cfg(feature = "s390x")] mod s390x; +#[cfg(feature = "zkasm")] +pub mod zkasm; + pub mod unwind; mod call_conv; @@ -103,6 +106,7 @@ pub fn lookup(triple: Triple) -> Result { Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple), Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple), Architecture::Riscv64 { .. } => isa_builder!(riscv64, (feature = "riscv64"), triple), + Architecture::Sparc { .. } => isa_builder!(zkasm, (feature = "zkasm"), triple), _ => Err(LookupError::Unsupported), } } @@ -110,7 +114,7 @@ pub fn lookup(triple: Triple) -> Result { /// The string names of all the supported, but possibly not enabled, architectures. The elements of /// this slice are suitable to be passed to the [lookup_by_name] function to obtain the default /// configuration for that architecture. -pub const ALL_ARCHITECTURES: &[&str] = &["x86_64", "aarch64", "s390x", "riscv64"]; +pub const ALL_ARCHITECTURES: &[&str] = &["x86_64", "aarch64", "s390x", "riscv64", "sparc"]; /// Look for a supported ISA with the given `name`. /// Return a builder that can create a corresponding `TargetIsa`. diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs new file mode 100644 index 000000000000..b410c4fceb6f --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -0,0 +1,986 @@ +//! Implementation of a standard Riscv64 ABI. + +use crate::ir; +use crate::ir::types::*; + +use crate::ir::ExternalName; +use crate::ir::MemFlags; +use crate::isa; + +use crate::isa::zkasm::{inst::EmitState, inst::*}; +use crate::isa::CallConv; +use crate::machinst::*; + +use crate::ir::types::I8; +use crate::ir::LibCall; +use crate::ir::Signature; +use crate::isa::zkasm::settings::Flags as RiscvFlags; +use crate::isa::unwind::UnwindInst; +use crate::settings; +use crate::CodegenError; +use crate::CodegenResult; +use alloc::boxed::Box; +use alloc::vec::Vec; +use regalloc2::PRegSet; +use regs::x_reg; + +use smallvec::{smallvec, SmallVec}; + +/// Support for the Riscv64 ABI from the callee side (within a function body). +pub(crate) type Riscv64Callee = Callee; + +/// Support for the Riscv64 ABI from the caller side (at a callsite). +pub(crate) type Riscv64ABICallSite = CallSite; + +/// This is the limit for the size of argument and return-value areas on the +/// stack. 
We place a reasonable limit here to avoid integer overflow issues +/// with 32-bit arithmetic: for now, 128 MB. +static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024; + +/// Riscv64-specific ABI behavior. This struct just serves as an implementation +/// point for the trait; it is never actually instantiated. +pub struct Riscv64MachineDeps; + +impl IsaFlags for RiscvFlags {} + +impl RiscvFlags { + pub(crate) fn min_vec_reg_size(&self) -> u64 { + let entries = [ + (self.has_zvl65536b(), 65536), + (self.has_zvl32768b(), 32768), + (self.has_zvl16384b(), 16384), + (self.has_zvl8192b(), 8192), + (self.has_zvl4096b(), 4096), + (self.has_zvl2048b(), 2048), + (self.has_zvl1024b(), 1024), + (self.has_zvl512b(), 512), + (self.has_zvl256b(), 256), + // In order to claim the Application Profile V extension, a minimum + // register size of 128 is required. i.e. V implies Zvl128b. + (self.has_v(), 128), + (self.has_zvl128b(), 128), + (self.has_zvl64b(), 64), + (self.has_zvl32b(), 32), + ]; + + for (has_flag, size) in entries.into_iter() { + if !has_flag { + continue; + } + + // Due to a limitation in regalloc2, we can't support types + // larger than 1024 bytes. So limit that here. + return std::cmp::min(size, 1024); + } + + return 0; + } +} + +impl ABIMachineSpec for Riscv64MachineDeps { + type I = Inst; + type F = RiscvFlags; + + fn word_bits() -> u32 { + 64 + } + + /// Return required stack alignment in bytes. + fn stack_align(_call_conv: isa::CallConv) -> u32 { + 16 + } + + fn compute_arg_locs<'a, I>( + call_conv: isa::CallConv, + _flags: &settings::Flags, + params: I, + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, + mut args: ArgsAccumulator<'_>, + ) -> CodegenResult<(u32, Option)> + where + I: IntoIterator, + { + // All registers that can be used as parameters or rets. + // both start and end are included. + let (x_start, x_end, f_start, f_end) = match (call_conv, args_or_rets) { + (isa::CallConv::Tail, _) => (9, 29, 0, 31), + (_, ArgsOrRets::Args) => (10, 17, 10, 17), + (_, ArgsOrRets::Rets) => (10, 11, 10, 11), + }; + let mut next_x_reg = x_start; + let mut next_f_reg = f_start; + // Stack space. + let mut next_stack: u32 = 0; + + for param in params { + if let ir::ArgumentPurpose::StructArgument(size) = param.purpose { + let offset = next_stack; + assert!(size % 8 == 0, "StructArgument size is not properly aligned"); + next_stack += size; + args.push(ABIArg::StructArg { + pointer: None, + offset: offset as i64, + size: size as u64, + purpose: param.purpose, + }); + continue; + } + + // Find regclass(es) of the register(s) used to store a value of this type. + let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?; + let mut slots = ABIArgSlotVec::new(); + for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) { + let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int { + let x = Some(x_reg(next_x_reg)); + next_x_reg += 1; + x + } else if (next_f_reg <= f_end) && *rc == RegClass::Float { + let x = Some(f_reg(next_f_reg)); + next_f_reg += 1; + x + } else { + None + }; + if let Some(reg) = next_reg { + slots.push(ABIArgSlot::Reg { + reg: reg.to_real_reg().unwrap(), + ty: *reg_ty, + extension: param.extension, + }); + } else { + // Compute size and 16-byte stack alignment happens + // separately after all args. + let size = reg_ty.bits() / 8; + let size = std::cmp::max(size, 8); + // Align. 
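                    // A minimal standalone sketch (editorial, not from the
                    // original commit) of the `align_to` rounding used just
                    // below; the real helper comes from the cranelift
                    // machinst support code imported at the top of this file:
                    fn align_to_sketch(x: u32, alignment: u32) -> u32 {
                        // Round `x` up to the next multiple of a power-of-two
                        // alignment.
                        debug_assert!(alignment.is_power_of_two());
                        (x + alignment - 1) & !(alignment - 1)
                    }
                    // e.g. align_to_sketch(12, 8) == 16, so the stack slot
                    // assigned below always starts on a boundary of its own
                    // size.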
+ debug_assert!(size.is_power_of_two()); + next_stack = align_to(next_stack, size); + slots.push(ABIArgSlot::Stack { + offset: next_stack as i64, + ty: *reg_ty, + extension: param.extension, + }); + next_stack += size; + } + } + args.push(ABIArg::Slots { + slots, + purpose: param.purpose, + }); + } + let pos: Option = if add_ret_area_ptr { + assert!(ArgsOrRets::Args == args_or_rets); + if next_x_reg <= x_end { + let arg = ABIArg::reg( + x_reg(next_x_reg).to_real_reg().unwrap(), + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + args.push(arg); + } else { + let arg = ABIArg::stack( + next_stack as i64, + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + args.push(arg); + next_stack += 8; + } + Some(args.args().len() - 1) + } else { + None + }; + + next_stack = align_to(next_stack, Self::stack_align(call_conv)); + + // To avoid overflow issues, limit the arg/return size to something + // reasonable -- here, 128 MB. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((next_stack, pos)) + } + + fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 { + // lr fp. + 16 + } + + fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { + Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted()) + } + + fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted()) + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_move(to_reg, from_reg, ty) + } + + fn gen_extend( + to_reg: Writable, + from_reg: Reg, + signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Inst { + assert!(from_bits < to_bits); + Inst::Extend { + rd: to_reg, + rn: from_reg, + signed, + from_bits, + to_bits, + } + } + + fn get_ext_mode( + _call_conv: isa::CallConv, + specified: ir::ArgumentExtension, + ) -> ir::ArgumentExtension { + specified + } + + fn gen_args(_isa_flags: &crate::isa::zkasm::settings::Flags, args: Vec) -> Inst { + Inst::Args { args } + } + + fn gen_ret( + _setup_frame: bool, + _isa_flags: &Self::F, + _call_conv: isa::CallConv, + rets: Vec, + stack_bytes_to_pop: u32, + ) -> Inst { + Inst::Ret { + rets, + stack_bytes_to_pop, + } + } + + fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg { + spilltmp_reg() + } + + fn gen_add_imm( + _call_conv: isa::CallConv, + into_reg: Writable, + from_reg: Reg, + imm: u32, + ) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: into_reg, + rs: from_reg, + imm12, + }); + } else { + insts.extend(Inst::load_constant_u32( + writable_spilltmp_reg2(), + imm as u64, + &mut |_| writable_spilltmp_reg2(), + )); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: into_reg, + rs1: spilltmp_reg2(), + rs2: from_reg, + }); + } + insts + } + + fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec { + let mut insts = SmallVec::new(); + insts.push(Inst::TrapIfC { + cc: IntCC::UnsignedLessThan, + rs1: stack_reg(), + rs2: limit_reg, + trap_code: ir::TrapCode::StackOverflow, + }); + insts + } + + fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, _ty: Type) -> Inst { + Inst::LoadAddr { + rd: into_reg, + mem: mem.into(), + } + } + + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64, ty); + Inst::gen_load(into_reg, mem, 
ty, MemFlags::trusted()) + } + + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64, ty); + Inst::gen_store(mem, from_reg, ty, MemFlags::trusted()) + } + + fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec { + let mut insts = SmallVec::new(); + if amount == 0 { + return insts; + } + insts.push(Inst::AdjustSp { + amount: amount as i64, + }); + insts + } + + fn gen_nominal_sp_adj(offset: i32) -> Inst { + Inst::VirtualSPOffsetAdj { + amount: offset as i64, + } + } + + fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec { + // add sp,sp,-16 ;; alloc stack space for fp. + // sd ra,8(sp) ;; save ra. + // sd fp,0(sp) ;; store old fp. + // mv fp,sp ;; set fp to sp. + let mut insts = SmallVec::new(); + insts.push(Inst::AdjustSp { amount: -16 }); + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(8, I64), + link_reg(), + I64, + )); + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(0, I64), + fp_reg(), + I64, + )); + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::PushFrameRegs { + offset_upward_to_caller_sp: 16, // FP, LR + }, + }); + } + insts.push(Inst::Mov { + rd: writable_fp_reg(), + rm: stack_reg(), + ty: I64, + }); + insts + } + /// reverse of gen_prologue_frame_setup. + fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec { + let mut insts = SmallVec::new(); + insts.push(Self::gen_load_stack( + StackAMode::SPOffset(8, I64), + writable_link_reg(), + I64, + )); + insts.push(Self::gen_load_stack( + StackAMode::SPOffset(0, I64), + writable_fp_reg(), + I64, + )); + insts.push(Inst::AdjustSp { amount: 16 }); + insts + } + + fn gen_probestack(insts: &mut SmallInstVec, frame_size: u32) { + insts.extend(Inst::load_constant_u32( + writable_a0(), + frame_size as u64, + &mut |_| writable_a0(), + )); + insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: ExternalName::LibCall(LibCall::Probestack), + uses: smallvec![CallArgPair { + vreg: a0(), + preg: a0(), + }], + defs: smallvec![], + clobbers: PRegSet::empty(), + opcode: Opcode::Call, + callee_callconv: CallConv::SystemV, + caller_callconv: CallConv::SystemV, + callee_pop_size: 0, + }), + }); + } + // Returns stack bytes used as well as instructions. Does not adjust + // nominal SP offset; abi_impl generic code will do that. + fn gen_clobber_save( + _call_conv: isa::CallConv, + setup_frame: bool, + flags: &settings::Flags, + clobbered_callee_saves: &[Writable], + fixed_frame_storage_size: u32, + _outgoing_args_size: u32, + ) -> (u64, SmallVec<[Inst; 16]>) { + let mut insts = SmallVec::new(); + let clobbered_size = compute_clobber_size(&clobbered_callee_saves); + // Adjust the stack pointer downward for clobbers and the function fixed + // frame (spillslots and storage slots). + let stack_size = fixed_frame_storage_size + clobbered_size; + if flags.unwind_info() && setup_frame { + // The *unwind* frame (but not the actual frame) starts at the + // clobbers, just below the saved FP/LR pair. + insts.push(Inst::Unwind { + inst: UnwindInst::DefineNewFrame { + offset_downward_to_clobbers: clobbered_size, + offset_upward_to_caller_sp: 16, // FP, LR + }, + }); + } + // Store each clobbered register in order at offsets from SP, + // placing them above the fixed frame slots. + if stack_size > 0 { + // since we use fp, we didn't need use UnwindInst::StackAlloc. 
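            // Editorial sketch (not from the original commit): the loop below
            // stores each callee-save at a negative offset from the incoming
            // SP and only afterwards emits the `AdjustSp`, so the i-th
            // clobber (0-based) lands at `incoming_sp - 8 * (i + 1)`:
            fn clobber_offsets_sketch(num_clobbers: i64) -> Vec<i64> {
                (0..num_clobbers).map(|i| -8 * (i + 1)).collect()
            }
            // clobber_offsets_sketch(3) == [-8, -16, -24], matching
            // `StackAMode::SPOffset(-(cur_offset as i64), ty)` with
            // cur_offset = 8, 16, 24.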
+ let mut cur_offset = 8; + for reg in clobbered_callee_saves { + let r_reg = reg.to_reg(); + let ty = match r_reg.class() { + RegClass::Int => I64, + RegClass::Float => F64, + RegClass::Vector => unimplemented!("Vector Clobber Saves"), + }; + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset: clobbered_size - cur_offset, + reg: r_reg, + }, + }); + } + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(-(cur_offset as i64), ty), + real_reg_to_reg(reg.to_reg()), + ty, + )); + cur_offset += 8 + } + insts.push(Inst::AdjustSp { + amount: -(stack_size as i64), + }); + } + (clobbered_size as u64, insts) + } + + fn gen_clobber_restore( + call_conv: isa::CallConv, + sig: &Signature, + _flags: &settings::Flags, + clobbers: &[Writable], + fixed_frame_storage_size: u32, + _outgoing_args_size: u32, + ) -> SmallVec<[Inst; 16]> { + let mut insts = SmallVec::new(); + let clobbered_callee_saves = + Self::get_clobbered_callee_saves(call_conv, _flags, sig, clobbers); + let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves); + if stack_size > 0 { + insts.push(Inst::AdjustSp { + amount: stack_size as i64, + }); + } + let mut cur_offset = 8; + for reg in &clobbered_callee_saves { + let rreg = reg.to_reg(); + let ty = match rreg.class() { + RegClass::Int => I64, + RegClass::Float => F64, + RegClass::Vector => unimplemented!("Vector Clobber Restores"), + }; + insts.push(Self::gen_load_stack( + StackAMode::SPOffset(-cur_offset, ty), + Writable::from_reg(real_reg_to_reg(reg.to_reg())), + ty, + )); + cur_offset += 8 + } + insts + } + + fn gen_call( + dest: &CallDest, + uses: CallArgList, + defs: CallRetList, + clobbers: PRegSet, + opcode: ir::Opcode, + tmp: Writable, + callee_conv: isa::CallConv, + caller_conv: isa::CallConv, + callee_pop_size: u32, + ) -> SmallVec<[Self::I; 2]> { + let mut insts = SmallVec::new(); + match &dest { + &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: name.clone(), + uses, + defs, + clobbers, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + callee_pop_size, + }), + }), + &CallDest::ExtName(ref name, RelocDistance::Far) => { + insts.push(Inst::LoadExtName { + rd: tmp, + name: Box::new(name.clone()), + offset: 0, + }); + insts.push(Inst::CallInd { + info: Box::new(CallIndInfo { + rn: tmp.to_reg(), + uses, + defs, + clobbers, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + callee_pop_size, + }), + }); + } + &CallDest::Reg(reg) => insts.push(Inst::CallInd { + info: Box::new(CallIndInfo { + rn: *reg, + uses, + defs, + clobbers, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + callee_pop_size, + }), + }), + } + insts + } + + fn gen_memcpy Writable>( + call_conv: isa::CallConv, + dst: Reg, + src: Reg, + size: usize, + mut alloc_tmp: F, + ) -> SmallVec<[Self::I; 8]> { + let mut insts = SmallVec::new(); + let arg0 = Writable::from_reg(x_reg(10)); + let arg1 = Writable::from_reg(x_reg(11)); + let arg2 = Writable::from_reg(x_reg(12)); + let tmp = alloc_tmp(Self::word_type()); + insts.extend(Inst::load_constant_u64(tmp, size as u64, &mut alloc_tmp).into_iter()); + insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: ExternalName::LibCall(LibCall::Memcpy), + uses: smallvec![ + CallArgPair { + vreg: dst, + preg: arg0.to_reg() + }, + CallArgPair { + vreg: src, + preg: arg1.to_reg() + }, + CallArgPair { + vreg: tmp.to_reg(), + preg: arg2.to_reg() + } + ], + 
defs: smallvec![], + clobbers: Self::get_regs_clobbered_by_call(call_conv), + opcode: Opcode::Call, + caller_callconv: call_conv, + callee_callconv: call_conv, + callee_pop_size: 0, + }), + }); + insts + } + + fn get_number_of_spillslots_for_value( + rc: RegClass, + _target_vector_bytes: u32, + isa_flags: &RiscvFlags, + ) -> u32 { + // We allocate in terms of 8-byte slots. + match rc { + RegClass::Int => 1, + RegClass::Float => 1, + RegClass::Vector => (isa_flags.min_vec_reg_size() / 8) as u32, + } + } + + /// Get the current virtual-SP offset from an instruction-emission state. + fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 { + s.virtual_sp_offset + } + + /// Get the nominal-SP-to-FP offset from an instruction-emission state. + fn get_nominal_sp_to_fp(s: &EmitState) -> i64 { + s.nominal_sp_to_fp + } + + fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet { + if call_conv_of_callee == isa::CallConv::Tail { + TAIL_CLOBBERS + } else { + DEFAULT_CLOBBERS + } + } + + fn get_clobbered_callee_saves( + call_conv: isa::CallConv, + _flags: &settings::Flags, + _sig: &Signature, + regs: &[Writable], + ) -> Vec> { + let mut regs: Vec> = regs + .iter() + .cloned() + .filter(|r| is_reg_saved_in_prologue(call_conv, r.to_reg())) + .collect(); + + regs.sort(); + regs + } + + fn is_frame_setup_needed( + is_leaf: bool, + stack_args_size: u32, + num_clobbered_callee_saves: usize, + fixed_frame_storage_size: u32, + ) -> bool { + !is_leaf + // The function arguments that are passed on the stack are addressed + // relative to the Frame Pointer. + || stack_args_size > 0 + || num_clobbered_callee_saves > 0 + || fixed_frame_storage_size > 0 + } + + fn gen_inline_probestack( + insts: &mut SmallInstVec, + call_conv: isa::CallConv, + frame_size: u32, + guard_size: u32, + ) { + // Unroll at most n consecutive probes, before falling back to using a loop + const PROBE_MAX_UNROLL: u32 = 3; + // Number of probes that we need to perform + let probe_count = align_to(frame_size, guard_size) / guard_size; + + if probe_count <= PROBE_MAX_UNROLL { + Self::gen_probestack_unroll(insts, guard_size, probe_count) + } else { + Self::gen_probestack_loop(insts, call_conv, guard_size, probe_count) + } + } +} + +impl Riscv64ABICallSite { + pub fn emit_return_call(mut self, ctx: &mut Lower, args: isle::ValueSlice) { + let (new_stack_arg_size, old_stack_arg_size) = + self.emit_temporary_tail_call_frame(ctx, args); + + let dest = self.dest().clone(); + let opcode = self.opcode(); + let uses = self.take_uses(); + let info = Box::new(ReturnCallInfo { + uses, + opcode, + old_stack_arg_size, + new_stack_arg_size, + }); + + match dest { + CallDest::ExtName(name, RelocDistance::Near) => { + ctx.emit(Inst::ReturnCall { + callee: Box::new(name), + info, + }); + } + CallDest::ExtName(name, RelocDistance::Far) => { + let callee = ctx.alloc_tmp(ir::types::I64).only_reg().unwrap(); + ctx.emit(Inst::LoadExtName { + rd: callee, + name: Box::new(name), + offset: 0, + }); + ctx.emit(Inst::ReturnCallInd { + callee: callee.to_reg(), + info, + }); + } + CallDest::Reg(callee) => ctx.emit(Inst::ReturnCallInd { callee, info }), + } + } +} + +const CALLEE_SAVE_X_REG: [bool; 32] = [ + false, false, true, false, false, false, false, false, // 0-7 + true, true, false, false, false, false, false, false, // 8-15 + false, false, true, true, true, true, true, true, // 16-23 + true, true, true, true, false, false, false, false, // 24-31 +]; +const CALLEE_SAVE_F_REG: [bool; 32] = [ + false, false, false, false, false, false, false, 
false, // 0-7 + true, false, false, false, false, false, false, false, // 8-15 + false, false, true, true, true, true, true, true, // 16-23 + true, true, true, true, false, false, false, false, // 24-31 +]; + +/// This should be the registers that must be saved by callee. +#[inline] +fn is_reg_saved_in_prologue(conv: CallConv, reg: RealReg) -> bool { + if conv == CallConv::Tail { + return false; + } + + match reg.class() { + RegClass::Int => CALLEE_SAVE_X_REG[reg.hw_enc() as usize], + RegClass::Float => CALLEE_SAVE_F_REG[reg.hw_enc() as usize], + // All vector registers are caller saved. + RegClass::Vector => false, + } +} + +fn compute_clobber_size(clobbers: &[Writable]) -> u32 { + let mut clobbered_size = 0; + for reg in clobbers { + match reg.to_reg().class() { + RegClass::Int => { + clobbered_size += 8; + } + RegClass::Float => { + clobbered_size += 8; + } + RegClass::Vector => unimplemented!("Vector Size Clobbered"), + } + } + align_to(clobbered_size, 16) +} + +const fn default_clobbers() -> PRegSet { + PRegSet::empty() + .with(px_reg(1)) + .with(px_reg(5)) + .with(px_reg(6)) + .with(px_reg(7)) + .with(px_reg(10)) + .with(px_reg(11)) + .with(px_reg(12)) + .with(px_reg(13)) + .with(px_reg(14)) + .with(px_reg(15)) + .with(px_reg(16)) + .with(px_reg(17)) + .with(px_reg(28)) + .with(px_reg(29)) + .with(px_reg(30)) + .with(px_reg(31)) + // F Regs + .with(pf_reg(0)) + .with(pf_reg(1)) + .with(pf_reg(2)) + .with(pf_reg(3)) + .with(pf_reg(4)) + .with(pf_reg(5)) + .with(pf_reg(6)) + .with(pf_reg(7)) + .with(pf_reg(9)) + .with(pf_reg(10)) + .with(pf_reg(11)) + .with(pf_reg(12)) + .with(pf_reg(13)) + .with(pf_reg(14)) + .with(pf_reg(15)) + .with(pf_reg(16)) + .with(pf_reg(17)) + .with(pf_reg(28)) + .with(pf_reg(29)) + .with(pf_reg(30)) + .with(pf_reg(31)) + // V Regs - All vector regs get clobbered + .with(pv_reg(0)) + .with(pv_reg(1)) + .with(pv_reg(2)) + .with(pv_reg(3)) + .with(pv_reg(4)) + .with(pv_reg(5)) + .with(pv_reg(6)) + .with(pv_reg(7)) + .with(pv_reg(8)) + .with(pv_reg(9)) + .with(pv_reg(10)) + .with(pv_reg(11)) + .with(pv_reg(12)) + .with(pv_reg(13)) + .with(pv_reg(14)) + .with(pv_reg(15)) + .with(pv_reg(16)) + .with(pv_reg(17)) + .with(pv_reg(18)) + .with(pv_reg(19)) + .with(pv_reg(20)) + .with(pv_reg(21)) + .with(pv_reg(22)) + .with(pv_reg(23)) + .with(pv_reg(24)) + .with(pv_reg(25)) + .with(pv_reg(26)) + .with(pv_reg(27)) + .with(pv_reg(28)) + .with(pv_reg(29)) + .with(pv_reg(30)) + .with(pv_reg(31)) +} + +const DEFAULT_CLOBBERS: PRegSet = default_clobbers(); + +// All allocatable registers are clobbered by calls using the `tail` calling +// convention. +const fn tail_clobbers() -> PRegSet { + PRegSet::empty() + // `x0` is the zero register, and not allocatable. + .with(px_reg(1)) + // `x2` is the stack pointer, `x3` is the global pointer, and `x4` is + // the thread pointer. None are allocatable. + .with(px_reg(5)) + .with(px_reg(6)) + .with(px_reg(7)) + // `x8` is the frame pointer, and not allocatable. + .with(px_reg(9)) + .with(px_reg(10)) + .with(px_reg(10)) + .with(px_reg(11)) + .with(px_reg(12)) + .with(px_reg(13)) + .with(px_reg(14)) + .with(px_reg(15)) + .with(px_reg(16)) + .with(px_reg(17)) + .with(px_reg(18)) + .with(px_reg(19)) + .with(px_reg(20)) + .with(px_reg(21)) + .with(px_reg(22)) + .with(px_reg(23)) + .with(px_reg(24)) + .with(px_reg(25)) + .with(px_reg(26)) + .with(px_reg(27)) + .with(px_reg(28)) + .with(px_reg(29)) + // `x30` and `x31` are reserved as scratch registers, and are not + // allocatable. 
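    // Editorial note (not part of the original commit): treating every
    // allocatable register as clobbered here is the flip side of
    // `is_reg_saved_in_prologue` above returning `false` for
    // `CallConv::Tail`: tail-convention calls preserve nothing in the
    // prologue, so values live across such a call must be kept on the stack
    // by the register allocator.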
+ // + // F Regs + .with(pf_reg(0)) + .with(pf_reg(1)) + .with(pf_reg(2)) + .with(pf_reg(3)) + .with(pf_reg(4)) + .with(pf_reg(5)) + .with(pf_reg(6)) + .with(pf_reg(7)) + .with(pf_reg(9)) + .with(pf_reg(10)) + .with(pf_reg(11)) + .with(pf_reg(12)) + .with(pf_reg(13)) + .with(pf_reg(14)) + .with(pf_reg(15)) + .with(pf_reg(16)) + .with(pf_reg(17)) + .with(pf_reg(18)) + .with(pf_reg(19)) + .with(pf_reg(20)) + .with(pf_reg(21)) + .with(pf_reg(22)) + .with(pf_reg(23)) + .with(pf_reg(24)) + .with(pf_reg(25)) + .with(pf_reg(26)) + .with(pf_reg(27)) + .with(pf_reg(28)) + .with(pf_reg(29)) + .with(pf_reg(30)) + .with(pf_reg(31)) + // V Regs + .with(pv_reg(0)) + .with(pv_reg(1)) + .with(pv_reg(2)) + .with(pv_reg(3)) + .with(pv_reg(4)) + .with(pv_reg(5)) + .with(pv_reg(6)) + .with(pv_reg(7)) + .with(pv_reg(8)) + .with(pv_reg(9)) + .with(pv_reg(10)) + .with(pv_reg(11)) + .with(pv_reg(12)) + .with(pv_reg(13)) + .with(pv_reg(14)) + .with(pv_reg(15)) + .with(pv_reg(16)) + .with(pv_reg(17)) + .with(pv_reg(18)) + .with(pv_reg(19)) + .with(pv_reg(20)) + .with(pv_reg(21)) + .with(pv_reg(22)) + .with(pv_reg(23)) + .with(pv_reg(24)) + .with(pv_reg(25)) + .with(pv_reg(26)) + .with(pv_reg(27)) + .with(pv_reg(28)) + .with(pv_reg(29)) + .with(pv_reg(30)) + .with(pv_reg(31)) +} + +const TAIL_CLOBBERS: PRegSet = tail_clobbers(); + +impl Riscv64MachineDeps { + fn gen_probestack_unroll(insts: &mut SmallInstVec, guard_size: u32, probe_count: u32) { + insts.reserve(probe_count as usize); + for i in 0..probe_count { + let offset = (guard_size * (i + 1)) as i64; + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(-offset, I8), + zero_reg(), + I32, + )); + } + } + + fn gen_probestack_loop( + insts: &mut SmallInstVec, + call_conv: isa::CallConv, + guard_size: u32, + probe_count: u32, + ) { + // Must be a caller-saved register that is not an argument. + let tmp = match call_conv { + isa::CallConv::Tail => Writable::from_reg(x_reg(1)), + _ => Writable::from_reg(x_reg(28)), // t3 + }; + insts.push(Inst::StackProbeLoop { + guard_size, + probe_count, + tmp, + }); + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst.isle b/cranelift/codegen/src/isa/zkasm/inst.isle new file mode 100644 index 000000000000..d2f3933f3bd6 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst.isle @@ -0,0 +1,2964 @@ +;; Instruction formats. +(type MInst + (enum + ;; A no-op of zero size. + (Nop0) + (Nop4) + + ;; load immediate + (Lui + (rd WritableReg) + (imm Imm20)) + + (LoadConst32 + (rd WritableReg) + (imm u32)) + + (LoadConst64 + (rd WritableReg) + (imm u64)) + + (Auipc + (rd WritableReg) + (imm Imm20)) + + ;; An ALU operation with one register sources and a register destination. + (FpuRR + (alu_op FpuOPRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs Reg)) + + + ;; An ALU operation with two register sources and a register destination. + (AluRRR + (alu_op AluOPRRR) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg)) + + ;; An ALU operation with two register sources and a register destination. + (FpuRRR + (alu_op FpuOPRRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg)) + + ;; An ALU operation with three register sources and a register destination. + (FpuRRRR + (alu_op FpuOPRRRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg) + (rs3 Reg)) + + ;; An ALU operation with a register source and an immediate-12 source, and a register + ;; destination. 
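    ;; Editorial note (not from the original commit): this is the I-type form,
    ;; pairing one source register with a 12-bit immediate; its `alu_op` field
    ;; takes the `AluOPRRI` opcodes (Addi/Andi/Ori/Slli/...) defined further
    ;; down in this file.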
+ (AluRRImm12 + (alu_op AluOPRRI) + (rd WritableReg) + (rs Reg) + (imm12 Imm12)) + + ;; An load + (Load + (rd WritableReg) + (op LoadOP) + (flags MemFlags) + (from AMode)) + ;; An Store + (Store + (to AMode) + (op StoreOP) + (flags MemFlags) + (src Reg)) + + ;; A pseudo-instruction that captures register arguments in vregs. + (Args + (args VecArgPair)) + + (Ret (rets VecRetPair) + (stack_bytes_to_pop u32)) + + (Extend + (rd WritableReg) + (rn Reg) + (signed bool) + (from_bits u8) + (to_bits u8)) + + (AdjustSp + (amount i64)) + (Call + (info BoxCallInfo)) + + ;; A machine indirect-call instruction. + (CallInd + (info BoxCallIndInfo)) + + ;; A direct return-call macro instruction. + (ReturnCall + (callee BoxExternalName) + (info BoxReturnCallInfo)) + + ;; An indirect return-call macro instruction. + (ReturnCallInd + (callee Reg) + (info BoxReturnCallInfo)) + + (TrapIf + (test Reg) + (trap_code TrapCode)) + + ;; use a simple compare to decide to cause trap or not. + (TrapIfC + (rs1 Reg) + (rs2 Reg) + (cc IntCC) + (trap_code TrapCode)) + + (Jal + ;; (rd WritableReg) don't use + (dest BranchTarget)) + + (CondBr + (taken BranchTarget) + (not_taken BranchTarget) + (kind IntegerCompare)) + + ;; Load an inline symbol reference. + (LoadExtName + (rd WritableReg) + (name BoxExternalName) + (offset i64)) + + ;; Load address referenced by `mem` into `rd`. + (LoadAddr + (rd WritableReg) + (mem AMode)) + + ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This + ;; controls how AMode::NominalSPOffset args are lowered. + (VirtualSPOffsetAdj + (amount i64)) + + ;; A MOV instruction. These are encoded as OrR's (AluRRR form) but we + ;; keep them separate at the `Inst` level for better pretty-printing + ;; and faster `is_move()` logic. + (Mov + (rd WritableReg) + (rm Reg) + (ty Type)) + + ;; A MOV instruction, but where the source register is a non-allocatable + ;; PReg. It's important that the register be non-allocatable, as regalloc2 + ;; will not see it as used. + (MovFromPReg + (rd WritableReg) + (rm PReg)) + + (Fence + (pred FenceReq) + (succ FenceReq)) + + (FenceI) + + (ECall) + + (EBreak) + + ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at + ;; runtime. + (Udf + (trap_code TrapCode)) + ;; a jump and link register operation + (Jalr + ;;Plain unconditional jumps (assembler pseudo-op J) are encoded as a JAL with rd=x0. + (rd WritableReg) + (base Reg) + (offset Imm12)) + + ;; atomic operations. + (Atomic + (op AtomicOP) + (rd WritableReg) + (addr Reg) + (src Reg) + (amo AMO)) + ;; an atomic store + (AtomicStore + (src Reg) + (ty Type) + (p Reg)) + ;; an atomic load. + (AtomicLoad + (rd WritableReg) + (ty Type) + (p Reg)) + + ;; an atomic nand need using loop to implement. + (AtomicRmwLoop + (offset Reg) + (op AtomicRmwOp) + (dst WritableReg) + (ty Type) + (p Reg) + (x Reg) + (t0 WritableReg)) + + ;; select x or y base on condition + (Select + (dst VecWritableReg) + (ty Type) + (condition Reg) + (x ValueRegs) + (y ValueRegs)) + + (BrTable + (index Reg) + (tmp1 WritableReg) + (tmp2 WritableReg) + (targets VecBranchTarget)) + + ;; atomic compare and set operation + (AtomicCas + (offset Reg) + (t0 WritableReg) + (dst WritableReg) + (e Reg) + (addr Reg) + (v Reg) + (ty Type)) + ;; select x or y base on op_code + (IntSelect + (op IntSelectOP) + (dst VecWritableReg) + (x ValueRegs) + (y ValueRegs) + (ty Type)) + ;; an integer compare. 
+ (Icmp + (cc IntCC) + (rd WritableReg) + (a ValueRegs) + (b ValueRegs) + (ty Type)) + ;; select a reg base on condition. + ;; very useful because in lowering stage we can not have condition branch. + (SelectReg + (rd WritableReg) + (rs1 Reg) + (rs2 Reg) + (condition IntegerCompare)) + ;; + (FcvtToInt + (is_sat bool) + (rd WritableReg) + (tmp WritableReg) ;; a float register to load bounds. + (rs Reg) + (is_signed bool) + (in_type Type) + (out_type Type)) + + (RawData (data VecU8)) + + ;; An unwind pseudo-instruction. + (Unwind + (inst UnwindInst)) + + ;; A dummy use, useful to keep a value alive. + (DummyUse + (reg Reg)) + ;;; + (FloatRound + (op FloatRoundOP) + (rd WritableReg) + (int_tmp WritableReg) + (f_tmp WritableReg) + (rs Reg) + (ty Type)) + ;;;; FMax + (FloatSelect + (op FloatSelectOP) + (rd WritableReg) + ;; a integer register + (tmp WritableReg) + (rs1 Reg) + (rs2 Reg) + (ty Type)) + + ;; popcnt if target doesn't support extension B + ;; use iteration to implement. + (Popcnt + (sum WritableReg) + (step WritableReg) + (tmp WritableReg) + (rs Reg) + (ty Type)) + + ;;; counting leading or trailing zeros. + (Cltz + ;; leading or trailing. + (leading bool) + (sum WritableReg) + (step WritableReg) + (tmp WritableReg) + (rs Reg) + (ty Type)) + ;; Byte-reverse register + (Rev8 + (rs Reg) + (step WritableReg) + (tmp WritableReg) + (rd WritableReg)) + ;; + (Brev8 + (rs Reg) + (ty Type) + (step WritableReg) + (tmp WritableReg) + (tmp2 WritableReg) + (rd WritableReg)) + (StackProbeLoop + (guard_size u32) + (probe_count u32) + (tmp WritableReg)) + + (VecAluRRRR + (op VecAluOpRRRR) + (vd WritableReg) + (vd_src Reg) + (vs2 Reg) + (vs1 Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRRImm5 + (op VecAluOpRRRImm5) + (vd WritableReg) + (vd_src Reg) + (vs2 Reg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRR + (op VecAluOpRRR) + (vd WritableReg) + (vs2 Reg) + (vs1 Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRImm5 + (op VecAluOpRRImm5) + (vd WritableReg) + (vs2 Reg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRR + (op VecAluOpRR) + (vd WritableReg) + (vs Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRImm5 + (op VecAluOpRImm5) + (vd WritableReg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecSetState + (rd WritableReg) + (vstate VState)) + + (VecLoad + (eew VecElementWidth) + (to WritableReg) + (from VecAMode) + (flags MemFlags) + (mask VecOpMasking) + (vstate VState)) + + (VecStore + (eew VecElementWidth) + (to VecAMode) + (from Reg) + (flags MemFlags) + (mask VecOpMasking) + (vstate VState)) +)) + + +(type FloatSelectOP (enum + (Max) + (Min) +)) + +(type FloatRoundOP (enum + (Nearest) + (Ceil) + (Floor) + (Trunc) +)) + +(type IntSelectOP (enum + (Smax) + (Umax) + (Smin) + (Umin) +)) + +(type AtomicOP (enum + (LrW) + (ScW) + (AmoswapW) + (AmoaddW) + (AmoxorW) + (AmoandW) + (AmoorW) + (AmominW) + (AmomaxW) + (AmominuW) + (AmomaxuW) + (LrD) + (ScD) + (AmoswapD) + (AmoaddD) + (AmoxorD) + (AmoandD) + (AmoorD) + (AmominD) + (AmomaxD) + (AmominuD) + (AmomaxuD) +)) + +(type FpuOPRRRR (enum + ;; float32 + (FmaddS) + (FmsubS) + (FnmsubS) + (FnmaddS) + ;; float64 + (FmaddD) + (FmsubD) + (FnmsubD) + (FnmaddD) +)) + +(type FClassResult (enum + ;;0 rs1 is −∞. + (NegInfinite) + ;; 1 rs1 is a negative normal number. + (NegNormal) + ;; 2 rs1 is a negative subnormal number. + (NegSubNormal) + ;; 3 rs1 is −0. + (NegZero) + ;; 4 rs1 is +0. + (PosZero) + ;; 5 rs1 is a positive subnormal number. 
+ (PosSubNormal) + ;; 6 rs1 is a positive normal number. + (PosNormal) + ;; 7 rs1 is +∞. + (PosInfinite) + ;; 8 rs1 is a signaling NaN. + (SNaN) + ;; 9 rs1 is a quiet NaN. + (QNaN) +)) + +(type FpuOPRR (enum + ;; RV32F Standard Extension + (FsqrtS) + (FcvtWS) + (FcvtWuS) + (FmvXW) + (FclassS) + (FcvtSw) + (FcvtSwU) + (FmvWX) + + + ;; RV64F Standard Extension (in addition to RV32F) + (FcvtLS) + (FcvtLuS) + (FcvtSL) + (FcvtSLU) + + + ;; RV64D Standard Extension (in addition to RV32D) + (FcvtLD) + (FcvtLuD) + (FmvXD) + (FcvtDL) + (FcvtDLu) + (FmvDX) + + ;; RV32D Standard Extension + (FsqrtD) + (FcvtSD) + (FcvtDS) + (FclassD) + (FcvtWD) + (FcvtWuD) + (FcvtDW) + (FcvtDWU) + ;; bitmapip + +)) + +(type LoadOP (enum + (Lb) + (Lh) + (Lw) + (Lbu) + (Lhu) + (Lwu) + (Ld) + (Flw) + (Fld) +)) + +(type StoreOP (enum + (Sb) + (Sh) + (Sw) + (Sd) + (Fsw) + (Fsd) +)) + +(type AluOPRRR (enum + ;; base set + (Add) + (Sub) + (Sll) + (Slt) + (SltU) + (Sgt) + (Sgtu) + (Xor) + (Srl) + (Sra) + (Or) + (And) + + ;; RV64I Base Instruction Set (in addition to RV32I) + (Addw) + (Subw) + (Sllw) + (Srlw) + (Sraw) + + + ;;RV32M Standard Extension + (Mul) + (Mulh) + (Mulhsu) + (Mulhu) + (Div) + (DivU) + (Rem) + (RemU) + + ;; RV64M Standard Extension (in addition to RV32M) + (Mulw) + (Divw) + (Divuw) + (Remw) + (Remuw) + + ;; Zba: Address Generation Instructions + (Adduw) + (Sh1add) + (Sh1adduw) + (Sh2add) + (Sh2adduw) + (Sh3add) + (Sh3adduw) + + ;; Zbb: Bit Manipulation Instructions + (Andn) + (Orn) + (Xnor) + (Max) + (Maxu) + (Min) + (Minu) + (Rol) + (Rolw) + (Ror) + (Rorw) + + ;; Zbs: Single-bit instructions + (Bclr) + (Bext) + (Binv) + (Bset) + + ;; Zbc: Carry-less multiplication + (Clmul) + (Clmulh) + (Clmulr) + + ;; Zbkb: Bit-manipulation for Cryptography + (Pack) + (Packw) + (Packh) +)) + + +(type FpuOPRRR (enum + ;; RV32F Standard Extension + (FaddS) + (FsubS) + (FmulS) + (FdivS) + + (FsgnjS) + (FsgnjnS) + (FsgnjxS) + (FminS) + (FmaxS) + (FeqS) + (FltS) + (FleS) + + ;; RV32D Standard Extension + (FaddD) + (FsubD) + (FmulD) + (FdivD) + (FsgnjD) + (FsgnjnD) + (FsgnjxD) + (FminD) + (FmaxD) + (FeqD) + (FltD) + (FleD) +)) + + + +(type AluOPRRI (enum + ;; Base ISA + (Addi) + (Slti) + (SltiU) + (Xori) + (Ori) + (Andi) + (Slli) + (Srli) + (Srai) + (Addiw) + (Slliw) + (SrliW) + (Sraiw) + + ;; Zba: Address Generation Instructions + (SlliUw) + + ;; Zbb: Bit Manipulation Instructions + (Clz) + (Clzw) + (Ctz) + (Ctzw) + (Cpop) + (Cpopw) + (Sextb) + (Sexth) + (Zexth) + (Rori) + (Roriw) + (Rev8) + (Brev8) + (Orcb) + + ;; Zbs: Single-bit instructions + (Bclri) + (Bexti) + (Binvi) + (Bseti) +)) + + +(type FRM (enum + ;; Round to Nearest, ties to Even + (RNE) + ;; Round towards Zero + (RTZ) + ;; Round Down (towards −∞) + (RDN) + ;; Round Up (towards +∞) + (RUP) + ;; Round to Nearest, ties to Max Magnitude + (RMM) + ;; In instruction’s rm field, selects dynamic rounding mode; + ;;In Rounding Mode register, Invalid. + (Fcsr) +)) + +(type FFlagsException (enum + ;; Invalid Operation + (NV) + ;; Divide by Zero + (DZ) + ;; Overflow + (OF) + ;; Underflow + (UF) + ;; Inexact + (NX) +)) + +;;;; input output read write +;;;; SI SO SR SW +;;;; PI PO PR PW +;;;; lowest four bit are used. 
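;; Editorial note (not part of the original commit): the `primitive` types
;; declared from here on bind ISLE names to Rust types in this backend (for
;; example `Imm12` lives in `inst/imms.rs`). As used by `gen_add_imm` in
;; abi.rs, an `Imm12` can only represent values that fit a sign-extended
;; 12-bit immediate, roughly `-2048 <= v && v <= 2047`.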
+(type FenceReq (primitive u8)) + +(type VecBranchTarget (primitive VecBranchTarget)) +(type BoxCallInfo (primitive BoxCallInfo)) +(type BoxCallIndInfo (primitive BoxCallIndInfo)) +(type BoxReturnCallInfo (primitive BoxReturnCallInfo)) +(type IntegerCompare (primitive IntegerCompare)) +(type AMode (primitive AMode)) +(type OptionReg (primitive OptionReg)) +(type OptionImm12 (primitive OptionImm12)) +(type OptionUimm5 (primitive OptionUimm5)) +(type Imm12 (primitive Imm12)) +(type UImm5 (primitive UImm5)) +(type Imm5 (primitive Imm5)) +(type Imm20 (primitive Imm20)) +(type Imm3 (primitive Imm3)) +(type BranchTarget (primitive BranchTarget)) +(type OptionFloatRoundingMode (primitive OptionFloatRoundingMode)) +(type VecU8 (primitive VecU8)) +(type AMO (primitive AMO)) +(type VecMachLabel extern (enum)) + + +;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type XReg (primitive XReg)) +(type WritableXReg (primitive WritableXReg)) +(type FReg (primitive FReg)) +(type WritableFReg (primitive WritableFReg)) +(type VReg (primitive VReg)) +(type WritableVReg (primitive WritableVReg)) + +;; Construct a new `XReg` from a `Reg`. +;; +;; Asserts that the register has a Integer RegClass. +(decl xreg_new (Reg) XReg) +(extern constructor xreg_new xreg_new) +(convert Reg XReg xreg_new) + +;; Construct a new `WritableXReg` from a `WritableReg`. +;; +;; Asserts that the register has a Integer RegClass. +(decl writable_xreg_new (WritableReg) WritableXReg) +(extern constructor writable_xreg_new writable_xreg_new) +(convert WritableReg WritableXReg writable_xreg_new) + +;; Put a value into a XReg. +;; +;; Asserts that the value goes into a XReg. +(decl put_in_xreg (Value) XReg) +(rule (put_in_xreg val) (xreg_new (put_in_reg val))) +(convert Value XReg put_in_xreg) + +;; Construct an `InstOutput` out of a single XReg register. +(decl output_xreg (XReg) InstOutput) +(rule (output_xreg x) (output_reg x)) +(convert XReg InstOutput output_xreg) + +;; Convert a `WritableXReg` to an `XReg`. +(decl pure writable_xreg_to_xreg (WritableXReg) XReg) +(extern constructor writable_xreg_to_xreg writable_xreg_to_xreg) +(convert WritableXReg XReg writable_xreg_to_xreg) + +;; Convert a `WritableXReg` to an `WritableReg`. +(decl pure writable_xreg_to_writable_reg (WritableXReg) WritableReg) +(extern constructor writable_xreg_to_writable_reg writable_xreg_to_writable_reg) +(convert WritableXReg WritableReg writable_xreg_to_writable_reg) + +;; Convert a `WritableXReg` to an `Reg`. +(decl pure writable_xreg_to_reg (WritableXReg) Reg) +(rule (writable_xreg_to_reg x) (writable_xreg_to_writable_reg x)) +(convert WritableXReg Reg writable_xreg_to_reg) + +;; Convert an `XReg` to a `Reg`. +(decl pure xreg_to_reg (XReg) Reg) +(extern constructor xreg_to_reg xreg_to_reg) +(convert XReg Reg xreg_to_reg) + +;; Convert a `XReg` to a `ValueRegs`. +(decl xreg_to_value_regs (XReg) ValueRegs) +(rule (xreg_to_value_regs x) (value_reg x)) +(convert XReg ValueRegs xreg_to_reg) + +;; Convert a `WritableXReg` to a `ValueRegs`. +(decl writable_xreg_to_value_regs (WritableXReg) ValueRegs) +(rule (writable_xreg_to_value_regs x) (value_reg x)) +(convert WritableXReg ValueRegs writable_xreg_to_value_regs) + +;; Allocates a new `WritableXReg`. +(decl temp_writable_xreg () WritableXReg) +(rule (temp_writable_xreg) (temp_writable_reg $I64)) + + +;; Construct a new `FReg` from a `Reg`. +;; +;; Asserts that the register has a Float RegClass. 
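;; Editorial note (not from the original commit): `FReg` here, and `VReg`
;; below, repeat the same kit already spelled out for `XReg`: a checked
;; constructor from `Reg`, `put_in_*`/`output_*` glue, writable conversions,
;; and a `temp_writable_*` allocator, so lowering rules stay
;; register-class-checked.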
+(decl freg_new (Reg) FReg) +(extern constructor freg_new freg_new) +(convert Reg FReg freg_new) + +;; Construct a new `WritableFReg` from a `WritableReg`. +;; +;; Asserts that the register has a Float RegClass. +(decl writable_freg_new (WritableReg) WritableFReg) +(extern constructor writable_freg_new writable_freg_new) +(convert WritableReg WritableFReg writable_freg_new) + +;; Put a value into a FReg. +;; +;; Asserts that the value goes into a FReg. +(decl put_in_freg (Value) FReg) +(rule (put_in_freg val) (freg_new (put_in_reg val))) +(convert Value FReg put_in_freg) + +;; Construct an `InstOutput` out of a single FReg register. +(decl output_freg (FReg) InstOutput) +(rule (output_freg x) (output_reg x)) +(convert FReg InstOutput output_freg) + +;; Convert a `WritableFReg` to an `FReg`. +(decl pure writable_freg_to_freg (WritableFReg) FReg) +(extern constructor writable_freg_to_freg writable_freg_to_freg) +(convert WritableFReg FReg writable_freg_to_freg) + +;; Convert a `WritableFReg` to an `WritableReg`. +(decl pure writable_freg_to_writable_reg (WritableFReg) WritableReg) +(extern constructor writable_freg_to_writable_reg writable_freg_to_writable_reg) +(convert WritableFReg WritableReg writable_freg_to_writable_reg) + +;; Convert a `WritableFReg` to an `Reg`. +(decl pure writable_freg_to_reg (WritableFReg) Reg) +(rule (writable_freg_to_reg x) (writable_freg_to_writable_reg x)) +(convert WritableFReg Reg writable_freg_to_reg) + +;; Convert an `FReg` to a `Reg`. +(decl pure freg_to_reg (FReg) Reg) +(extern constructor freg_to_reg freg_to_reg) +(convert FReg Reg freg_to_reg) + +;; Convert a `FReg` to a `ValueRegs`. +(decl freg_to_value_regs (FReg) ValueRegs) +(rule (freg_to_value_regs x) (value_reg x)) +(convert FReg ValueRegs xreg_to_reg) + +;; Convert a `WritableFReg` to a `ValueRegs`. +(decl writable_freg_to_value_regs (WritableFReg) ValueRegs) +(rule (writable_freg_to_value_regs x) (value_reg x)) +(convert WritableFReg ValueRegs writable_freg_to_value_regs) + +;; Allocates a new `WritableFReg`. +(decl temp_writable_freg () WritableFReg) +(rule (temp_writable_freg) (temp_writable_reg $F64)) + + + +;; Construct a new `VReg` from a `Reg`. +;; +;; Asserts that the register has a Vector RegClass. +(decl vreg_new (Reg) VReg) +(extern constructor vreg_new vreg_new) +(convert Reg VReg vreg_new) + +;; Construct a new `WritableVReg` from a `WritableReg`. +;; +;; Asserts that the register has a Vector RegClass. +(decl writable_vreg_new (WritableReg) WritableVReg) +(extern constructor writable_vreg_new writable_vreg_new) +(convert WritableReg WritableVReg writable_vreg_new) + +;; Put a value into a VReg. +;; +;; Asserts that the value goes into a VReg. +(decl put_in_vreg (Value) VReg) +(rule (put_in_vreg val) (vreg_new (put_in_reg val))) +(convert Value VReg put_in_vreg) + +;; Construct an `InstOutput` out of a single VReg register. +(decl output_vreg (VReg) InstOutput) +(rule (output_vreg x) (output_reg x)) +(convert VReg InstOutput output_vreg) + +;; Convert a `WritableVReg` to an `VReg`. +(decl pure writable_vreg_to_vreg (WritableVReg) VReg) +(extern constructor writable_vreg_to_vreg writable_vreg_to_vreg) +(convert WritableVReg VReg writable_vreg_to_vreg) + +;; Convert a `WritableVReg` to an `WritableReg`. +(decl pure writable_vreg_to_writable_reg (WritableVReg) WritableReg) +(extern constructor writable_vreg_to_writable_reg writable_vreg_to_writable_reg) +(convert WritableVReg WritableReg writable_vreg_to_writable_reg) + +;; Convert a `WritableVReg` to an `Reg`. 
+(decl pure writable_vreg_to_reg (WritableVReg) Reg) +(rule (writable_vreg_to_reg x) (writable_vreg_to_writable_reg x)) +(convert WritableVReg Reg writable_vreg_to_reg) + +;; Convert an `VReg` to a `Reg`. +(decl pure vreg_to_reg (VReg) Reg) +(extern constructor vreg_to_reg vreg_to_reg) +(convert VReg Reg vreg_to_reg) + +;; Convert a `VReg` to a `ValueRegs`. +(decl vreg_to_value_regs (VReg) ValueRegs) +(rule (vreg_to_value_regs x) (value_reg x)) +(convert VReg ValueRegs xreg_to_reg) + +;; Convert a `WritableVReg` to a `ValueRegs`. +(decl writable_vreg_to_value_regs (WritableVReg) ValueRegs) +(rule (writable_vreg_to_value_regs x) (value_reg x)) +(convert WritableVReg ValueRegs writable_vreg_to_value_regs) + +;; Allocates a new `WritableVReg`. +(decl temp_writable_vreg () WritableVReg) +(rule (temp_writable_vreg) (temp_writable_reg $I8X16)) + + +;; Converters + +(convert u8 i32 u8_as_i32) +(decl u8_as_i32 (u8) i32) +(extern constructor u8_as_i32 u8_as_i32) + +;; ISA Extension helpers + +(decl pure has_v () bool) +(extern constructor has_v has_v) + +(decl pure has_zbkb () bool) +(extern constructor has_zbkb has_zbkb) + +(decl pure has_zba () bool) +(extern constructor has_zba has_zba) + +(decl pure has_zbb () bool) +(extern constructor has_zbb has_zbb) + +(decl pure has_zbc () bool) +(extern constructor has_zbc has_zbc) + +(decl pure has_zbs () bool) +(extern constructor has_zbs has_zbs) + +(decl gen_float_round (FloatRoundOP Reg Type) Reg) +(rule + (gen_float_round op rs ty) + (let + ((rd WritableReg (temp_writable_reg ty)) + (tmp WritableXReg (temp_writable_xreg)) + (tmp2 WritableFReg (temp_writable_freg)) + (_ Unit (emit (MInst.FloatRound op rd tmp tmp2 rs ty)))) + (writable_reg_to_reg rd))) + +(decl gen_float_select (FloatSelectOP Reg Reg Type) Reg) +(rule + (gen_float_select op x y ty) + (let + ((rd WritableReg (temp_writable_reg ty)) + (tmp WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.FloatSelect op rd tmp x y ty)))) + (writable_reg_to_reg rd))) + + +;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; RV32I Base Integer Instruction Set + +;; Helper for emitting the `add` instruction. +;; rd ← rs1 + rs2 +(decl rv_add (XReg XReg) XReg) +(rule (rv_add rs1 rs2) + (alu_rrr (AluOPRRR.Add) rs1 rs2)) + +;; Helper for emitting the `addi` ("Add Immediate") instruction. +;; rd ← rs1 + sext(imm) +(decl rv_addi (XReg Imm12) XReg) +(rule (rv_addi rs1 imm) + (alu_rr_imm12 (AluOPRRI.Addi) rs1 imm)) + +;; Helper for emitting the `sub` instruction. +;; rd ← rs1 - rs2 +(decl rv_sub (XReg XReg) XReg) +(rule (rv_sub rs1 rs2) + (alu_rrr (AluOPRRR.Sub) rs1 rs2)) + +;; Helper for emitting the `neg` instruction. +;; This instruction is a mnemonic for `sub rd, zero, rs1`. +(decl rv_neg (XReg) XReg) +(rule (rv_neg rs1) + (alu_rrr (AluOPRRR.Sub) (zero_reg) rs1)) + +;; Helper for emitting the `sll` ("Shift Left Logical") instruction. +;; rd ← rs1 << rs2 +(decl rv_sll (XReg XReg) XReg) +(rule (rv_sll rs1 rs2) + (alu_rrr (AluOPRRR.Sll) rs1 rs2)) + +;; Helper for emitting the `slli` ("Shift Left Logical Immediate") instruction. +;; rd ← rs1 << uext(imm) +(decl rv_slli (XReg Imm12) XReg) +(rule (rv_slli rs1 imm) + (alu_rr_imm12 (AluOPRRI.Slli) rs1 imm)) + +;; Helper for emitting the `srl` ("Shift Right Logical") instruction. +;; rd ← rs1 >> rs2 +(decl rv_srl (XReg XReg) XReg) +(rule (rv_srl rs1 rs2) + (alu_rrr (AluOPRRR.Srl) rs1 rs2)) + +;; Helper for emitting the `srli` ("Shift Right Logical Immediate") instruction. 
+;; rd ← rs1 >> uext(imm) +(decl rv_srli (XReg Imm12) XReg) +(rule (rv_srli rs1 imm) + (alu_rr_imm12 (AluOPRRI.Srli) rs1 imm)) + +;; Helper for emitting the `sra` ("Shift Right Arithmetic") instruction. +;; rd ← rs1 >> rs2 +(decl rv_sra (XReg XReg) XReg) +(rule (rv_sra rs1 rs2) + (alu_rrr (AluOPRRR.Sra) rs1 rs2)) + +;; Helper for emitting the `srai` ("Shift Right Arithmetic Immediate") instruction. +;; rd ← rs1 >> uext(imm) +(decl rv_srai (XReg Imm12) XReg) +(rule (rv_srai rs1 imm) + (alu_rr_imm12 (AluOPRRI.Srai) rs1 imm)) + +;; Helper for emitting the `or` instruction. +;; rd ← rs1 ∨ rs2 +(decl rv_or (XReg XReg) XReg) +(rule (rv_or rs1 rs2) + (alu_rrr (AluOPRRR.Or) rs1 rs2)) + +;; Helper for emitting the `ori` ("Or Immediate") instruction. +;; rd ← rs1 ∨ uext(imm) +(decl rv_ori (XReg Imm12) XReg) +(rule (rv_ori rs1 imm) + (alu_rr_imm12 (AluOPRRI.Ori) rs1 imm)) + +;; Helper for emitting the `xor` instruction. +;; rd ← rs1 ⊕ rs2 +(decl rv_xor (XReg XReg) XReg) +(rule (rv_xor rs1 rs2) + (alu_rrr (AluOPRRR.Xor) rs1 rs2)) + +;; Helper for emitting the `xori` ("Exlusive Or Immediate") instruction. +;; rd ← rs1 ⊕ uext(imm) +(decl rv_xori (XReg Imm12) XReg) +(rule (rv_xori rs1 imm) + (alu_rr_imm12 (AluOPRRI.Xori) rs1 imm)) + +;; Helper for emitting the `not` instruction. +;; This instruction is a mnemonic for `xori rd, rs1, -1`. +(decl rv_not (XReg) XReg) +(rule (rv_not rs1) + (rv_xori rs1 (imm12_const -1))) + +;; Helper for emitting the `and` instruction. +;; rd ← rs1 ∧ rs2 +(decl rv_and (XReg XReg) XReg) +(rule (rv_and rs1 rs2) + (alu_rrr (AluOPRRR.And) rs1 rs2)) + +;; Helper for emitting the `andi` ("And Immediate") instruction. +;; rd ← rs1 ∧ uext(imm) +(decl rv_andi (XReg Imm12) XReg) +(rule (rv_andi rs1 imm) + (alu_rr_imm12 (AluOPRRI.Andi) rs1 imm)) + +;; Helper for emitting the `sltu` ("Set Less Than Unsigned") instruction. +;; rd ← rs1 < rs2 +(decl rv_sltu (XReg XReg) XReg) +(rule (rv_sltu rs1 rs2) + (alu_rrr (AluOPRRR.SltU) rs1 rs2)) + +;; Helper for emitting the `snez` instruction. +;; This instruction is a mnemonic for `sltu rd, zero, rs`. +(decl rv_snez (XReg) XReg) +(rule (rv_snez rs1) + (rv_sltu (zero_reg) rs1)) + +;; Helper for emiting the `sltiu` ("Set Less Than Immediate Unsigned") instruction. +;; rd ← rs1 < imm +(decl rv_sltiu (XReg Imm12) XReg) +(rule (rv_sltiu rs1 imm) + (alu_rr_imm12 (AluOPRRI.SltiU) rs1 imm)) + +;; Helper for emitting the `seqz` instruction. +;; This instruction is a mnemonic for `sltiu rd, rs, 1`. +(decl rv_seqz (XReg) XReg) +(rule (rv_seqz rs1) + (rv_sltiu rs1 (imm12_const 1))) + + +;; RV64I Base Integer Instruction Set +;; Unlike RV32I instructions these are only present in the 64bit ISA + +;; Helper for emitting the `addw` ("Add Word") instruction. +;; rd ← sext32(rs1) + sext32(rs2) +(decl rv_addw (XReg XReg) XReg) +(rule (rv_addw rs1 rs2) + (alu_rrr (AluOPRRR.Addw) rs1 rs2)) + +;; Helper for emitting the `addiw` ("Add Word Immediate") instruction. +;; rd ← sext32(rs1) + imm +(decl rv_addiw (XReg Imm12) XReg) +(rule (rv_addiw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Addiw) rs1 imm)) + +;; Helper for emitting the `sext.w` ("Sign Extend Word") instruction. +;; This instruction is a mnemonic for `addiw rd, rs, zero`. +(decl rv_sextw (XReg) XReg) +(rule (rv_sextw rs1) + (rv_addiw rs1 (imm12_const 0))) + +;; Helper for emitting the `subw` ("Subtract Word") instruction. 
+;; rd ← sext32(rs1) - sext32(rs2) +(decl rv_subw (XReg XReg) XReg) +(rule (rv_subw rs1 rs2) + (alu_rrr (AluOPRRR.Subw) rs1 rs2)) + +;; Helper for emitting the `sllw` ("Shift Left Logical Word") instruction. +;; rd ← sext32(uext32(rs1) << rs2) +(decl rv_sllw (XReg XReg) XReg) +(rule (rv_sllw rs1 rs2) + (alu_rrr (AluOPRRR.Sllw) rs1 rs2)) + +;; Helper for emitting the `slliw` ("Shift Left Logical Immediate Word") instruction. +;; rd ← sext32(uext32(rs1) << imm) +(decl rv_slliw (XReg Imm12) XReg) +(rule (rv_slliw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Slliw) rs1 imm)) + +;; Helper for emitting the `srlw` ("Shift Right Logical Word") instruction. +;; rd ← sext32(uext32(rs1) >> rs2) +(decl rv_srlw (XReg XReg) XReg) +(rule (rv_srlw rs1 rs2) + (alu_rrr (AluOPRRR.Srlw) rs1 rs2)) + +;; Helper for emitting the `srliw` ("Shift Right Logical Immediate Word") instruction. +;; rd ← sext32(uext32(rs1) >> imm) +(decl rv_srliw (XReg Imm12) XReg) +(rule (rv_srliw rs1 imm) + (alu_rr_imm12 (AluOPRRI.SrliW) rs1 imm)) + +;; Helper for emitting the `sraw` ("Shift Right Arithmetic Word") instruction. +;; rd ← sext32(rs1 >> rs2) +(decl rv_sraw (XReg XReg) XReg) +(rule (rv_sraw rs1 rs2) + (alu_rrr (AluOPRRR.Sraw) rs1 rs2)) + +;; Helper for emitting the `sraiw` ("Shift Right Arithmetic Immediate Word") instruction. +;; rd ← sext32(rs1 >> imm) +(decl rv_sraiw (XReg Imm12) XReg) +(rule (rv_sraiw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Sraiw) rs1 imm)) + + +;; RV32M Extension +;; TODO: Enable these instructions only when we have the M extension + +;; Helper for emitting the `mul` instruction. +;; rd ← rs1 × rs2 +(decl rv_mul (XReg XReg) XReg) +(rule (rv_mul rs1 rs2) + (alu_rrr (AluOPRRR.Mul) rs1 rs2)) + +;; Helper for emitting the `mulh` ("Multiply High Signed Signed") instruction. +;; rd ← (sext(rs1) × sext(rs2)) » xlen +(decl rv_mulh (XReg XReg) XReg) +(rule (rv_mulh rs1 rs2) + (alu_rrr (AluOPRRR.Mulh) rs1 rs2)) + +;; Helper for emitting the `mulhu` ("Multiply High Unsigned Unsigned") instruction. +;; rd ← (uext(rs1) × uext(rs2)) » xlen +(decl rv_mulhu (XReg XReg) XReg) +(rule (rv_mulhu rs1 rs2) + (alu_rrr (AluOPRRR.Mulhu) rs1 rs2)) + +;; Helper for emitting the `div` instruction. +;; rd ← rs1 ÷ rs2 +(decl rv_div (XReg XReg) XReg) +(rule (rv_div rs1 rs2) + (alu_rrr (AluOPRRR.Div) rs1 rs2)) + +;; Helper for emitting the `divu` ("Divide Unsigned") instruction. +;; rd ← rs1 ÷ rs2 +(decl rv_divu (XReg XReg) XReg) +(rule (rv_divu rs1 rs2) + (alu_rrr (AluOPRRR.DivU) rs1 rs2)) + +;; Helper for emitting the `rem` instruction. +;; rd ← rs1 mod rs2 +(decl rv_rem (XReg XReg) XReg) +(rule (rv_rem rs1 rs2) + (alu_rrr (AluOPRRR.Rem) rs1 rs2)) + +;; Helper for emitting the `remu` ("Remainder Unsigned") instruction. +;; rd ← rs1 mod rs2 +(decl rv_remu (XReg XReg) XReg) +(rule (rv_remu rs1 rs2) + (alu_rrr (AluOPRRR.RemU) rs1 rs2)) + + + +;; RV64M Extension +;; TODO: Enable these instructions only when we have the M extension + +;; Helper for emitting the `mulw` ("Multiply Word") instruction. +;; rd ← uext32(rs1) × uext32(rs2) +(decl rv_mulw (XReg XReg) XReg) +(rule (rv_mulw rs1 rs2) + (alu_rrr (AluOPRRR.Mulw) rs1 rs2)) + +;; Helper for emitting the `divw` ("Divide Word") instruction. +;; rd ← sext32(rs1) ÷ sext32(rs2) +(decl rv_divw (XReg XReg) XReg) +(rule (rv_divw rs1 rs2) + (alu_rrr (AluOPRRR.Divw) rs1 rs2)) + +;; Helper for emitting the `divuw` ("Divide Unsigned Word") instruction. 
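+
+;; Illustrative note, not in the original riscv64 rules: the `div*`/`rem*`
+;; helpers in this block emit the bare RISC-V instructions, which never trap;
+;; division by zero produces an all-ones quotient and INT_MIN / -1 wraps.
+;; Cranelift's trapping semantics are layered on separately with the
+;; `gen_div_by_zero` and `gen_div_overflow` helpers defined near the end of
+;; this file.
+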
+;; rd ← uext32(rs1) ÷ uext32(rs2) +(decl rv_divuw (XReg XReg) XReg) +(rule (rv_divuw rs1 rs2) + (alu_rrr (AluOPRRR.Divuw) rs1 rs2)) + +;; Helper for emitting the `remw` ("Remainder Word") instruction. +;; rd ← sext32(rs1) mod sext32(rs2) +(decl rv_remw (XReg XReg) XReg) +(rule (rv_remw rs1 rs2) + (alu_rrr (AluOPRRR.Remw) rs1 rs2)) + +;; Helper for emitting the `remuw` ("Remainder Unsigned Word") instruction. +;; rd ← uext32(rs1) mod uext32(rs2) +(decl rv_remuw (XReg XReg) XReg) +(rule (rv_remuw rs1 rs2) + (alu_rrr (AluOPRRR.Remuw) rs1 rs2)) + + +;; F and D Extensions +;; TODO: Enable these instructions only when we have the F or D extensions + +;; Helper for emitting the `fadd` instruction. +(decl rv_fadd (Type FReg FReg) FReg) +(rule (rv_fadd $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FaddS) $F32 rs1 rs2)) +(rule (rv_fadd $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FaddD) $F64 rs1 rs2)) + +;; Helper for emitting the `fsub` instruction. +(decl rv_fsub (Type FReg FReg) FReg) +(rule (rv_fsub $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsubS) $F32 rs1 rs2)) +(rule (rv_fsub $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsubD) $F64 rs1 rs2)) + +;; Helper for emitting the `fmul` instruction. +(decl rv_fmul (Type FReg FReg) FReg) +(rule (rv_fmul $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FmulS) $F32 rs1 rs2)) +(rule (rv_fmul $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FmulD) $F64 rs1 rs2)) + +;; Helper for emitting the `fdiv` instruction. +(decl rv_fdiv (Type FReg FReg) FReg) +(rule (rv_fdiv $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FdivS) $F32 rs1 rs2)) +(rule (rv_fdiv $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FdivD) $F64 rs1 rs2)) + +;; Helper for emitting the `fsqrt` instruction. +(decl rv_fsqrt (Type FReg) FReg) +(rule (rv_fsqrt $F32 rs1) (fpu_rr (FpuOPRR.FsqrtS) $F32 rs1)) +(rule (rv_fsqrt $F64 rs1) (fpu_rr (FpuOPRR.FsqrtD) $F64 rs1)) + +;; Helper for emitting the `fmadd` instruction. +(decl rv_fmadd (Type FReg FReg FReg) FReg) +(rule (rv_fmadd $F32 rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddS) $F32 rs1 rs2 rs3)) +(rule (rv_fmadd $F64 rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddD) $F64 rs1 rs2 rs3)) + +;; Helper for emitting the `fmv.x.w` instruction. +(decl rv_fmvxw (FReg) XReg) +(rule (rv_fmvxw r) (fpu_rr (FpuOPRR.FmvXW) $I32 r)) + +;; Helper for emitting the `fmv.x.d` instruction. +(decl rv_fmvxd (FReg) XReg) +(rule (rv_fmvxd r) (fpu_rr (FpuOPRR.FmvXD) $I64 r)) + +;; Helper for emitting the `fmv.w.x` instruction. +(decl rv_fmvwx (XReg) FReg) +(rule (rv_fmvwx r) (fpu_rr (FpuOPRR.FmvWX) $F32 r)) + +;; Helper for emitting the `fmv.d.x` instruction. +(decl rv_fmvdx (XReg) FReg) +(rule (rv_fmvdx r) (fpu_rr (FpuOPRR.FmvDX) $F64 r)) + +;; Helper for emitting the `fcvt.d.s` ("Float Convert Double to Single") instruction. +(decl rv_fcvtds (FReg) FReg) +(rule (rv_fcvtds rs1) (fpu_rr (FpuOPRR.FcvtDS) $F32 rs1)) + +;; Helper for emitting the `fcvt.s.d` ("Float Convert Single to Double") instruction. +(decl rv_fcvtsd (FReg) FReg) +(rule (rv_fcvtsd rs1) (fpu_rr (FpuOPRR.FcvtSD) $F64 rs1)) + +;; Helper for emitting the `fsgnj` ("Floating Point Sign Injection") instruction. +;; The output of this instruction is `rs1` with the sign bit from `rs2` +;; This implements the `copysign` operation +(decl rv_fsgnj (Type FReg FReg) FReg) +(rule (rv_fsgnj $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjS) $F32 rs1 rs2)) +(rule (rv_fsgnj $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjD) $F64 rs1 rs2)) + +;; Helper for emitting the `fsgnjn` ("Floating Point Sign Injection Negated") instruction. 
+;; The output of this instruction is `rs1` with the negated sign bit from `rs2` +;; When `rs1 == rs2` this implements the `neg` operation +(decl rv_fsgnjn (Type FReg FReg) FReg) +(rule (rv_fsgnjn $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjnS) $F32 rs1 rs2)) +(rule (rv_fsgnjn $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjnD) $F64 rs1 rs2)) + +;; Helper for emitting the `fneg` ("Floating Point Negate") instruction. +;; This instruction is a mnemonic for `fsgnjn rd, rs1, rs1` +(decl rv_fneg (Type FReg) FReg) +(rule (rv_fneg ty rs1) (rv_fsgnjn ty rs1 rs1)) + +;; Helper for emitting the `fsgnjx` ("Floating Point Sign Injection Exclusive") instruction. +;; The output of this instruction is `rs1` with the XOR of the sign bits from `rs1` and `rs2`. +;; When `rs1 == rs2` this implements `fabs` +(decl rv_fsgnjx (Type FReg FReg) FReg) +(rule (rv_fsgnjx $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjxS) $F32 rs1 rs2)) +(rule (rv_fsgnjx $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjxD) $F64 rs1 rs2)) + +;; Helper for emitting the `fabs` ("Floating Point Absolute") instruction. +;; This instruction is a mnemonic for `fsgnjx rd, rs1, rs1` +(decl rv_fabs (Type FReg) FReg) +(rule (rv_fabs ty rs1) (rv_fsgnjx ty rs1 rs1)) + +;; Helper for emitting the `feq` ("Float Equal") instruction. +(decl rv_feq (Type FReg FReg) XReg) +(rule (rv_feq $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FeqS) $I64 rs1 rs2)) +(rule (rv_feq $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FeqD) $I64 rs1 rs2)) + +;; Helper for emitting the `flt` ("Float Less Than") instruction. +(decl rv_flt (Type FReg FReg) XReg) +(rule (rv_flt $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FltS) $I64 rs1 rs2)) +(rule (rv_flt $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FltD) $I64 rs1 rs2)) + +;; Helper for emitting the `fle` ("Float Less Than or Equal") instruction. +(decl rv_fle (Type FReg FReg) XReg) +(rule (rv_fle $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FleS) $I64 rs1 rs2)) +(rule (rv_fle $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FleD) $I64 rs1 rs2)) + +;; Helper for emitting the `fgt` ("Float Greater Than") instruction. +;; Note: The arguments are reversed +(decl rv_fgt (Type FReg FReg) XReg) +(rule (rv_fgt ty rs1 rs2) (rv_flt ty rs2 rs1)) + +;; Helper for emitting the `fge` ("Float Greater Than or Equal") instruction. +;; Note: The arguments are reversed +(decl rv_fge (Type FReg FReg) XReg) +(rule (rv_fge ty rs1 rs2) (rv_fle ty rs2 rs1)) + + +;; `Zba` Extension Instructions + +;; Helper for emitting the `adduw` ("Add Unsigned Word") instruction. +;; rd ← uext32(rs1) + uext32(rs2) +(decl rv_adduw (XReg XReg) XReg) +(rule (rv_adduw rs1 rs2) + (alu_rrr (AluOPRRR.Adduw) rs1 rs2)) + +;; Helper for emitting the `zext.w` ("Zero Extend Word") instruction. +;; This instruction is a mnemonic for `adduw rd, rs1, zero`. +;; rd ← uext32(rs1) +(decl rv_zextw (XReg) XReg) +(rule (rv_zextw rs1) + (rv_adduw rs1 (zero_reg))) + +;; Helper for emitting the `slli.uw` ("Shift Left Logical Immediate Unsigned Word") instruction. +;; rd ← uext32(rs1) << imm +(decl rv_slliuw (XReg Imm12) XReg) +(rule (rv_slliuw rs1 imm) + (alu_rr_imm12 (AluOPRRI.SlliUw) rs1 imm)) + + +;; `Zbb` Extension Instructions + +;; Helper for emitting the `andn` ("And Negated") instruction. +;; rd ← rs1 ∧ ~(rs2) +(decl rv_andn (XReg XReg) XReg) +(rule (rv_andn rs1 rs2) + (alu_rrr (AluOPRRR.Andn) rs1 rs2)) + +;; Helper for emitting the `orn` ("Or Negated") instruction. +;; rd ← rs1 ∨ ~(rs2) +(decl rv_orn (XReg XReg) XReg) +(rule (rv_orn rs1 rs2) + (alu_rrr (AluOPRRR.Orn) rs1 rs2)) + +;; Helper for emitting the `clz` ("Count Leading Zero Bits") instruction. 
+(decl rv_clz (XReg) XReg) +(rule (rv_clz rs1) + (alu_rr_funct12 (AluOPRRI.Clz) rs1)) + +;; Helper for emitting the `clzw` ("Count Leading Zero Bits in Word") instruction. +(decl rv_clzw (XReg) XReg) +(rule (rv_clzw rs1) + (alu_rr_funct12 (AluOPRRI.Clzw) rs1)) + +;; Helper for emitting the `ctz` ("Count Trailing Zero Bits") instruction. +(decl rv_ctz (XReg) XReg) +(rule (rv_ctz rs1) + (alu_rr_funct12 (AluOPRRI.Ctz) rs1)) + +;; Helper for emitting the `ctzw` ("Count Trailing Zero Bits in Word") instruction. +(decl rv_ctzw (XReg) XReg) +(rule (rv_ctzw rs1) + (alu_rr_funct12 (AluOPRRI.Ctzw) rs1)) + +;; Helper for emitting the `cpop` ("Count Population") instruction. +(decl rv_cpop (XReg) XReg) +(rule (rv_cpop rs1) + (alu_rr_funct12 (AluOPRRI.Cpop) rs1)) + +;; Helper for emitting the `max` instruction. +(decl rv_max (XReg XReg) XReg) +(rule (rv_max rs1 rs2) + (alu_rrr (AluOPRRR.Max) rs1 rs2)) + +;; Helper for emitting the `sext.b` instruction. +(decl rv_sextb (XReg) XReg) +(rule (rv_sextb rs1) + (alu_rr_imm12 (AluOPRRI.Sextb) rs1 (imm12_const 0))) + +;; Helper for emitting the `sext.h` instruction. +(decl rv_sexth (XReg) XReg) +(rule (rv_sexth rs1) + (alu_rr_imm12 (AluOPRRI.Sexth) rs1 (imm12_const 0))) + +;; Helper for emitting the `zext.h` instruction. +(decl rv_zexth (XReg) XReg) +(rule (rv_zexth rs1) + (alu_rr_imm12 (AluOPRRI.Zexth) rs1 (imm12_const 0))) + +;; Helper for emitting the `rol` ("Rotate Left") instruction. +(decl rv_rol (XReg XReg) XReg) +(rule (rv_rol rs1 rs2) + (alu_rrr (AluOPRRR.Rol) rs1 rs2)) + +;; Helper for emitting the `rolw` ("Rotate Left Word") instruction. +(decl rv_rolw (XReg XReg) XReg) +(rule (rv_rolw rs1 rs2) + (alu_rrr (AluOPRRR.Rolw) rs1 rs2)) + +;; Helper for emitting the `ror` ("Rotate Right") instruction. +(decl rv_ror (XReg XReg) XReg) +(rule (rv_ror rs1 rs2) + (alu_rrr (AluOPRRR.Ror) rs1 rs2)) + +;; Helper for emitting the `rorw` ("Rotate Right Word") instruction. +(decl rv_rorw (XReg XReg) XReg) +(rule (rv_rorw rs1 rs2) + (alu_rrr (AluOPRRR.Rorw) rs1 rs2)) + +;; Helper for emitting the `rev8` ("Byte Reverse") instruction. +(decl rv_rev8 (XReg) XReg) +(rule (rv_rev8 rs1) + (alu_rr_funct12 (AluOPRRI.Rev8) rs1)) + +;; Helper for emitting the `brev8` ("Bit Reverse Inside Bytes") instruction. +;; TODO: This instruction is mentioned in some older versions of the +;; spec, but has since disappeared, we should follow up on this. +;; It probably was renamed to `rev.b` which seems to be the closest match. +(decl rv_brev8 (XReg) XReg) +(rule (rv_brev8 rs1) + (alu_rr_funct12 (AluOPRRI.Brev8) rs1)) + +;; Helper for emitting the `bseti` ("Single-Bit Set Immediate") instruction. +(decl rv_bseti (XReg Imm12) XReg) +(rule (rv_bseti rs1 imm) + (alu_rr_imm12 (AluOPRRI.Bseti) rs1 imm)) + + +;; `Zbkb` Extension Instructions + +;; Helper for emitting the `pack` ("Pack low halves of registers") instruction. +(decl rv_pack (XReg XReg) XReg) +(rule (rv_pack rs1 rs2) + (alu_rrr (AluOPRRR.Pack) rs1 rs2)) + +;; Helper for emitting the `packw` ("Pack low 16-bits of registers") instruction. 
+(decl rv_packw (XReg XReg) XReg) +(rule (rv_packw rs1 rs2) + (alu_rrr (AluOPRRR.Packw) rs1 rs2)) + + + + +;; Generate a mask for the bit-width of the given type +(decl pure shift_mask (Type) u64) +(rule (shift_mask ty) (u64_sub (ty_bits (lane_type ty)) 1)) + +;; for load immediate +(decl imm (Type u64) Reg) +(extern constructor imm imm) + +;; Imm12 Rules + +(decl pure imm12_zero () Imm12) +(rule + (imm12_zero) + (imm12_const 0)) + +(decl pure imm12_const (i32) Imm12) +(extern constructor imm12_const imm12_const) + +(decl load_imm12 (i32) Reg) +(rule + (load_imm12 x) + (rv_addi (zero_reg) (imm12_const x))) + +;; for load immediate +(decl imm_from_bits (u64) Imm12) +(extern constructor imm_from_bits imm_from_bits) + +(decl imm_from_neg_bits (i64) Imm12) +(extern constructor imm_from_neg_bits imm_from_neg_bits) + +(decl imm12_const_add (i32 i32) Imm12) +(extern constructor imm12_const_add imm12_const_add) + +(decl imm12_and (Imm12 u64) Imm12) +(extern constructor imm12_and imm12_and) + +;; Helper for get negative of Imm12 +(decl neg_imm12 (Imm12) Imm12) +(extern constructor neg_imm12 neg_imm12) + +;; Imm12 Extractors + +;; Helper to go directly from a `Value`, when it's an `iconst`, to an `Imm12`. +(decl imm12_from_value (Imm12) Value) +(extractor + (imm12_from_value n) + (def_inst (iconst (u64_from_imm64 (imm12_from_u64 n))))) + +(decl imm12_from_u64 (Imm12) u64) +(extern extractor imm12_from_u64 imm12_from_u64) + +(decl pure partial u64_to_imm12 (u64) Imm12) +(rule (u64_to_imm12 (imm12_from_u64 n)) n) + + +;; Imm5 Extractors + +(decl imm5_from_u64 (Imm5) u64) +(extern extractor imm5_from_u64 imm5_from_u64) + +;; Construct a Imm5 from an i8 +(decl pure partial imm5_from_i8 (i8) Imm5) +(extern constructor imm5_from_i8 imm5_from_i8) + +;; Extractor that matches a `Value` equivalent to a replicated Imm5 on all lanes. +;; TODO(#6527): Try matching vconst here as well +(decl replicated_imm5 (Imm5) Value) +(extractor (replicated_imm5 n) + (def_inst (splat (iconst (u64_from_imm64 (imm5_from_u64 n)))))) + +;; UImm5 Helpers + +;; Extractor that matches a `Value` equivalent to a replicated UImm5 on all lanes. +;; TODO(#6527): Try matching vconst here as well +(decl replicated_uimm5 (UImm5) Value) +(extractor (replicated_uimm5 n) + (def_inst (splat (uimm5_from_value n)))) + +;; Helper to go directly from a `Value`, when it's an `iconst`, to an `UImm5`. +(decl uimm5_from_value (UImm5) Value) +(extractor (uimm5_from_value n) + (iconst (u64_from_imm64 (uimm5_from_u64 n)))) + +;; Extract a `UImm5` from an `u8`. +(decl pure partial uimm5_from_u8 (UImm5) u8) +(extern extractor uimm5_from_u8 uimm5_from_u8) + +;; Extract a `UImm5` from an `u64`. +(decl pure partial uimm5_from_u64 (UImm5) u64) +(extern extractor uimm5_from_u64 uimm5_from_u64) + +;; Convert a `u64` into an `UImm5` +(decl pure partial u64_to_uimm5 (u64) UImm5) +(rule (u64_to_uimm5 (uimm5_from_u64 n)) n) + +(decl uimm5_bitcast_to_imm5 (UImm5) Imm5) +(extern constructor uimm5_bitcast_to_imm5 uimm5_bitcast_to_imm5) + +;; Float Helpers + +;; Returns the bitpattern of the Canonical NaN for the given type. +(decl pure canonical_nan_u64 (Type) u64) +(rule (canonical_nan_u64 $F32) 0x7fc00000) +(rule (canonical_nan_u64 $F64) 0x7ff8000000000000) + +(decl gen_default_frm () OptionFloatRoundingMode) +(extern constructor gen_default_frm gen_default_frm) + +;; Helper for emitting `MInst.FpuRR` instructions. 
+(decl fpu_rr (FpuOPRR Type Reg) Reg) +(rule (fpu_rr op ty src) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.FpuRR op (gen_default_frm) dst src)))) + dst)) + +;; Helper for emitting `MInst.AluRRR` instructions. +(decl alu_rrr (AluOPRRR Reg Reg) Reg) +(rule (alu_rrr op src1 src2) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRR op dst src1 src2)))) + dst)) + + +(decl pack_float_rounding_mode (FRM) OptionFloatRoundingMode) +(extern constructor pack_float_rounding_mode pack_float_rounding_mode) + +;; Helper for emitting `MInst.AluRRR` instructions. +(decl fpu_rrr (FpuOPRRR Type Reg Reg) Reg) +(rule (fpu_rrr op ty src1 src2) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.FpuRRR op (gen_default_frm) dst src1 src2)))) + dst)) + + +;; Helper for emitting `MInst.FpuRRRR` instructions. +(decl fpu_rrrr (FpuOPRRRR Type Reg Reg Reg) Reg) +(rule (fpu_rrrr op ty src1 src2 src3) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.FpuRRRR op (gen_default_frm) dst src1 src2 src3)))) + dst)) + + +;; Helper for emitting `MInst.AluRRImm12` instructions. +(decl alu_rr_imm12 (AluOPRRI Reg Imm12) Reg) +(rule (alu_rr_imm12 op src imm) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRImm12 op dst src imm)))) + dst)) + +;; some instruction use imm12 as funct12. +;; so we don't need the imm12 paramter. +(decl alu_rr_funct12 (AluOPRRI Reg) Reg) +(rule (alu_rr_funct12 op src) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero))))) + dst)) + +(decl select_addi (Type) AluOPRRI) +(rule 1 (select_addi (fits_in_32 ty)) (AluOPRRI.Addiw)) +(rule (select_addi (fits_in_64 ty)) (AluOPRRI.Addi)) + + +(decl gen_bnot (Type ValueRegs) ValueRegs) +(rule 2 (gen_bnot (ty_scalar_float ty) x) + (let ((val FReg (value_regs_get x 0)) + (x_val XReg (move_f_to_x val ty)) + (inverted XReg (rv_not x_val)) + (res FReg (move_x_to_f inverted (float_int_of_same_size ty)))) + (value_reg res))) + +(rule 1 (gen_bnot $I128 x) + (let ((lo XReg (rv_not (value_regs_get x 0))) + (hi XReg (rv_not (value_regs_get x 1)))) + (value_regs lo hi))) + +(rule 0 (gen_bnot (ty_int_ref_scalar_64 _) x) + (rv_not (value_regs_get x 0))) + + +(decl gen_and (Type ValueRegs ValueRegs) ValueRegs) +(rule 1 (gen_and $I128 x y) + (value_regs + (rv_and (value_regs_get x 0) (value_regs_get y 0)) + (rv_and (value_regs_get x 1) (value_regs_get y 1)))) + +(rule 0 (gen_and (fits_in_64 _) x y) + (rv_and (value_regs_get x 0) (value_regs_get y 0))) + + +(decl gen_andi (XReg u64) XReg) +(rule 1 (gen_andi x (imm12_from_u64 y)) + (rv_andi x y)) + +(rule 0 (gen_andi x y) + (rv_and x (imm $I64 y))) + + +(decl gen_or (Type ValueRegs ValueRegs) ValueRegs) +(rule 1 (gen_or $I128 x y) + (value_regs + (rv_or (value_regs_get x 0) (value_regs_get y 0)) + (rv_or (value_regs_get x 1) (value_regs_get y 1)))) + +(rule 0 (gen_or (fits_in_64 _) x y) + (rv_or (value_regs_get x 0) (value_regs_get y 0))) + + + +(decl gen_bswap (Type XReg) XReg) + +;; This is only here to make the rule below work. bswap.i8 isn't valid +(rule 0 (gen_bswap $I8 x) x) + +(rule 1 (gen_bswap (ty_int_ref_16_to_64 ty) x) + (if-let half_ty (ty_half_width ty)) + (if-let half_size (u64_to_imm12 (ty_bits half_ty))) + (let (;; This swaps the top bytes and zeroes the bottom bytes, so that + ;; we can or it with the bottom bytes later. 
+ (swap_top XReg (gen_bswap half_ty x)) + (top XReg (rv_slli swap_top half_size)) + + ;; Get the top half, swap it, and zero extend it so we can `or` it + ;; with the bottom half. + (shifted XReg (rv_srli x half_size)) + (swap_bot XReg (gen_bswap half_ty shifted)) + (bot XReg (zext swap_bot half_ty $I64))) + (rv_or top bot))) + +;; With `zbb` we can use `rev8` and shift the result +(rule 2 (gen_bswap (int_fits_in_32 ty) x) + (if-let $true (has_zbb)) + (if-let shift_amt (u64_to_imm12 (u64_sub 64 (ty_bits ty)))) + (rv_srli (rv_rev8 x) shift_amt)) + +;; With `zbb` we can use `rev8` that does this +(rule 3 (gen_bswap $I64 x) + (if-let $true (has_zbb)) + (rv_rev8 x)) + + + +(decl lower_bit_reverse (Reg Type) Reg) + +(rule + (lower_bit_reverse r $I8) + (gen_brev8 r $I8)) + +(rule + (lower_bit_reverse r $I16) + (let + ((tmp XReg (gen_brev8 r $I16)) + (tmp2 XReg (gen_rev8 tmp)) + (result XReg (rv_srli tmp2 (imm12_const 48)))) + result)) + +(rule + (lower_bit_reverse r $I32) + (let + ((tmp XReg (gen_brev8 r $I32)) + (tmp2 XReg (gen_rev8 tmp)) + (result XReg (rv_srli tmp2 (imm12_const 32)))) + result)) + +(rule + (lower_bit_reverse r $I64) + (let + ((tmp XReg (gen_rev8 r))) + (gen_brev8 tmp $I64))) + + +(decl lower_ctz (Type Reg) Reg) +(rule (lower_ctz ty x) + (gen_cltz $false x ty)) + +(rule 1 (lower_ctz (fits_in_16 ty) x) + (if-let $true (has_zbb)) + (let ((tmp Reg (gen_bseti x (ty_bits ty)))) + (rv_ctzw tmp))) + +(rule 2 (lower_ctz $I32 x) + (if-let $true (has_zbb)) + (rv_ctzw x)) + +(rule 2 (lower_ctz $I64 x) + (if-let $true (has_zbb)) + (rv_ctz x)) + +;; Count trailing zeros from a i128 bit value. +;; We count both halves separately and conditionally add them if it makes sense. +(decl lower_ctz_128 (ValueRegs) ValueRegs) +(rule (lower_ctz_128 x) + (let ((x_lo XReg (value_regs_get x 0)) + (x_hi XReg (value_regs_get x 1)) + ;; Count both halves + (high XReg (lower_ctz $I64 x_hi)) + (low XReg (lower_ctz $I64 x_lo)) + ;; Only add the top half if the bottom is zero + (high XReg (gen_select_reg (IntCC.Equal) x_lo (zero_reg) high (zero_reg))) + (result XReg (rv_add low high))) + (extend result (ExtendOp.Zero) $I64 $I128))) + +(decl lower_clz (Type XReg) XReg) +(rule (lower_clz ty rs) + (gen_cltz $true rs ty)) + +(rule 1 (lower_clz (fits_in_16 ty) r) + (if-let $true (has_zbb)) + (let ((tmp XReg (zext r ty $I64)) + (count XReg (rv_clz tmp)) + ;; We always do the operation on the full 64-bit register, so subtract 64 from the result. + (result XReg (rv_addi count (imm12_const_add (ty_bits ty) -64)))) + result)) + +(rule 2 (lower_clz $I32 r) + (if-let $true (has_zbb)) + (rv_clzw r)) + +(rule 2 (lower_clz $I64 r) + (if-let $true (has_zbb)) + (rv_clz r)) + + +;; Count leading zeros from a i128 bit value. +;; We count both halves separately and conditionally add them if it makes sense. 
+(decl lower_clz_i128 (ValueRegs) ValueRegs) +(rule (lower_clz_i128 x) + (let ((x_lo XReg (value_regs_get x 0)) + (x_hi XReg (value_regs_get x 1)) + ;; Count both halves + (high XReg (lower_clz $I64 x_hi)) + (low XReg (lower_clz $I64 x_lo)) + ;; Only add the bottom zeros if the top half is zero + (low XReg (gen_select_reg (IntCC.Equal) x_hi (zero_reg) low (zero_reg))) + (result XReg (rv_add high low))) + (extend result (ExtendOp.Zero) $I64 $I128))) + + +(decl lower_cls (Type XReg) XReg) +(rule (lower_cls ty r) + (let ((tmp XReg (sext r ty $I64)) + (tmp2 XReg (gen_select_reg (IntCC.SignedLessThan) tmp (zero_reg) (rv_not tmp) tmp)) + (tmp3 XReg (lower_clz ty tmp2))) + (rv_addi tmp3 (imm12_const -1)))) + +;; If the sign bit is set, we count the leading zeros of the inverted value. +;; Otherwise we can just count the leading zeros of the original value. +;; Subtract 1 since the sign bit does not count. +(decl lower_cls_i128 (ValueRegs) ValueRegs) +(rule (lower_cls_i128 x) + (let ((low XReg (value_regs_get x 0)) + (high XReg (value_regs_get x 1)) + (low XReg (gen_select_reg (IntCC.SignedLessThan) high (zero_reg) (rv_not low) low)) + (high XReg (gen_select_reg (IntCC.SignedLessThan) high (zero_reg) (rv_not high) high)) + (tmp ValueRegs (lower_clz_i128 (value_regs low high))) + (count XReg (value_regs_get tmp 0)) + (result XReg (rv_addi count (imm12_const -1)))) + (extend result (ExtendOp.Zero) $I64 $I128))) + + +(decl gen_cltz (bool XReg Type) XReg) +(rule (gen_cltz leading rs ty) + (let ((tmp WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (sum WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Cltz leading sum step tmp rs ty)))) + sum)) + + +;; Extends an integer if it is smaller than 64 bits. +(decl ext_int_if_need (bool ValueRegs Type) ValueRegs) +;;; For values smaller than 64 bits, we need to extend them to 64 bits +(rule 0 (ext_int_if_need $true val (fits_in_32 (ty_int ty))) + (extend val (ExtendOp.Signed) ty $I64)) +(rule 0 (ext_int_if_need $false val (fits_in_32 (ty_int ty))) + (extend val (ExtendOp.Zero) ty $I64)) +;; If the value is larger than one machine register, we don't need to do anything +(rule 1 (ext_int_if_need _ r $I64) r) +(rule 2 (ext_int_if_need _ r $I128) r) + + +;; Performs a zero extension of the given value +(decl zext (XReg Type Type) XReg) +(rule (zext val from_ty (fits_in_64 to_ty)) (value_regs_get (extend val (ExtendOp.Zero) from_ty to_ty) 0)) + +;; Performs a signed extension of the given value +(decl sext (XReg Type Type) XReg) +(rule (sext val from_ty (fits_in_64 to_ty)) (value_regs_get (extend val (ExtendOp.Signed) from_ty to_ty) 0)) + +(type ExtendOp + (enum + (Zero) + (Signed))) + +;; Performs either a sign or zero extension of the given value +(decl extend (ValueRegs ExtendOp Type Type) ValueRegs) + +;;; Generic Rules Extending to I64 +(decl pure extend_shift_op (ExtendOp) AluOPRRI) +(rule (extend_shift_op (ExtendOp.Zero)) (AluOPRRI.Srli)) +(rule (extend_shift_op (ExtendOp.Signed)) (AluOPRRI.Srai)) + +;; In the most generic case, we shift left and then shift right. +;; The type of right shift is determined by the extend op. +(rule 0 (extend val extend_op (fits_in_32 from_ty) (fits_in_64 to_ty)) + (let ((val XReg (value_regs_get val 0)) + (shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits from_ty)))) + (left XReg (rv_slli val shift)) + (shift_op AluOPRRI (extend_shift_op extend_op)) + (right XReg (alu_rr_imm12 shift_op left shift))) + right)) + +;; If we are zero extending a U8 we can use a `andi` instruction. 
+(rule 1 (extend val (ExtendOp.Zero) $I8 (fits_in_64 to_ty)) + (let ((val XReg (value_regs_get val 0))) + (rv_andi val (imm12_const 255)))) + +;; When signed extending from 32 to 64 bits we can use a +;; `addiw val 0`. Also known as a `sext.w` +(rule 1 (extend val (ExtendOp.Signed) $I32 $I64) + (let ((val XReg (value_regs_get val 0))) + (rv_sextw val))) + + +;; No point in trying to use `packh` here to zero extend 8 bit values +;; since we can just use `andi` instead which is part of the base ISA. + +;; If we have the `zbkb` extension `packw` can be used to zero extend 16 bit values +(rule 1 (extend val (ExtendOp.Zero) $I16 (fits_in_64 _)) + (if-let $true (has_zbkb)) + (let ((val XReg (value_regs_get val 0))) + (rv_packw val (zero_reg)))) + +;; If we have the `zbkb` extension `pack` can be used to zero extend 32 bit registers +(rule 1 (extend val (ExtendOp.Zero) $I32 $I64) + (if-let $true (has_zbkb)) + (let ((val XReg (value_regs_get val 0))) + (rv_pack val (zero_reg)))) + + +;; If we have the `zbb` extension we can use the dedicated `sext.b` instruction. +(rule 1 (extend val (ExtendOp.Signed) $I8 (fits_in_64 _)) + (if-let $true (has_zbb)) + (let ((val XReg (value_regs_get val 0))) + (rv_sextb val))) + +;; If we have the `zbb` extension we can use the dedicated `sext.h` instruction. +(rule 1 (extend val (ExtendOp.Signed) $I16 (fits_in_64 _)) + (if-let $true (has_zbb)) + (let ((val XReg (value_regs_get val 0))) + (rv_sexth val))) + +;; If we have the `zbb` extension we can use the dedicated `zext.h` instruction. +(rule 2 (extend val (ExtendOp.Zero) $I16 (fits_in_64 _)) + (if-let $true (has_zbb)) + (let ((val XReg (value_regs_get val 0))) + (rv_zexth val))) + +;; With `zba` we have a `zext.w` instruction +(rule 2 (extend val (ExtendOp.Zero) $I32 $I64) + (if-let $true (has_zba)) + (let ((val XReg (value_regs_get val 0))) + (rv_zextw val))) + +;;; Signed rules extending to I128 +;; Extend the bottom part, and extract the sign bit from the bottom as the top +(rule 3 (extend val (ExtendOp.Signed) (fits_in_64 from_ty) $I128) + (let ((val XReg (value_regs_get val 0)) + (low XReg (sext val from_ty $I64)) + (high XReg (rv_srai low (imm12_const 63)))) + (value_regs low high))) + +;;; Unsigned rules extending to I128 +;; Extend the bottom register to I64 and then just zero out the top half. +(rule 3 (extend val (ExtendOp.Zero) (fits_in_64 from_ty) $I128) + (let ((val XReg (value_regs_get val 0)) + (low XReg (zext val from_ty $I64)) + (high XReg (load_u64_constant 0))) + (value_regs low high))) + +;; Catch all rule for ignoring extensions of the same type. +(rule 4 (extend val _ ty ty) val) + + + +(decl lower_b128_binary (AluOPRRR ValueRegs ValueRegs) ValueRegs) +(rule + (lower_b128_binary op a b) + (let + ( ;; low part. + (low XReg (alu_rrr op (value_regs_get a 0) (value_regs_get b 0))) + ;; high part. 
+ (high XReg (alu_rrr op (value_regs_get a 1) (value_regs_get b 1)))) + (value_regs low high))) + +(decl lower_umlhi (Type XReg XReg) XReg) +(rule 1 (lower_umlhi $I64 rs1 rs2) + (rv_mulhu rs1 rs2)) + +(rule (lower_umlhi ty rs1 rs2) + (let + ((tmp XReg (rv_mul (zext rs1 ty $I64) (zext rs2 ty $I64)))) + (rv_srli tmp (imm12_const (ty_bits ty))))) + +(decl lower_smlhi (Type XReg XReg) XReg) +(rule 1 + (lower_smlhi $I64 rs1 rs2) + (rv_mulh rs1 rs2)) + +(rule + (lower_smlhi ty rs1 rs2) + (let + ((tmp XReg (rv_mul rs1 rs2))) + (rv_srli tmp (imm12_const (ty_bits ty))))) + + +(decl lower_rotl (Type XReg XReg) XReg) + +(rule 1 + (lower_rotl $I64 rs amount) + (if-let $true (has_zbb)) + (rv_rol rs amount)) + +(rule + (lower_rotl $I64 rs amount) + (if-let $false (has_zbb)) + (lower_rotl_shift $I64 rs amount)) + +(rule 1 + (lower_rotl $I32 rs amount) + (if-let $true (has_zbb)) + (rv_rolw rs amount)) + +(rule + (lower_rotl $I32 rs amount) + (if-let $false (has_zbb)) + (lower_rotl_shift $I32 rs amount)) + +(rule -1 + (lower_rotl ty rs amount) + (lower_rotl_shift ty rs amount)) + +;;; using shift to implement rotl. +(decl lower_rotl_shift (Type XReg XReg) XReg) + +;;; for I8 and I16 ... +(rule + (lower_rotl_shift ty rs amount) + (let + ((x ValueRegs (gen_shamt ty amount)) + (shamt Reg (value_regs_get x 0)) + (len_sub_shamt Reg (value_regs_get x 1)) + ;; + (part1 Reg (rv_sll rs shamt)) + ;; + (part2 Reg (rv_srl rs len_sub_shamt)) + (part3 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) part2))) + (rv_or part1 part3))) + + +;;;; construct shift amount.rotl on i128 will use shift to implement. So can call this function. +;;;; this will return shift amount and (ty_bits - "shift amount") +;;;; if ty_bits is greater than 64 like i128, then shmat will fallback to 64.because We are 64 bit platform. +(decl gen_shamt (Type XReg) ValueRegs) +(extern constructor gen_shamt gen_shamt) + +(decl lower_rotr (Type XReg XReg) XReg) + +(rule 1 + (lower_rotr $I64 rs amount) + (if-let $true (has_zbb)) + (rv_ror rs amount)) +(rule + (lower_rotr $I64 rs amount) + (if-let $false (has_zbb)) + (lower_rotr_shift $I64 rs amount)) + +(rule 1 + (lower_rotr $I32 rs amount) + (if-let $true (has_zbb)) + (rv_rorw rs amount)) + +(rule + (lower_rotr $I32 rs amount) + (if-let $false (has_zbb)) + (lower_rotr_shift $I32 rs amount)) + +(rule -1 + (lower_rotr ty rs amount) + (lower_rotr_shift ty rs amount)) + +(decl lower_rotr_shift (Type XReg XReg) XReg) + +;;; +(rule + (lower_rotr_shift ty rs amount) + (let + ((x ValueRegs (gen_shamt ty amount)) + (shamt XReg (value_regs_get x 0)) + (len_sub_shamt XReg (value_regs_get x 1)) + ;; + (part1 XReg (rv_srl rs shamt)) + ;; + (part2 XReg (rv_sll rs len_sub_shamt)) + ;; + (part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) part2))) + (rv_or part1 part3))) + + + +;; bseti: Set a single bit in a register, indexed by a constant. 
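+
+;; Illustrative note, not in the original riscv64 rules: the shift-based
+;; rotates above expand to rotl(x, s) = (x << s) | (x >> (bits - s)). When
+;; `s` is 0 the complementary shift does not produce 0 (for 64-bit values it
+;; wraps around to a shift by 0), so the `gen_select_reg` on
+;; `(IntCC.Equal) shamt (zero_reg)` forces that term to zero and keeps a
+;; rotate by 0 equal to its input.
+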
+(decl gen_bseti (Reg u64) Reg) +(rule (gen_bseti val bit) + (if-let $false (has_zbs)) + (if-let $false (u64_le bit 12)) + (let ((const XReg (load_u64_constant (u64_shl 1 bit)))) + (rv_or val const))) + +(rule (gen_bseti val bit) + (if-let $false (has_zbs)) + (if-let $true (u64_le bit 12)) + (rv_ori val (imm12_const (u64_as_i32 (u64_shl 1 bit))))) + +(rule (gen_bseti val bit) + (if-let $true (has_zbs)) + (rv_bseti val (imm12_const (u64_as_i32 bit)))) + + +(decl gen_popcnt (Reg Type) Reg) +(rule + (gen_popcnt rs ty) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (sum WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Popcnt sum step tmp rs ty)))) + (writable_reg_to_reg sum))) + +(decl lower_popcnt (XReg Type) XReg) +(rule 1 (lower_popcnt rs ty) + (if-let $true (has_zbb)) + (rv_cpop (zext rs ty $I64))) + +(rule (lower_popcnt rs ty) + (if-let $false (has_zbb)) + (gen_popcnt rs ty)) + +(decl lower_popcnt_i128 (ValueRegs) ValueRegs) +(rule + (lower_popcnt_i128 a) + (let + ( ;; low part. + (low XReg (lower_popcnt (value_regs_get a 0) $I64)) + ;; high part. + (high XReg (lower_popcnt (value_regs_get a 1) $I64)) + ;; add toghter. + (result XReg (rv_add low high))) + (value_regs result (load_u64_constant 0)))) + +(decl lower_i128_rotl (ValueRegs ValueRegs) ValueRegs) +(rule + (lower_i128_rotl x y) + (let + ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; + (low_part1 XReg (rv_sll (value_regs_get x 0) shamt)) + (low_part2 XReg (rv_srl (value_regs_get x 1) len_sub_shamt)) + ;;; if shamt == 0 low_part2 will overflow we should zero instead. + (low_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part2)) + (low XReg (rv_or low_part1 low_part3)) + ;; + (high_part1 XReg (rv_sll (value_regs_get x 1) shamt)) + (high_part2 XReg (rv_srl (value_regs_get x 0) len_sub_shamt)) + (high_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part2)) + (high XReg (rv_or high_part1 high_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + ;; right now we only rotate less than 64 bits. + ;; if shamt is greater than or equal 64 , we should switch low and high. + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high) + ))) + + +(decl lower_i128_rotr (ValueRegs ValueRegs) ValueRegs) +(rule + (lower_i128_rotr x y) + (let + ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; + (low_part1 XReg (rv_srl (value_regs_get x 0) shamt)) + (low_part2 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + ;;; if shamt == 0 low_part2 will overflow we should zero instead. + (low_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part2)) + (low XReg (rv_or low_part1 low_part3)) + ;; + (high_part1 XReg (rv_srl (value_regs_get x 1) shamt)) + (high_part2 XReg (rv_sll (value_regs_get x 0) len_sub_shamt)) + (high_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part2)) + (high XReg (rv_or high_part1 high_part3)) + + ;; + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + ;; right now we only rotate less than 64 bits. 
+ ;; if shamt is greater than or equal 64 , we should switch low and high. + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high) + ))) + +(decl gen_amode (Reg Offset32 Type) AMode) +(extern constructor gen_amode gen_amode) + +;; Generates a AMode that points to a constant in the constant pool. +(decl gen_const_amode (VCodeConstant) AMode) +(extern constructor gen_const_amode gen_const_amode) + +(decl offset32_imm (i32) Offset32) +(extern constructor offset32_imm offset32_imm) + +;; helper function to load from memory. +(decl gen_load (Reg Offset32 LoadOP MemFlags Type) Reg) +(rule + (gen_load p offset op flags ty) + (let + ((tmp WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.Load tmp op flags (gen_amode p offset $I64))))) + tmp)) + +(decl gen_load_128 (Reg Offset32 MemFlags) ValueRegs) +(rule + (gen_load_128 p offset flags) + (let + ((low Reg (gen_load p offset (LoadOP.Ld) flags $I64)) + (high Reg (gen_load p (offset32_add offset 8) (LoadOP.Ld) flags $I64))) + (value_regs low high))) + +(decl default_memflags () MemFlags) +(extern constructor default_memflags default_memflags) + +(decl offset32_add (Offset32 i64) Offset32) +(extern constructor offset32_add offset32_add) + +;; helper function to store to memory. +(decl gen_store (Reg Offset32 StoreOP MemFlags Reg) InstOutput) +(rule + (gen_store base offset op flags src) + (side_effect (SideEffectNoResult.Inst (MInst.Store (gen_amode base offset $I64) op flags src))) +) + +(decl gen_store_128 (Reg Offset32 MemFlags ValueRegs) InstOutput) +(rule + (gen_store_128 p offset flags src) + (side_effect + (SideEffectNoResult.Inst2 + (MInst.Store (gen_amode p offset $I64) (StoreOP.Sd) flags (value_regs_get src 0)) + (MInst.Store (gen_amode p (offset32_add offset 8) $I64) (StoreOP.Sd) flags (value_regs_get src 1))))) + +(decl valid_atomic_transaction (Type) Type) +(extern extractor valid_atomic_transaction valid_atomic_transaction) + +;;helper function. +;;construct an atomic instruction. 
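+
+;; Illustrative note, not in the original riscv64 rules: `gen_load_128` and
+;; `gen_store_128` above split an i128 memory access into two 64-bit
+;; accesses, the low half at `offset` and the high half at `offset + 8`,
+;; matching the low/high register pair layout produced by `value_regs`.
+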
+(decl gen_atomic (AtomicOP Reg Reg AMO) Reg) +(rule + (gen_atomic op addr src amo) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Atomic op tmp addr src amo)))) + tmp)) + +;; helper function +(decl get_atomic_rmw_op (Type AtomicRmwOp) AtomicOP) +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Add)) + (AtomicOP.AmoaddW)) +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Add)) + (AtomicOP.AmoaddD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.And)) + (AtomicOP.AmoandW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.And)) + (AtomicOP.AmoandD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Or)) + (AtomicOP.AmoorW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Or)) + (AtomicOP.AmoorD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Smax)) + (AtomicOP.AmomaxW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Smax)) + (AtomicOP.AmomaxD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Smin)) + (AtomicOP.AmominW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Smin)) + (AtomicOP.AmominD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Umax)) + (AtomicOP.AmomaxuW) +) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Umax)) + (AtomicOP.AmomaxuD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Umin)) + (AtomicOP.AmominuW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Umin)) + (AtomicOP.AmominuD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Xchg)) + (AtomicOP.AmoswapW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Xchg)) + (AtomicOP.AmoswapD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Xor)) + (AtomicOP.AmoxorW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Xor)) + (AtomicOP.AmoxorD)) + +(decl atomic_amo () AMO) +(extern constructor atomic_amo atomic_amo) + + +(decl gen_atomic_load (Reg Type) Reg) +(rule + (gen_atomic_load p ty) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AtomicLoad tmp ty p)))) + (writable_reg_to_reg tmp))) + +;;; +(decl gen_atomic_store (Reg Type Reg) InstOutput) +(rule + (gen_atomic_store p ty src) + (side_effect (SideEffectNoResult.Inst (MInst.AtomicStore src ty p))) +) + + +(decl gen_stack_addr (StackSlot Offset32) Reg) +(extern constructor gen_stack_addr gen_stack_addr) + +;; +(decl gen_select (Type Reg ValueRegs ValueRegs) ValueRegs) +(rule + (gen_select ty c x y) + (let + ((dst VecWritableReg (alloc_vec_writable ty)) + ;; + (reuslt VecWritableReg (vec_writable_clone dst)) + (_ Unit (emit (MInst.Select dst ty c x y)))) + (vec_writable_to_regs reuslt))) + +;; Parameters are "intcc compare_a compare_b rs1 rs2". +(decl gen_select_reg (IntCC XReg XReg Reg Reg) Reg) +(extern constructor gen_select_reg gen_select_reg) + +;; load a constant into reg. +(decl load_u64_constant (u64) Reg) +(extern constructor load_u64_constant load_u64_constant) + +;;; clone WritableReg +;;; if not rust compiler will complain about use moved value. 
+(decl vec_writable_clone (VecWritableReg) VecWritableReg) +(extern constructor vec_writable_clone vec_writable_clone) + +(decl vec_writable_to_regs (VecWritableReg) ValueRegs) +(extern constructor vec_writable_to_regs vec_writable_to_regs) + +(decl alloc_vec_writable (Type) VecWritableReg) +(extern constructor alloc_vec_writable alloc_vec_writable) + +(decl gen_int_select (Type IntSelectOP ValueRegs ValueRegs) ValueRegs) +(rule + (gen_int_select ty op x y) + (let + ( ;;; + (dst VecWritableReg (alloc_vec_writable ty)) + ;;; + (_ Unit (emit (MInst.IntSelect op (vec_writable_clone dst) x y ty)))) + (vec_writable_to_regs dst))) + +(decl udf (TrapCode) InstOutput) +(rule + (udf code) + (side_effect (SideEffectNoResult.Inst (MInst.Udf code)))) + +(decl load_op (Type) LoadOP) +(extern constructor load_op load_op) + +(decl store_op (Type) StoreOP) +(extern constructor store_op store_op) + +;; bool is "is_signed" +(decl int_load_op (bool u8) LoadOP) +(rule + (int_load_op $false 8) + (LoadOP.Lbu)) + +(rule + (int_load_op $true 8) + (LoadOP.Lb)) + +(rule + (int_load_op $false 16) + (LoadOP.Lhu)) +(rule + (int_load_op $true 16) + (LoadOP.Lh)) +(rule + (int_load_op $false 32) + (LoadOP.Lwu)) +(rule + (int_load_op $true 32) + (LoadOP.Lw)) + +(rule + (int_load_op _ 64) + (LoadOP.Ld)) + +;;;; load extern name +(decl load_ext_name (ExternalName i64) Reg) +(extern constructor load_ext_name load_ext_name) + +(decl int_convert_2_float_op (Type bool Type) FpuOPRR) +(extern constructor int_convert_2_float_op int_convert_2_float_op) + +;;;; +(decl gen_fcvt_int (bool FReg bool Type Type) XReg) +(rule + (gen_fcvt_int is_sat rs is_signed in_type out_type) + (let + ((result WritableReg (temp_writable_reg out_type)) + (tmp WritableFReg (temp_writable_freg)) + (_ Unit (emit (MInst.FcvtToInt is_sat result tmp rs is_signed in_type out_type)))) + (writable_reg_to_reg result))) + +;;; some float binary operation +;;; 1. need move into x reister. +;;; 2. do the operation. +;;; 3. move back. +(decl lower_float_binary (AluOPRRR FReg FReg Type) FReg) +(rule + (lower_float_binary op rs1 rs2 ty) + (let ((x_rs1 XReg (move_f_to_x rs1 ty)) + (x_rs2 XReg (move_f_to_x rs2 ty)) + (tmp XReg (alu_rrr op x_rs1 x_rs2))) + (move_x_to_f tmp (float_int_of_same_size ty)))) + + +;;; lower icmp +(decl lower_icmp (IntCC ValueRegs ValueRegs Type) Reg) +(rule 1 (lower_icmp cc x y ty) + (if (signed_cond_code cc)) + (gen_icmp cc (ext_int_if_need $true x ty) (ext_int_if_need $true y ty) ty)) +(rule (lower_icmp cc x y ty) + (gen_icmp cc (ext_int_if_need $false x ty) (ext_int_if_need $false y ty) ty)) + + +(decl i128_sub (ValueRegs ValueRegs) ValueRegs) +(rule + (i128_sub x y ) + (let + (;; low part. + (low XReg (rv_sub (value_regs_get x 0) (value_regs_get y 0))) + ;; compute borrow. + (borrow XReg (rv_sltu (value_regs_get x 0) low)) + ;; + (high_tmp XReg (rv_sub (value_regs_get x 1) (value_regs_get y 1))) + ;; + (high XReg (rv_sub high_tmp borrow))) + (value_regs low high))) + + +;;; Returns the sum in the first register, and the overflow test in the second. 
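+
+;; Illustrative note, not in the original riscv64 rules: in `i128_sub` above
+;; the borrow out of the low half is recovered with `sltu`. For example, for
+;; 0 - 1 the low half wraps to all-ones, `(rv_sltu x_lo low)` then yields 1,
+;; and that 1 is subtracted from the high half.
+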
+(decl lower_uadd_overflow (XReg XReg Type) ValueRegs) + +(rule 1 + (lower_uadd_overflow x y $I64) + (let ((tmp XReg (rv_add x y)) + (test XReg (gen_icmp (IntCC.UnsignedLessThan) tmp x $I64))) + (value_regs tmp test))) + +(rule + (lower_uadd_overflow x y (fits_in_32 ty)) + (let ((tmp_x XReg (zext x ty $I64)) + (tmp_y XReg (zext y ty $I64)) + (sum XReg (rv_add tmp_x tmp_y)) + (test XReg (rv_srli sum (imm12_const (ty_bits ty))))) + (value_regs sum test))) + +(decl label_to_br_target (MachLabel) BranchTarget) +(extern constructor label_to_br_target label_to_br_target) + +(decl gen_jump (MachLabel) MInst) +(rule + (gen_jump v) + (MInst.Jal (label_to_br_target v))) + +(decl vec_label_get (VecMachLabel u8) MachLabel ) +(extern constructor vec_label_get vec_label_get) + +(decl partial lower_branch (Inst VecMachLabel) Unit) +(rule (lower_branch (jump _) targets ) + (emit_side_effect (SideEffectNoResult.Inst (gen_jump (vec_label_get targets 0))))) + +;;; cc a b targets Type +(decl lower_br_icmp (IntCC ValueRegs ValueRegs VecMachLabel Type) Unit) +(extern constructor lower_br_icmp lower_br_icmp) + +;; int scalar zero regs. +(decl int_zero_reg (Type) ValueRegs) +(extern constructor int_zero_reg int_zero_reg) + +(decl lower_cond_br (IntCC ValueRegs VecMachLabel Type) Unit) +(extern constructor lower_cond_br lower_cond_br) + +(decl intcc_to_extend_op (IntCC) ExtendOp) +(extern constructor intcc_to_extend_op intcc_to_extend_op) + +;; Normalize a value for comparision. +;; +;; This ensures that types smaller than a register don't accidentally +;; pass undefined high bits when being compared as a full register. +(decl normalize_cmp_value (Type ValueRegs ExtendOp) ValueRegs) + +(rule 1 (normalize_cmp_value (fits_in_32 ity) r op) + (extend r op ity $I64)) + +(rule (normalize_cmp_value $I64 r _) r) +(rule (normalize_cmp_value $I128 r _) r) + +(decl normalize_fcvt_from_int (XReg Type ExtendOp) XReg) +(rule 2 (normalize_fcvt_from_int r (fits_in_16 ty) op) + (value_regs_get (extend r op ty $I64) 0)) +(rule 1 (normalize_fcvt_from_int r _ _) + r) + +;; Convert a truthy value, possibly of more than one register (an +;; I128), to one register. If narrower than 64 bits, must have already +;; been masked (e.g. by `normalize_cmp_value`). +(decl truthy_to_reg (Type ValueRegs) XReg) +(rule 1 (truthy_to_reg (fits_in_64 _) regs) + (value_regs_get regs 0)) +(rule 0 (truthy_to_reg $I128 regs) + (let ((lo XReg (value_regs_get regs 0)) + (hi XReg (value_regs_get regs 1))) + (rv_or lo hi))) + +;; Default behavior for branching based on an input value. +(rule + (lower_branch (brif v @ (value_type ty) _ _) targets) + (lower_cond_br (IntCC.NotEqual) (normalize_cmp_value ty v (ExtendOp.Zero)) targets ty)) + +;; Special case for SI128 to reify the comparison value and branch on it. 
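+
+;; Illustrative note, not in the original riscv64 rules: in
+;; `lower_uadd_overflow` above, operands of 32 bits or less are zero-extended,
+;; added as 64-bit values, and the carry is read back by shifting the sum
+;; right by the type width; for $I32 the result of
+;; `(rv_srli sum (imm12_const 32))` is exactly 0 or 1.
+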
+(rule 2 + (lower_branch (brif v @ (value_type $I128) _ _) targets) + (let ((zero ValueRegs (value_regs (zero_reg) (zero_reg))) + (cmp XReg (gen_icmp (IntCC.NotEqual) v zero $I128))) + (lower_cond_br (IntCC.NotEqual) cmp targets $I64))) + +;; Branching on the result of an icmp +(rule 1 + (lower_branch (brif (maybe_uextend (icmp cc a @ (value_type ty) b)) _ _) targets) + (lower_br_icmp cc a b targets ty)) + +;; Branching on the result of an fcmp +(rule 1 + (lower_branch (brif (maybe_uextend (fcmp cc a @ (value_type ty) b)) _ _) targets) + (if-let $true (floatcc_unordered cc)) + (let ((then BranchTarget (label_to_br_target (vec_label_get targets 0))) + (else BranchTarget (label_to_br_target (vec_label_get targets 1)))) + (emit_side_effect (cond_br (emit_fcmp (floatcc_complement cc) ty a b) else then)))) + +(rule 1 + (lower_branch (brif (maybe_uextend (fcmp cc a @ (value_type ty) b)) _ _) targets) + (if-let $false (floatcc_unordered cc)) + (let ((then BranchTarget (label_to_br_target (vec_label_get targets 0))) + (else BranchTarget (label_to_br_target (vec_label_get targets 1)))) + (emit_side_effect (cond_br (emit_fcmp cc ty a b) then else)))) + +;;; +(decl lower_br_table (Reg VecMachLabel) Unit) +(extern constructor lower_br_table lower_br_table) + +(rule + (lower_branch (br_table index _) targets) + (lower_br_table index targets)) + +(decl load_ra () Reg) +(extern constructor load_ra load_ra) + + +;; Generates a bitcast instruction. +;; Args are: src, src_ty, dst_ty +(decl gen_bitcast (Reg Type Type) Reg) +(rule 1 (gen_bitcast r $F32 $I32) (rv_fmvxw r)) +(rule 1 (gen_bitcast r $F64 $I64) (rv_fmvxd r)) +(rule 1 (gen_bitcast r $I32 $F32) (rv_fmvwx r)) +(rule 1 (gen_bitcast r $I64 $F64) (rv_fmvdx r)) +(rule (gen_bitcast r _ _) r) + +(decl move_f_to_x (FReg Type) XReg) +(rule (move_f_to_x r $F32) (gen_bitcast r $F32 $I32)) +(rule (move_f_to_x r $F64) (gen_bitcast r $F64 $I64)) + +(decl move_x_to_f (XReg Type) FReg) +(rule (move_x_to_f r $I32) (gen_bitcast r $I32 $F32)) +(rule (move_x_to_f r $I64) (gen_bitcast r $I64 $F64)) + +(decl float_int_of_same_size (Type) Type) +(rule (float_int_of_same_size $F32) $I32) +(rule (float_int_of_same_size $F64) $I64) + + +(decl gen_rev8 (XReg) XReg) +(rule 1 + (gen_rev8 rs) + (if-let $true (has_zbb)) + (rv_rev8 rs)) + +(rule + (gen_rev8 rs) + (if-let $false (has_zbb)) + (let + ((rd WritableXReg (temp_writable_xreg)) + (tmp WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Rev8 rs step tmp rd)))) + (writable_reg_to_reg rd))) + + +(decl gen_brev8 (Reg Type) Reg) +(rule 1 + (gen_brev8 rs _) + (if-let $true (has_zbkb)) + (rv_brev8 rs)) +(rule + (gen_brev8 rs ty) + (if-let $false (has_zbkb)) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (tmp2 WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (rd WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Brev8 rs ty step tmp tmp2 rd)))) + (writable_reg_to_reg rd))) + +;; Negates x +;; Equivalent to 0 - x +(decl neg (Type ValueRegs) ValueRegs) +(rule 1 (neg (fits_in_64 (ty_int ty)) val) + (value_reg + (rv_neg (value_regs_get val 0)))) + +(rule 2 (neg $I128 val) + (i128_sub (value_regs_zero) val)) + + +;; Selects the greatest of two registers as signed values. 
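+
+;; Illustrative note, not in the original riscv64 rules: in the fcmp branch
+;; rules above, an "unordered" condition (one that is true when an input is
+;; NaN) is emitted as the complemented, ordered condition with the two branch
+;; targets swapped, so NaN inputs still take the intended edge.
+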
+(decl max (Type XReg XReg) XReg) +(rule (max (fits_in_64 (ty_int ty)) x y) + (if-let $true (has_zbb)) + (rv_max x y)) + +(rule (max (fits_in_64 (ty_int ty)) x y) + (if-let $false (has_zbb)) + (gen_select_reg (IntCC.SignedGreaterThan) x y x y)) + + +(decl gen_trapif (XReg TrapCode) InstOutput) +(rule + (gen_trapif test trap_code) + (side_effect (SideEffectNoResult.Inst (MInst.TrapIf test trap_code)))) + +(decl gen_trapifc (IntCC XReg XReg TrapCode) InstOutput) +(rule + (gen_trapifc cc a b trap_code) + (side_effect (SideEffectNoResult.Inst (MInst.TrapIfC a b cc trap_code)))) + +(decl shift_int_to_most_significant (XReg Type) XReg) +(extern constructor shift_int_to_most_significant shift_int_to_most_significant) + +;;; generate div overflow. +(decl gen_div_overflow (XReg XReg Type) InstOutput) +(rule + (gen_div_overflow rs1 rs2 ty) + (let + ((r_const_neg_1 XReg (load_imm12 -1)) + (r_const_min XReg (rv_slli (load_imm12 1) (imm12_const 63))) + (tmp_rs1 XReg (shift_int_to_most_significant rs1 ty)) + (t1 XReg (gen_icmp (IntCC.Equal) r_const_neg_1 rs2 ty)) + (t2 XReg (gen_icmp (IntCC.Equal) r_const_min tmp_rs1 ty)) + (test XReg (rv_and t1 t2))) + (gen_trapif test (TrapCode.IntegerOverflow)))) + +(decl gen_div_by_zero (XReg) InstOutput) +(rule + (gen_div_by_zero r) + (gen_trapifc (IntCC.Equal) (zero_reg) r (TrapCode.IntegerDivisionByZero))) + +;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput) +(extern constructor gen_call gen_call) + +(decl gen_call_indirect (SigRef Value ValueSlice) InstOutput) +(extern constructor gen_call_indirect gen_call_indirect) + +;;; this is trying to imitate aarch64 `madd` instruction. +(decl madd (XReg XReg XReg) XReg) +(rule + (madd n m a) + (let + ((t XReg (rv_mul n m))) + (rv_add t a))) + +;;;; Helpers for bmask ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl lower_bmask (Type Type ValueRegs) ValueRegs) + +;; Produces -1 if the 64-bit value is non-zero, and 0 otherwise. +;; If the type is smaller than 64 bits, we need to mask off the +;; high bits. +(rule + 0 + (lower_bmask (fits_in_64 _) (fits_in_64 in_ty) val) + (let ((input XReg (truthy_to_reg in_ty (normalize_cmp_value in_ty val (ExtendOp.Zero)))) + (non_zero XReg (rv_snez input))) + (value_reg (rv_neg non_zero)))) + +;; Bitwise-or the two registers that make up the 128-bit value, then recurse as +;; though it was a 64-bit value. +(rule + 1 + (lower_bmask (fits_in_64 ty) $I128 val) + (let ((lo XReg (value_regs_get val 0)) + (hi XReg (value_regs_get val 1)) + (combined XReg (rv_or lo hi))) + (lower_bmask ty $I64 (value_reg combined)))) + +;; Conversion of one 64-bit value to a 128-bit one. Duplicate the result of the +;; bmask of the 64-bit value into both result registers of the i128. +(rule + 2 + (lower_bmask $I128 (fits_in_64 in_ty) val) + (let ((res ValueRegs (lower_bmask $I64 in_ty val))) + (value_regs (value_regs_get res 0) (value_regs_get res 0)))) + +;; Conversion of one 64-bit value to a 128-bit one. Duplicate the result of +;; bmasking the 128-bit value to a 64-bit value into both registers of the +;; 128-bit result. 
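+
+;; Illustrative note, not in the original riscv64 rules: `gen_div_overflow`
+;; above guards the only signed-division overflow case, INT_MIN / -1. It
+;; materializes -1 and 1 << 63, shifts a narrow dividend up to the
+;; most-significant position, and traps with `TrapCode.IntegerOverflow` only
+;; when both equality tests hold.
+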
+(rule + 3 + (lower_bmask $I128 $I128 val) + (let ((res ValueRegs (lower_bmask $I64 $I128 val))) + (value_regs (value_regs_get res 0) (value_regs_get res 0)))) + + +;;;; Helpers for physical registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_mov_from_preg (PReg) Reg) + +(rule + (gen_mov_from_preg rm) + (let ((rd WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.MovFromPReg rd rm)))) + rd)) + +(decl fp_reg () PReg) +(extern constructor fp_reg fp_reg) + +(decl sp_reg () PReg) +(extern constructor sp_reg sp_reg) + +;; Helper for creating the zero register. +(decl zero_reg () Reg) +(extern constructor zero_reg zero_reg) + +(decl value_regs_zero () ValueRegs) +(rule (value_regs_zero) + (value_regs (imm $I64 0) (imm $I64 0))) + +(decl writable_zero_reg () WritableReg) +(extern constructor writable_zero_reg writable_zero_reg) + + +;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl not (XReg) XReg) +(rule (not x) (rv_xori x (imm_from_bits 1))) + +(decl is_not_nan (Type FReg) XReg) +(rule (is_not_nan ty a) (rv_feq ty a a)) + +(decl ordered (Type FReg FReg) XReg) +(rule (ordered ty a b) (rv_and (is_not_nan ty a) (is_not_nan ty b))) + +(type CmpResult (enum + (Result + (result XReg) + (invert bool)))) + +;; Wrapper for the common case when constructing comparison results. It assumes +;; that the result isn't negated. +(decl cmp_result (XReg) CmpResult) +(rule (cmp_result result) (CmpResult.Result result $false)) + +;; Wrapper for the case where it's more convenient to construct the negated +;; version of the comparison. +(decl cmp_result_invert (XReg) CmpResult) +(rule (cmp_result_invert result) (CmpResult.Result result $true)) + +;; Consume a CmpResult, producing a branch on its result. +(decl cond_br (CmpResult BranchTarget BranchTarget) SideEffectNoResult) +(rule (cond_br cmp then else) + (SideEffectNoResult.Inst + (MInst.CondBr then else (cmp_integer_compare cmp)))) + +;; Construct an IntegerCompare value. +(decl int_compare (IntCC XReg XReg) IntegerCompare) +(extern constructor int_compare int_compare) + +;; Convert a comparison into a branch test. +(decl cmp_integer_compare (CmpResult) IntegerCompare) + +(rule + (cmp_integer_compare (CmpResult.Result res $false)) + (int_compare (IntCC.NotEqual) res (zero_reg))) + +(rule + (cmp_integer_compare (CmpResult.Result res $true)) + (int_compare (IntCC.Equal) res (zero_reg))) + +;; Convert a comparison into a boolean value. +(decl cmp_value (CmpResult) XReg) +(rule (cmp_value (CmpResult.Result res $false)) res) +(rule (cmp_value (CmpResult.Result res $true)) (not res)) + +;; Compare two floating point numbers and return a zero/non-zero result. 
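+
+;; Illustrative note, not in the original riscv64 rules: the `invert` flag in
+;; `CmpResult` above lets a comparison be produced in its cheaper negated
+;; form without an extra instruction. When the result feeds a branch,
+;; `cmp_integer_compare` simply flips between NotEqual and Equal against the
+;; zero register; only `cmp_value` pays for an explicit `xori ..., 1` via
+;; `not`.
+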
+(decl emit_fcmp (FloatCC Type FReg FReg) CmpResult) + +;; a is not nan && b is not nan +(rule + (emit_fcmp (FloatCC.Ordered) ty a b) + (cmp_result (ordered ty a b))) + +;; a is nan || b is nan +;; == !(a is not nan && b is not nan) +(rule + (emit_fcmp (FloatCC.Unordered) ty a b) + (cmp_result_invert (ordered ty a b))) + +;; a == b +(rule + (emit_fcmp (FloatCC.Equal) ty a b) + (cmp_result (rv_feq ty a b))) + +;; a != b +;; == !(a == b) +(rule + (emit_fcmp (FloatCC.NotEqual) ty a b) + (cmp_result_invert (rv_feq ty a b))) + +;; a < b || a > b +(rule + (emit_fcmp (FloatCC.OrderedNotEqual) ty a b) + (cmp_result (rv_or (rv_flt ty a b) (rv_fgt ty a b)))) + +;; !(ordered a b) || a == b +(rule + (emit_fcmp (FloatCC.UnorderedOrEqual) ty a b) + (cmp_result (rv_or (not (ordered ty a b)) (rv_feq ty a b)))) + +;; a < b +(rule + (emit_fcmp (FloatCC.LessThan) ty a b) + (cmp_result (rv_flt ty a b))) + +;; a <= b +(rule + (emit_fcmp (FloatCC.LessThanOrEqual) ty a b) + (cmp_result (rv_fle ty a b))) + +;; a > b +(rule + (emit_fcmp (FloatCC.GreaterThan) ty a b) + (cmp_result (rv_fgt ty a b))) + +;; a >= b +(rule + (emit_fcmp (FloatCC.GreaterThanOrEqual) ty a b) + (cmp_result (rv_fge ty a b))) + +;; !(ordered a b) || a < b +;; == !(ordered a b && a >= b) +(rule + (emit_fcmp (FloatCC.UnorderedOrLessThan) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_fge ty a b)))) + +;; !(ordered a b) || a <= b +;; == !(ordered a b && a > b) +(rule + (emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_fgt ty a b)))) + +;; !(ordered a b) || a > b +;; == !(ordered a b && a <= b) +(rule + (emit_fcmp (FloatCC.UnorderedOrGreaterThan) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_fle ty a b)))) + +;; !(ordered a b) || a >= b +;; == !(ordered a b && a < b) +(rule + (emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_flt ty a b)))) diff --git a/cranelift/codegen/src/isa/zkasm/inst/args.rs b/cranelift/codegen/src/isa/zkasm/inst/args.rs new file mode 100644 index 000000000000..03b0255bf9f5 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/args.rs @@ -0,0 +1,1812 @@ +//! Riscv64 ISA definitions: instruction arguments. + +// Some variants are never constructed, but we still want them as options in the future. +#![allow(dead_code)] +use super::*; +use crate::ir::condcodes::CondCode; + +use crate::isa::zkasm::inst::{reg_name, reg_to_gpr_num}; +use crate::machinst::isle::WritableReg; + +use std::fmt::{Display, Formatter, Result}; + +/// A macro for defining a newtype of `Reg` that enforces some invariant about +/// the wrapped `Reg` (such as that it is of a particular register class). +macro_rules! newtype_of_reg { + ( + $newtype_reg:ident, + $newtype_writable_reg:ident, + |$check_reg:ident| $check:expr + ) => { + /// A newtype wrapper around `Reg`. + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct $newtype_reg(Reg); + + impl PartialEq for $newtype_reg { + fn eq(&self, other: &Reg) -> bool { + self.0 == *other + } + } + + impl From<$newtype_reg> for Reg { + fn from(r: $newtype_reg) -> Self { + r.0 + } + } + + impl $newtype_reg { + /// Create this newtype from the given register, or return `None` if the register + /// is not a valid instance of this newtype. + pub fn new($check_reg: Reg) -> Option { + if $check { + Some(Self($check_reg)) + } else { + None + } + } + + /// Get this newtype's underlying `Reg`. 
+ pub fn to_reg(self) -> Reg { + self.0 + } + } + + // Convenience impl so that people working with this newtype can use it + // "just like" a plain `Reg`. + // + // NB: We cannot implement `DerefMut` because that would let people do + // nasty stuff like `*my_xreg.deref_mut() = some_freg`, breaking the + // invariants that `XReg` provides. + impl std::ops::Deref for $newtype_reg { + type Target = Reg; + + fn deref(&self) -> &Reg { + &self.0 + } + } + + /// Writable Reg. + pub type $newtype_writable_reg = Writable<$newtype_reg>; + }; +} + +// Newtypes for registers classes. +newtype_of_reg!(XReg, WritableXReg, |reg| reg.class() == RegClass::Int); +newtype_of_reg!(FReg, WritableFReg, |reg| reg.class() == RegClass::Float); +newtype_of_reg!(VReg, WritableVReg, |reg| reg.class() == RegClass::Vector); + +/// An addressing mode specified for a load/store operation. +#[derive(Clone, Debug, Copy)] +pub enum AMode { + /// Arbitrary offset from a register. Converted to generation of large + /// offsets with multiple instructions as necessary during code emission. + RegOffset(Reg, i64, Type), + /// Offset from the stack pointer. + SPOffset(i64, Type), + + /// Offset from the frame pointer. + FPOffset(i64, Type), + + /// Offset from the "nominal stack pointer", which is where the real SP is + /// just after stack and spill slots are allocated in the function prologue. + /// At emission time, this is converted to `SPOffset` with a fixup added to + /// the offset constant. The fixup is a running value that is tracked as + /// emission iterates through instructions in linear order, and can be + /// adjusted up and down with [Inst::VirtualSPOffsetAdj]. + /// + /// The standard ABI is in charge of handling this (by emitting the + /// adjustment meta-instructions). It maintains the invariant that "nominal + /// SP" is where the actual SP is after the function prologue and before + /// clobber pushes. See the diagram in the documentation for + /// [crate::isa::zkasm::abi](the ABI module) for more details. + NominalSPOffset(i64, Type), + + /// A reference to a constant which is placed outside of the function's + /// body, typically at the end. + Const(VCodeConstant), + + /// A reference to a label. + Label(MachLabel), +} + +impl AMode { + pub(crate) fn with_allocs(self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + AMode::RegOffset(reg, offset, ty) => AMode::RegOffset(allocs.next(reg), offset, ty), + AMode::SPOffset(..) + | AMode::FPOffset(..) + | AMode::NominalSPOffset(..) + | AMode::Const(..) + | AMode::Label(..) => self, + } + } + + /// Returns the registers that known to the register allocator. + /// Keep this in sync with `with_allocs`. + pub(crate) fn get_allocatable_register(&self) -> Option { + match self { + AMode::RegOffset(reg, ..) => Some(*reg), + AMode::SPOffset(..) + | AMode::FPOffset(..) + | AMode::NominalSPOffset(..) + | AMode::Const(..) + | AMode::Label(..) => None, + } + } + + pub(crate) fn get_base_register(&self) -> Option { + match self { + &AMode::RegOffset(reg, ..) => Some(reg), + &AMode::SPOffset(..) => Some(stack_reg()), + &AMode::FPOffset(..) => Some(fp_reg()), + &AMode::NominalSPOffset(..) => Some(stack_reg()), + &AMode::Const(..) | AMode::Label(..) => None, + } + } + + pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 { + match self { + &AMode::NominalSPOffset(offset, _) => offset + state.virtual_sp_offset, + _ => self.get_offset(), + } + } + + fn get_offset(&self) -> i64 { + match self { + &AMode::RegOffset(_, offset, ..) 
=> offset, + &AMode::SPOffset(offset, _) => offset, + &AMode::FPOffset(offset, _) => offset, + &AMode::NominalSPOffset(offset, _) => offset, + &AMode::Const(_) | &AMode::Label(_) => 0, + } + } + + pub(crate) fn to_string_with_alloc(&self, allocs: &mut AllocationConsumer<'_>) -> String { + format!("{}", self.clone().with_allocs(allocs)) + } +} + +impl Display for AMode { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + match self { + &AMode::RegOffset(r, offset, ..) => { + write!(f, "{}({})", offset, reg_name(r)) + } + &AMode::SPOffset(offset, ..) => { + write!(f, "{}(sp)", offset) + } + &AMode::NominalSPOffset(offset, ..) => { + write!(f, "{}(nominal_sp)", offset) + } + &AMode::FPOffset(offset, ..) => { + write!(f, "{}(fp)", offset) + } + &AMode::Const(addr, ..) => { + write!(f, "[const({})]", addr.as_u32()) + } + &AMode::Label(label) => { + write!(f, "[label{}]", label.as_u32()) + } + } + } +} + +impl Into for StackAMode { + fn into(self) -> AMode { + match self { + StackAMode::FPOffset(offset, ty) => AMode::FPOffset(offset, ty), + StackAMode::SPOffset(offset, ty) => AMode::SPOffset(offset, ty), + StackAMode::NominalSPOffset(offset, ty) => AMode::NominalSPOffset(offset, ty), + } + } +} + +/// risc-v always take two register to compare +#[derive(Clone, Copy, Debug)] +pub struct IntegerCompare { + pub(crate) kind: IntCC, + pub(crate) rs1: Reg, + pub(crate) rs2: Reg, +} + +pub(crate) enum BranchFunct3 { + // == + Eq, + // != + Ne, + // signed < + Lt, + // signed >= + Ge, + // unsigned < + Ltu, + // unsigned >= + Geu, +} + +impl BranchFunct3 { + pub(crate) fn funct3(self) -> u32 { + match self { + BranchFunct3::Eq => 0b000, + BranchFunct3::Ne => 0b001, + BranchFunct3::Lt => 0b100, + BranchFunct3::Ge => 0b101, + BranchFunct3::Ltu => 0b110, + BranchFunct3::Geu => 0b111, + } + } + pub(crate) fn op_name(self) -> &'static str { + match self { + BranchFunct3::Eq => "eq", + BranchFunct3::Ne => "ne", + BranchFunct3::Lt => "lt", + BranchFunct3::Ge => "ge", + BranchFunct3::Ltu => "ltu", + BranchFunct3::Geu => "geu", + } + } +} +impl IntegerCompare { + pub(crate) fn op_code(self) -> u32 { + 0b1100011 + } + + // funct3 and if need inverse the register + pub(crate) fn funct3(&self) -> (BranchFunct3, bool) { + match self.kind { + IntCC::Equal => (BranchFunct3::Eq, false), + IntCC::NotEqual => (BranchFunct3::Ne, false), + IntCC::SignedLessThan => (BranchFunct3::Lt, false), + IntCC::SignedGreaterThanOrEqual => (BranchFunct3::Ge, false), + + IntCC::SignedGreaterThan => (BranchFunct3::Lt, true), + IntCC::SignedLessThanOrEqual => (BranchFunct3::Ge, true), + + IntCC::UnsignedLessThan => (BranchFunct3::Ltu, false), + IntCC::UnsignedGreaterThanOrEqual => (BranchFunct3::Geu, false), + + IntCC::UnsignedGreaterThan => (BranchFunct3::Ltu, true), + IntCC::UnsignedLessThanOrEqual => (BranchFunct3::Geu, true), + } + } + + #[inline] + pub(crate) fn op_name(&self) -> &'static str { + match self.kind { + IntCC::Equal => "beq", + IntCC::NotEqual => "bne", + IntCC::SignedLessThan => "blt", + IntCC::SignedGreaterThanOrEqual => "bge", + IntCC::SignedGreaterThan => "bgt", + IntCC::SignedLessThanOrEqual => "ble", + IntCC::UnsignedLessThan => "bltu", + IntCC::UnsignedGreaterThanOrEqual => "bgeu", + IntCC::UnsignedGreaterThan => "bgtu", + IntCC::UnsignedLessThanOrEqual => "bleu", + } + } + + pub(crate) fn emit(self) -> u32 { + let (funct3, reverse) = self.funct3(); + let (rs1, rs2) = if reverse { + (self.rs2, self.rs1) + } else { + (self.rs1, self.rs2) + }; + + self.op_code() + | funct3.funct3() << 12 + | reg_to_gpr_num(rs1) << 
15 + | reg_to_gpr_num(rs2) << 20 + } + + pub(crate) fn inverse(self) -> Self { + Self { + kind: self.kind.complement(), + ..self + } + } +} + +impl FpuOPRRRR { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::FmaddS => "fmadd.s", + Self::FmsubS => "fmsub.s", + Self::FnmsubS => "fnmsub.s", + Self::FnmaddS => "fnmadd.s", + Self::FmaddD => "fmadd.d", + Self::FmsubD => "fmsub.d", + Self::FnmsubD => "fnmsub.d", + Self::FnmaddD => "fnmadd.d", + } + } + + pub(crate) fn funct2(self) -> u32 { + match self { + FpuOPRRRR::FmaddS | FpuOPRRRR::FmsubS | FpuOPRRRR::FnmsubS | FpuOPRRRR::FnmaddS => 0, + FpuOPRRRR::FmaddD | FpuOPRRRR::FmsubD | FpuOPRRRR::FnmsubD | FpuOPRRRR::FnmaddD => 1, + } + } + + pub(crate) fn funct3(self, rounding_mode: Option) -> u32 { + rounding_mode.unwrap_or_default().as_u32() + } + + pub(crate) fn op_code(self) -> u32 { + match self { + FpuOPRRRR::FmaddS => 0b1000011, + FpuOPRRRR::FmsubS => 0b1000111, + FpuOPRRRR::FnmsubS => 0b1001011, + FpuOPRRRR::FnmaddS => 0b1001111, + FpuOPRRRR::FmaddD => 0b1000011, + FpuOPRRRR::FmsubD => 0b1000111, + FpuOPRRRR::FnmsubD => 0b1001011, + FpuOPRRRR::FnmaddD => 0b1001111, + } + } +} + +impl FpuOPRR { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::FsqrtS => "fsqrt.s", + Self::FcvtWS => "fcvt.w.s", + Self::FcvtWuS => "fcvt.wu.s", + Self::FmvXW => "fmv.x.w", + Self::FclassS => "fclass.s", + Self::FcvtSw => "fcvt.s.w", + Self::FcvtSwU => "fcvt.s.wu", + Self::FmvWX => "fmv.w.x", + Self::FcvtLS => "fcvt.l.s", + Self::FcvtLuS => "fcvt.lu.s", + Self::FcvtSL => "fcvt.s.l", + Self::FcvtSLU => "fcvt.s.lu", + Self::FcvtLD => "fcvt.l.d", + Self::FcvtLuD => "fcvt.lu.d", + Self::FmvXD => "fmv.x.d", + Self::FcvtDL => "fcvt.d.l", + Self::FcvtDLu => "fcvt.d.lu", + Self::FmvDX => "fmv.d.x", + Self::FsqrtD => "fsqrt.d", + Self::FcvtSD => "fcvt.s.d", + Self::FcvtDS => "fcvt.d.s", + Self::FclassD => "fclass.d", + Self::FcvtWD => "fcvt.w.d", + Self::FcvtWuD => "fcvt.wu.d", + Self::FcvtDW => "fcvt.d.w", + Self::FcvtDWU => "fcvt.d.wu", + } + } + + pub(crate) fn is_convert_to_int(self) -> bool { + match self { + Self::FcvtWS + | Self::FcvtWuS + | Self::FcvtLS + | Self::FcvtLuS + | Self::FcvtWD + | Self::FcvtWuD + | Self::FcvtLD + | Self::FcvtLuD => true, + _ => false, + } + } + // move from x register to float register. + pub(crate) fn move_x_to_f_op(ty: Type) -> Self { + match ty { + F32 => Self::FmvWX, + F64 => Self::FmvDX, + _ => unreachable!("ty:{:?}", ty), + } + } + + // move from f register to x register. 
+ pub(crate) fn move_f_to_x_op(ty: Type) -> Self { + match ty { + F32 => Self::FmvXW, + F64 => Self::FmvXD, + _ => unreachable!("ty:{:?}", ty), + } + } + + pub(crate) fn float_convert_2_int_op(from: Type, is_type_signed: bool, to: Type) -> Self { + let type_32 = to.bits() <= 32; + match from { + F32 => { + if is_type_signed { + if type_32 { + Self::FcvtWS + } else { + Self::FcvtLS + } + } else { + if type_32 { + Self::FcvtWuS + } else { + Self::FcvtLuS + } + } + } + F64 => { + if is_type_signed { + if type_32 { + Self::FcvtWD + } else { + Self::FcvtLD + } + } else { + if type_32 { + Self::FcvtWuD + } else { + Self::FcvtLuD + } + } + } + _ => unreachable!("from type:{}", from), + } + } + + pub(crate) fn int_convert_2_float_op(from: Type, is_type_signed: bool, to: Type) -> Self { + let type_32 = from.bits() == 32; + match to { + F32 => { + if is_type_signed { + if type_32 { + Self::FcvtSw + } else { + Self::FcvtSL + } + } else { + if type_32 { + Self::FcvtSwU + } else { + Self::FcvtSLU + } + } + } + F64 => { + if is_type_signed { + if type_32 { + Self::FcvtDW + } else { + Self::FcvtDL + } + } else { + if type_32 { + Self::FcvtDWU + } else { + Self::FcvtDLu + } + } + } + _ => unreachable!("to type:{}", to), + } + } + + pub(crate) fn op_code(self) -> u32 { + match self { + FpuOPRR::FsqrtS + | FpuOPRR::FcvtWS + | FpuOPRR::FcvtWuS + | FpuOPRR::FmvXW + | FpuOPRR::FclassS + | FpuOPRR::FcvtSw + | FpuOPRR::FcvtSwU + | FpuOPRR::FmvWX => 0b1010011, + + FpuOPRR::FcvtLS | FpuOPRR::FcvtLuS | FpuOPRR::FcvtSL | FpuOPRR::FcvtSLU => 0b1010011, + + FpuOPRR::FcvtLD + | FpuOPRR::FcvtLuD + | FpuOPRR::FmvXD + | FpuOPRR::FcvtDL + | FpuOPRR::FcvtDLu + | FpuOPRR::FmvDX => 0b1010011, + + FpuOPRR::FsqrtD + | FpuOPRR::FcvtSD + | FpuOPRR::FcvtDS + | FpuOPRR::FclassD + | FpuOPRR::FcvtWD + | FpuOPRR::FcvtWuD + | FpuOPRR::FcvtDW + | FpuOPRR::FcvtDWU => 0b1010011, + } + } + + pub(crate) fn rs2_funct5(self) -> u32 { + match self { + FpuOPRR::FsqrtS => 0b00000, + FpuOPRR::FcvtWS => 0b00000, + FpuOPRR::FcvtWuS => 0b00001, + FpuOPRR::FmvXW => 0b00000, + FpuOPRR::FclassS => 0b00000, + FpuOPRR::FcvtSw => 0b00000, + FpuOPRR::FcvtSwU => 0b00001, + FpuOPRR::FmvWX => 0b00000, + FpuOPRR::FcvtLS => 0b00010, + FpuOPRR::FcvtLuS => 0b00011, + FpuOPRR::FcvtSL => 0b00010, + FpuOPRR::FcvtSLU => 0b00011, + FpuOPRR::FcvtLD => 0b00010, + FpuOPRR::FcvtLuD => 0b00011, + FpuOPRR::FmvXD => 0b00000, + FpuOPRR::FcvtDL => 0b00010, + FpuOPRR::FcvtDLu => 0b00011, + FpuOPRR::FmvDX => 0b00000, + FpuOPRR::FcvtSD => 0b00001, + FpuOPRR::FcvtDS => 0b00000, + FpuOPRR::FclassD => 0b00000, + FpuOPRR::FcvtWD => 0b00000, + FpuOPRR::FcvtWuD => 0b00001, + FpuOPRR::FcvtDW => 0b00000, + FpuOPRR::FcvtDWU => 0b00001, + FpuOPRR::FsqrtD => 0b00000, + } + } + pub(crate) fn funct7(self) -> u32 { + match self { + FpuOPRR::FsqrtS => 0b0101100, + FpuOPRR::FcvtWS => 0b1100000, + FpuOPRR::FcvtWuS => 0b1100000, + FpuOPRR::FmvXW => 0b1110000, + FpuOPRR::FclassS => 0b1110000, + FpuOPRR::FcvtSw => 0b1101000, + FpuOPRR::FcvtSwU => 0b1101000, + FpuOPRR::FmvWX => 0b1111000, + FpuOPRR::FcvtLS => 0b1100000, + FpuOPRR::FcvtLuS => 0b1100000, + FpuOPRR::FcvtSL => 0b1101000, + FpuOPRR::FcvtSLU => 0b1101000, + FpuOPRR::FcvtLD => 0b1100001, + FpuOPRR::FcvtLuD => 0b1100001, + FpuOPRR::FmvXD => 0b1110001, + FpuOPRR::FcvtDL => 0b1101001, + FpuOPRR::FcvtDLu => 0b1101001, + FpuOPRR::FmvDX => 0b1111001, + FpuOPRR::FcvtSD => 0b0100000, + FpuOPRR::FcvtDS => 0b0100001, + FpuOPRR::FclassD => 0b1110001, + FpuOPRR::FcvtWD => 0b1100001, + FpuOPRR::FcvtWuD => 0b1100001, + FpuOPRR::FcvtDW => 0b1101001, + 
FpuOPRR::FcvtDWU => 0b1101001, + FpuOPRR::FsqrtD => 0b0101101, + } + } + + pub(crate) fn funct3(self, rounding_mode: Option) -> u32 { + let rounding_mode = rounding_mode.unwrap_or_default().as_u32(); + match self { + FpuOPRR::FsqrtS => rounding_mode, + FpuOPRR::FcvtWS => rounding_mode, + FpuOPRR::FcvtWuS => rounding_mode, + FpuOPRR::FmvXW => 0b000, + FpuOPRR::FclassS => 0b001, + FpuOPRR::FcvtSw => rounding_mode, + FpuOPRR::FcvtSwU => rounding_mode, + FpuOPRR::FmvWX => 0b000, + FpuOPRR::FcvtLS => rounding_mode, + FpuOPRR::FcvtLuS => rounding_mode, + FpuOPRR::FcvtSL => rounding_mode, + FpuOPRR::FcvtSLU => rounding_mode, + FpuOPRR::FcvtLD => rounding_mode, + FpuOPRR::FcvtLuD => rounding_mode, + FpuOPRR::FmvXD => 0b000, + FpuOPRR::FcvtDL => rounding_mode, + FpuOPRR::FcvtDLu => rounding_mode, + FpuOPRR::FmvDX => 0b000, + FpuOPRR::FcvtSD => rounding_mode, + FpuOPRR::FcvtDS => rounding_mode, + FpuOPRR::FclassD => 0b001, + FpuOPRR::FcvtWD => rounding_mode, + FpuOPRR::FcvtWuD => rounding_mode, + FpuOPRR::FcvtDW => rounding_mode, + FpuOPRR::FcvtDWU => 0b000, + FpuOPRR::FsqrtD => rounding_mode, + } + } +} + +impl FpuOPRRR { + pub(crate) const fn op_name(self) -> &'static str { + match self { + Self::FaddS => "fadd.s", + Self::FsubS => "fsub.s", + Self::FmulS => "fmul.s", + Self::FdivS => "fdiv.s", + Self::FsgnjS => "fsgnj.s", + Self::FsgnjnS => "fsgnjn.s", + Self::FsgnjxS => "fsgnjx.s", + Self::FminS => "fmin.s", + Self::FmaxS => "fmax.s", + Self::FeqS => "feq.s", + Self::FltS => "flt.s", + Self::FleS => "fle.s", + Self::FaddD => "fadd.d", + Self::FsubD => "fsub.d", + Self::FmulD => "fmul.d", + Self::FdivD => "fdiv.d", + Self::FsgnjD => "fsgnj.d", + Self::FsgnjnD => "fsgnjn.d", + Self::FsgnjxD => "fsgnjx.d", + Self::FminD => "fmin.d", + Self::FmaxD => "fmax.d", + Self::FeqD => "feq.d", + Self::FltD => "flt.d", + Self::FleD => "fle.d", + } + } + + pub fn funct3(self, rounding_mode: Option) -> u32 { + let rounding_mode = rounding_mode.unwrap_or_default(); + let rounding_mode = rounding_mode.as_u32(); + match self { + Self::FaddS => rounding_mode, + Self::FsubS => rounding_mode, + Self::FmulS => rounding_mode, + Self::FdivS => rounding_mode, + + Self::FsgnjS => 0b000, + Self::FsgnjnS => 0b001, + Self::FsgnjxS => 0b010, + Self::FminS => 0b000, + Self::FmaxS => 0b001, + + Self::FeqS => 0b010, + Self::FltS => 0b001, + Self::FleS => 0b000, + + Self::FaddD => rounding_mode, + Self::FsubD => rounding_mode, + Self::FmulD => rounding_mode, + Self::FdivD => rounding_mode, + + Self::FsgnjD => 0b000, + Self::FsgnjnD => 0b001, + Self::FsgnjxD => 0b010, + Self::FminD => 0b000, + Self::FmaxD => 0b001, + Self::FeqD => 0b010, + Self::FltD => 0b001, + Self::FleD => 0b000, + } + } + + pub fn op_code(self) -> u32 { + match self { + Self::FaddS + | Self::FsubS + | Self::FmulS + | Self::FdivS + | Self::FsgnjS + | Self::FsgnjnS + | Self::FsgnjxS + | Self::FminS + | Self::FmaxS + | Self::FeqS + | Self::FltS + | Self::FleS => 0b1010011, + + Self::FaddD + | Self::FsubD + | Self::FmulD + | Self::FdivD + | Self::FsgnjD + | Self::FsgnjnD + | Self::FsgnjxD + | Self::FminD + | Self::FmaxD + | Self::FeqD + | Self::FltD + | Self::FleD => 0b1010011, + } + } + + pub const fn funct7(self) -> u32 { + match self { + Self::FaddS => 0b0000000, + Self::FsubS => 0b0000100, + Self::FmulS => 0b0001000, + Self::FdivS => 0b0001100, + + Self::FsgnjS => 0b0010000, + Self::FsgnjnS => 0b0010000, + Self::FsgnjxS => 0b0010000, + Self::FminS => 0b0010100, + Self::FmaxS => 0b0010100, + Self::FeqS => 0b1010000, + Self::FltS => 0b1010000, + Self::FleS => 
0b1010000, + + Self::FaddD => 0b0000001, + Self::FsubD => 0b0000101, + Self::FmulD => 0b0001001, + Self::FdivD => 0b0001101, + Self::FsgnjD => 0b0010001, + Self::FsgnjnD => 0b0010001, + Self::FsgnjxD => 0b0010001, + Self::FminD => 0b0010101, + Self::FmaxD => 0b0010101, + Self::FeqD => 0b1010001, + Self::FltD => 0b1010001, + Self::FleD => 0b1010001, + } + } + pub fn is_32(self) -> bool { + match self { + Self::FaddS + | Self::FsubS + | Self::FmulS + | Self::FdivS + | Self::FsgnjS + | Self::FsgnjnS + | Self::FsgnjxS + | Self::FminS + | Self::FmaxS + | Self::FeqS + | Self::FltS + | Self::FleS => true, + _ => false, + } + } + + pub fn is_copy_sign(self) -> bool { + match self { + Self::FsgnjD | Self::FsgnjS => true, + _ => false, + } + } + + pub fn is_copy_neg_sign(self) -> bool { + match self { + Self::FsgnjnD | Self::FsgnjnS => true, + _ => false, + } + } + pub fn is_copy_xor_sign(self) -> bool { + match self { + Self::FsgnjxS | Self::FsgnjxD => true, + _ => false, + } + } +} +impl AluOPRRR { + pub(crate) const fn op_name(self) -> &'static str { + match self { + Self::Add => "add", + Self::Sub => "sub", + Self::Sll => "sll", + Self::Slt => "slt", + Self::Sgt => "sgt", + Self::SltU => "sltu", + Self::Sgtu => "sgtu", + Self::Xor => "xor", + Self::Srl => "srl", + Self::Sra => "sra", + Self::Or => "or", + Self::And => "and", + Self::Addw => "addw", + Self::Subw => "subw", + Self::Sllw => "sllw", + Self::Srlw => "srlw", + Self::Sraw => "sraw", + Self::Mul => "mul", + Self::Mulh => "mulh", + Self::Mulhsu => "mulhsu", + Self::Mulhu => "mulhu", + Self::Div => "div", + Self::DivU => "divu", + Self::Rem => "rem", + Self::RemU => "remu", + Self::Mulw => "mulw", + Self::Divw => "divw", + Self::Divuw => "divuw", + Self::Remw => "remw", + Self::Remuw => "remuw", + Self::Adduw => "add.uw", + Self::Andn => "andn", + Self::Bclr => "bclr", + Self::Bext => "bext", + Self::Binv => "binv", + Self::Bset => "bset", + Self::Clmul => "clmul", + Self::Clmulh => "clmulh", + Self::Clmulr => "clmulr", + Self::Max => "max", + Self::Maxu => "maxu", + Self::Min => "min", + Self::Minu => "minu", + Self::Orn => "orn", + Self::Rol => "rol", + Self::Rolw => "rolw", + Self::Ror => "ror", + Self::Rorw => "rorw", + Self::Sh1add => "sh1add", + Self::Sh1adduw => "sh1add.uw", + Self::Sh2add => "sh2add", + Self::Sh2adduw => "sh2add.uw", + Self::Sh3add => "sh3add", + Self::Sh3adduw => "sh3add.uw", + Self::Xnor => "xnor", + Self::Pack => "pack", + Self::Packw => "packw", + Self::Packh => "packh", + } + } + + pub fn funct3(self) -> u32 { + match self { + AluOPRRR::Add => 0b000, + AluOPRRR::Sll => 0b001, + AluOPRRR::Slt => 0b010, + AluOPRRR::Sgt => 0b010, + AluOPRRR::SltU => 0b011, + AluOPRRR::Sgtu => 0b011, + AluOPRRR::Xor => 0b100, + AluOPRRR::Srl => 0b101, + AluOPRRR::Sra => 0b101, + AluOPRRR::Or => 0b110, + AluOPRRR::And => 0b111, + AluOPRRR::Sub => 0b000, + + AluOPRRR::Addw => 0b000, + AluOPRRR::Subw => 0b000, + AluOPRRR::Sllw => 0b001, + AluOPRRR::Srlw => 0b101, + AluOPRRR::Sraw => 0b101, + + AluOPRRR::Mul => 0b000, + AluOPRRR::Mulh => 0b001, + AluOPRRR::Mulhsu => 0b010, + AluOPRRR::Mulhu => 0b011, + AluOPRRR::Div => 0b100, + AluOPRRR::DivU => 0b101, + AluOPRRR::Rem => 0b110, + AluOPRRR::RemU => 0b111, + + AluOPRRR::Mulw => 0b000, + AluOPRRR::Divw => 0b100, + AluOPRRR::Divuw => 0b101, + AluOPRRR::Remw => 0b110, + AluOPRRR::Remuw => 0b111, + + // Zbb + AluOPRRR::Adduw => 0b000, + AluOPRRR::Andn => 0b111, + AluOPRRR::Bclr => 0b001, + AluOPRRR::Bext => 0b101, + AluOPRRR::Binv => 0b001, + AluOPRRR::Bset => 0b001, + AluOPRRR::Clmul => 
0b001, + AluOPRRR::Clmulh => 0b011, + AluOPRRR::Clmulr => 0b010, + AluOPRRR::Max => 0b110, + AluOPRRR::Maxu => 0b111, + AluOPRRR::Min => 0b100, + AluOPRRR::Minu => 0b101, + AluOPRRR::Orn => 0b110, + AluOPRRR::Rol => 0b001, + AluOPRRR::Rolw => 0b001, + AluOPRRR::Ror => 0b101, + AluOPRRR::Rorw => 0b101, + AluOPRRR::Sh1add => 0b010, + AluOPRRR::Sh1adduw => 0b010, + AluOPRRR::Sh2add => 0b100, + AluOPRRR::Sh2adduw => 0b100, + AluOPRRR::Sh3add => 0b110, + AluOPRRR::Sh3adduw => 0b110, + AluOPRRR::Xnor => 0b100, + + // Zbkb + AluOPRRR::Pack => 0b100, + AluOPRRR::Packw => 0b100, + AluOPRRR::Packh => 0b111, + } + } + + pub fn op_code(self) -> u32 { + match self { + AluOPRRR::Add + | AluOPRRR::Sub + | AluOPRRR::Sll + | AluOPRRR::Slt + | AluOPRRR::Sgt + | AluOPRRR::SltU + | AluOPRRR::Sgtu + | AluOPRRR::Xor + | AluOPRRR::Srl + | AluOPRRR::Sra + | AluOPRRR::Or + | AluOPRRR::And + | AluOPRRR::Pack + | AluOPRRR::Packh => 0b0110011, + + AluOPRRR::Addw + | AluOPRRR::Subw + | AluOPRRR::Sllw + | AluOPRRR::Srlw + | AluOPRRR::Sraw + | AluOPRRR::Packw => 0b0111011, + + AluOPRRR::Mul + | AluOPRRR::Mulh + | AluOPRRR::Mulhsu + | AluOPRRR::Mulhu + | AluOPRRR::Div + | AluOPRRR::DivU + | AluOPRRR::Rem + | AluOPRRR::RemU => 0b0110011, + + AluOPRRR::Mulw + | AluOPRRR::Divw + | AluOPRRR::Divuw + | AluOPRRR::Remw + | AluOPRRR::Remuw => 0b0111011, + + AluOPRRR::Adduw => 0b0111011, + AluOPRRR::Andn + | AluOPRRR::Bclr + | AluOPRRR::Bext + | AluOPRRR::Binv + | AluOPRRR::Bset + | AluOPRRR::Clmul + | AluOPRRR::Clmulh + | AluOPRRR::Clmulr + | AluOPRRR::Max + | AluOPRRR::Maxu + | AluOPRRR::Min + | AluOPRRR::Minu + | AluOPRRR::Orn + | AluOPRRR::Rol + | AluOPRRR::Ror + | AluOPRRR::Sh1add + | AluOPRRR::Sh2add + | AluOPRRR::Sh3add + | AluOPRRR::Xnor => 0b0110011, + + AluOPRRR::Rolw + | AluOPRRR::Rorw + | AluOPRRR::Sh2adduw + | AluOPRRR::Sh3adduw + | AluOPRRR::Sh1adduw => 0b0111011, + } + } + + pub const fn funct7(self) -> u32 { + match self { + AluOPRRR::Add => 0b0000000, + AluOPRRR::Sub => 0b0100000, + AluOPRRR::Sll => 0b0000000, + AluOPRRR::Slt => 0b0000000, + AluOPRRR::Sgt => 0b0000000, + AluOPRRR::SltU => 0b0000000, + AluOPRRR::Sgtu => 0b0000000, + + AluOPRRR::Xor => 0b0000000, + AluOPRRR::Srl => 0b0000000, + AluOPRRR::Sra => 0b0100000, + AluOPRRR::Or => 0b0000000, + AluOPRRR::And => 0b0000000, + + AluOPRRR::Addw => 0b0000000, + AluOPRRR::Subw => 0b0100000, + AluOPRRR::Sllw => 0b0000000, + AluOPRRR::Srlw => 0b0000000, + AluOPRRR::Sraw => 0b0100000, + + AluOPRRR::Mul => 0b0000001, + AluOPRRR::Mulh => 0b0000001, + AluOPRRR::Mulhsu => 0b0000001, + AluOPRRR::Mulhu => 0b0000001, + AluOPRRR::Div => 0b0000001, + AluOPRRR::DivU => 0b0000001, + AluOPRRR::Rem => 0b0000001, + AluOPRRR::RemU => 0b0000001, + + AluOPRRR::Mulw => 0b0000001, + AluOPRRR::Divw => 0b0000001, + AluOPRRR::Divuw => 0b0000001, + AluOPRRR::Remw => 0b0000001, + AluOPRRR::Remuw => 0b0000001, + AluOPRRR::Adduw => 0b0000100, + AluOPRRR::Andn => 0b0100000, + AluOPRRR::Bclr => 0b0100100, + AluOPRRR::Bext => 0b0100100, + AluOPRRR::Binv => 0b0110100, + AluOPRRR::Bset => 0b0010100, + AluOPRRR::Clmul => 0b0000101, + AluOPRRR::Clmulh => 0b0000101, + AluOPRRR::Clmulr => 0b0000101, + AluOPRRR::Max => 0b0000101, + AluOPRRR::Maxu => 0b0000101, + AluOPRRR::Min => 0b0000101, + AluOPRRR::Minu => 0b0000101, + AluOPRRR::Orn => 0b0100000, + AluOPRRR::Rol => 0b0110000, + AluOPRRR::Rolw => 0b0110000, + AluOPRRR::Ror => 0b0110000, + AluOPRRR::Rorw => 0b0110000, + AluOPRRR::Sh1add => 0b0010000, + AluOPRRR::Sh1adduw => 0b0010000, + AluOPRRR::Sh2add => 0b0010000, + AluOPRRR::Sh2adduw => 
0b0010000, + AluOPRRR::Sh3add => 0b0010000, + AluOPRRR::Sh3adduw => 0b0010000, + AluOPRRR::Xnor => 0b0100000, + + // Zbkb + AluOPRRR::Pack => 0b0000100, + AluOPRRR::Packw => 0b0000100, + AluOPRRR::Packh => 0b0000100, + } + } + + pub(crate) fn reverse_rs(self) -> bool { + // special case. + // sgt and sgtu is not defined in isa. + // emit should reverse rs1 and rs2. + self == AluOPRRR::Sgt || self == AluOPRRR::Sgtu + } +} + +impl AluOPRRI { + pub(crate) fn option_funct6(self) -> Option { + let x: Option = match self { + Self::Slli => Some(0b00_0000), + Self::Srli => Some(0b00_0000), + Self::Srai => Some(0b01_0000), + Self::Bclri => Some(0b010010), + Self::Bexti => Some(0b010010), + Self::Binvi => Some(0b011010), + Self::Bseti => Some(0b001010), + Self::Rori => Some(0b011000), + Self::SlliUw => Some(0b000010), + _ => None, + }; + x + } + + pub(crate) fn option_funct7(self) -> Option { + let x = match self { + Self::Slliw => Some(0b000_0000), + Self::SrliW => Some(0b000_0000), + Self::Sraiw => Some(0b010_0000), + Self::Roriw => Some(0b0110000), + _ => None, + }; + x + } + + pub(crate) fn imm12(self, imm12: Imm12) -> u32 { + let x = imm12.as_u32(); + if let Some(func) = self.option_funct6() { + func << 6 | (x & 0b11_1111) + } else if let Some(func) = self.option_funct7() { + func << 5 | (x & 0b1_1111) + } else if let Some(func) = self.option_funct12() { + func + } else { + x + } + } + + pub(crate) fn option_funct12(self) -> Option { + match self { + Self::Clz => Some(0b011000000000), + Self::Clzw => Some(0b011000000000), + Self::Cpop => Some(0b011000000010), + Self::Cpopw => Some(0b011000000010), + Self::Ctz => Some(0b011000000001), + Self::Ctzw => Some(0b011000000001), + Self::Rev8 => Some(0b011010111000), + Self::Sextb => Some(0b011000000100), + Self::Sexth => Some(0b011000000101), + Self::Zexth => Some(0b000010000000), + Self::Orcb => Some(0b001010000111), + Self::Brev8 => Some(0b0110_1000_0111), + _ => None, + } + } + + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Addi => "addi", + Self::Slti => "slti", + Self::SltiU => "sltiu", + Self::Xori => "xori", + Self::Ori => "ori", + Self::Andi => "andi", + Self::Slli => "slli", + Self::Srli => "srli", + Self::Srai => "srai", + Self::Addiw => "addiw", + Self::Slliw => "slliw", + Self::SrliW => "srliw", + Self::Sraiw => "sraiw", + Self::Bclri => "bclri", + Self::Bexti => "bexti", + Self::Binvi => "binvi", + Self::Bseti => "bseti", + Self::Rori => "rori", + Self::Roriw => "roriw", + Self::SlliUw => "slli.uw", + Self::Clz => "clz", + Self::Clzw => "clzw", + Self::Cpop => "cpop", + Self::Cpopw => "cpopw", + Self::Ctz => "ctz", + Self::Ctzw => "ctzw", + Self::Rev8 => "rev8", + Self::Sextb => "sext.b", + Self::Sexth => "sext.h", + Self::Zexth => "zext.h", + Self::Orcb => "orc.b", + Self::Brev8 => "brev8", + } + } + + pub fn funct3(self) -> u32 { + match self { + AluOPRRI::Addi => 0b000, + AluOPRRI::Slti => 0b010, + AluOPRRI::SltiU => 0b011, + AluOPRRI::Xori => 0b100, + AluOPRRI::Ori => 0b110, + AluOPRRI::Andi => 0b111, + AluOPRRI::Slli => 0b001, + AluOPRRI::Srli => 0b101, + AluOPRRI::Srai => 0b101, + AluOPRRI::Addiw => 0b000, + AluOPRRI::Slliw => 0b001, + AluOPRRI::SrliW => 0b101, + AluOPRRI::Sraiw => 0b101, + AluOPRRI::Bclri => 0b001, + AluOPRRI::Bexti => 0b101, + AluOPRRI::Binvi => 0b001, + AluOPRRI::Bseti => 0b001, + AluOPRRI::Rori => 0b101, + AluOPRRI::Roriw => 0b101, + AluOPRRI::SlliUw => 0b001, + AluOPRRI::Clz => 0b001, + AluOPRRI::Clzw => 0b001, + AluOPRRI::Cpop => 0b001, + AluOPRRI::Cpopw => 0b001, + AluOPRRI::Ctz => 
0b001, + AluOPRRI::Ctzw => 0b001, + AluOPRRI::Rev8 => 0b101, + AluOPRRI::Sextb => 0b001, + AluOPRRI::Sexth => 0b001, + AluOPRRI::Zexth => 0b100, + AluOPRRI::Orcb => 0b101, + AluOPRRI::Brev8 => 0b101, + } + } + + pub fn op_code(self) -> u32 { + match self { + AluOPRRI::Addi + | AluOPRRI::Slti + | AluOPRRI::SltiU + | AluOPRRI::Xori + | AluOPRRI::Ori + | AluOPRRI::Andi + | AluOPRRI::Slli + | AluOPRRI::Srli + | AluOPRRI::Srai + | AluOPRRI::Bclri + | AluOPRRI::Bexti + | AluOPRRI::Binvi + | AluOPRRI::Bseti + | AluOPRRI::Rori + | AluOPRRI::Clz + | AluOPRRI::Cpop + | AluOPRRI::Ctz + | AluOPRRI::Rev8 + | AluOPRRI::Sextb + | AluOPRRI::Sexth + | AluOPRRI::Orcb + | AluOPRRI::Brev8 => 0b0010011, + + AluOPRRI::Addiw + | AluOPRRI::Slliw + | AluOPRRI::SrliW + | AluOPRRI::Sraiw + | AluOPRRI::Roriw + | AluOPRRI::SlliUw + | AluOPRRI::Clzw + | AluOPRRI::Cpopw + | AluOPRRI::Ctzw => 0b0011011, + AluOPRRI::Zexth => 0b0111011, + } + } +} + +impl Default for FRM { + fn default() -> Self { + Self::Fcsr + } +} + +/// float rounding mode. +impl FRM { + pub(crate) fn to_static_str(self) -> &'static str { + match self { + FRM::RNE => "rne", + FRM::RTZ => "rtz", + FRM::RDN => "rdn", + FRM::RUP => "rup", + FRM::RMM => "rmm", + FRM::Fcsr => "fcsr", + } + } + + #[inline] + pub(crate) fn bits(self) -> u8 { + match self { + FRM::RNE => 0b000, + FRM::RTZ => 0b001, + FRM::RDN => 0b010, + FRM::RUP => 0b011, + FRM::RMM => 0b100, + FRM::Fcsr => 0b111, + } + } + pub(crate) fn as_u32(self) -> u32 { + self.bits() as u32 + } +} + +impl FFlagsException { + #[inline] + pub(crate) fn mask(self) -> u32 { + match self { + FFlagsException::NV => 1 << 4, + FFlagsException::DZ => 1 << 3, + FFlagsException::OF => 1 << 2, + FFlagsException::UF => 1 << 1, + FFlagsException::NX => 1 << 0, + } + } +} + +impl LoadOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Lb => "lb", + Self::Lh => "lh", + Self::Lw => "lw", + Self::Lbu => "lbu", + Self::Lhu => "lhu", + Self::Lwu => "lwu", + Self::Ld => "ld", + Self::Flw => "flw", + Self::Fld => "fld", + } + } + + pub(crate) fn from_type(t: Type) -> Self { + if t.is_float() { + return if t == F32 { Self::Flw } else { Self::Fld }; + } + match t { + R32 => Self::Lwu, + R64 | I64 => Self::Ld, + + I8 => Self::Lb, + I16 => Self::Lh, + I32 => Self::Lw, + _ => unreachable!(), + } + } + + pub(crate) fn op_code(self) -> u32 { + match self { + Self::Lb | Self::Lh | Self::Lw | Self::Lbu | Self::Lhu | Self::Lwu | Self::Ld => { + 0b0000011 + } + Self::Flw | Self::Fld => 0b0000111, + } + } + pub(crate) fn funct3(self) -> u32 { + match self { + Self::Lb => 0b000, + Self::Lh => 0b001, + Self::Lw => 0b010, + Self::Lwu => 0b110, + Self::Lbu => 0b100, + Self::Lhu => 0b101, + Self::Ld => 0b011, + Self::Flw => 0b010, + Self::Fld => 0b011, + } + } +} + +impl StoreOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Sb => "sb", + Self::Sh => "sh", + Self::Sw => "sw", + Self::Sd => "sd", + Self::Fsw => "fsw", + Self::Fsd => "fsd", + } + } + pub(crate) fn from_type(t: Type) -> Self { + if t.is_float() { + return if t == F32 { Self::Fsw } else { Self::Fsd }; + } + match t.bits() { + 1 | 8 => Self::Sb, + 16 => Self::Sh, + 32 => Self::Sw, + 64 => Self::Sd, + _ => unreachable!(), + } + } + pub(crate) fn op_code(self) -> u32 { + match self { + Self::Sb | Self::Sh | Self::Sw | Self::Sd => 0b0100011, + Self::Fsw | Self::Fsd => 0b0100111, + } + } + pub(crate) fn funct3(self) -> u32 { + match self { + Self::Sb => 0b000, + Self::Sh => 0b001, + Self::Sw => 0b010, + Self::Sd => 0b011, + Self::Fsw 
=> 0b010, + Self::Fsd => 0b011, + } + } +} + +impl FClassResult { + pub(crate) const fn bit(self) -> u32 { + match self { + FClassResult::NegInfinite => 1 << 0, + FClassResult::NegNormal => 1 << 1, + FClassResult::NegSubNormal => 1 << 2, + FClassResult::NegZero => 1 << 3, + FClassResult::PosZero => 1 << 4, + FClassResult::PosSubNormal => 1 << 5, + FClassResult::PosNormal => 1 << 6, + FClassResult::PosInfinite => 1 << 7, + FClassResult::SNaN => 1 << 8, + FClassResult::QNaN => 1 << 9, + } + } + + #[inline] + pub(crate) const fn is_nan_bits() -> u32 { + Self::SNaN.bit() | Self::QNaN.bit() + } + #[inline] + pub(crate) fn is_zero_bits() -> u32 { + Self::NegZero.bit() | Self::PosZero.bit() + } + + #[inline] + pub(crate) fn is_infinite_bits() -> u32 { + Self::PosInfinite.bit() | Self::NegInfinite.bit() + } +} + +impl AtomicOP { + #[inline] + pub(crate) fn is_load(self) -> bool { + match self { + Self::LrW | Self::LrD => true, + _ => false, + } + } + + #[inline] + pub(crate) fn op_name(self, amo: AMO) -> String { + let s = match self { + Self::LrW => "lr.w", + Self::ScW => "sc.w", + + Self::AmoswapW => "amoswap.w", + Self::AmoaddW => "amoadd.w", + Self::AmoxorW => "amoxor.w", + Self::AmoandW => "amoand.w", + Self::AmoorW => "amoor.w", + Self::AmominW => "amomin.w", + Self::AmomaxW => "amomax.w", + Self::AmominuW => "amominu.w", + Self::AmomaxuW => "amomaxu.w", + Self::LrD => "lr.d", + Self::ScD => "sc.d", + Self::AmoswapD => "amoswap.d", + Self::AmoaddD => "amoadd.d", + Self::AmoxorD => "amoxor.d", + Self::AmoandD => "amoand.d", + Self::AmoorD => "amoor.d", + Self::AmominD => "amomin.d", + Self::AmomaxD => "amomax.d", + Self::AmominuD => "amominu.d", + Self::AmomaxuD => "amomaxu.d", + }; + format!("{}{}", s, amo.to_static_str()) + } + #[inline] + pub(crate) fn op_code(self) -> u32 { + 0b0101111 + } + + #[inline] + pub(crate) fn funct7(self, amo: AMO) -> u32 { + self.funct5() << 2 | amo.as_u32() & 0b11 + } + + pub(crate) fn funct3(self) -> u32 { + match self { + AtomicOP::LrW + | AtomicOP::ScW + | AtomicOP::AmoswapW + | AtomicOP::AmoaddW + | AtomicOP::AmoxorW + | AtomicOP::AmoandW + | AtomicOP::AmoorW + | AtomicOP::AmominW + | AtomicOP::AmomaxW + | AtomicOP::AmominuW + | AtomicOP::AmomaxuW => 0b010, + AtomicOP::LrD + | AtomicOP::ScD + | AtomicOP::AmoswapD + | AtomicOP::AmoaddD + | AtomicOP::AmoxorD + | AtomicOP::AmoandD + | AtomicOP::AmoorD + | AtomicOP::AmominD + | AtomicOP::AmomaxD + | AtomicOP::AmominuD + | AtomicOP::AmomaxuD => 0b011, + } + } + pub(crate) fn funct5(self) -> u32 { + match self { + AtomicOP::LrW => 0b00010, + AtomicOP::ScW => 0b00011, + AtomicOP::AmoswapW => 0b00001, + AtomicOP::AmoaddW => 0b00000, + AtomicOP::AmoxorW => 0b00100, + AtomicOP::AmoandW => 0b01100, + AtomicOP::AmoorW => 0b01000, + AtomicOP::AmominW => 0b10000, + AtomicOP::AmomaxW => 0b10100, + AtomicOP::AmominuW => 0b11000, + AtomicOP::AmomaxuW => 0b11100, + AtomicOP::LrD => 0b00010, + AtomicOP::ScD => 0b00011, + AtomicOP::AmoswapD => 0b00001, + AtomicOP::AmoaddD => 0b00000, + AtomicOP::AmoxorD => 0b00100, + AtomicOP::AmoandD => 0b01100, + AtomicOP::AmoorD => 0b01000, + AtomicOP::AmominD => 0b10000, + AtomicOP::AmomaxD => 0b10100, + AtomicOP::AmominuD => 0b11000, + AtomicOP::AmomaxuD => 0b11100, + } + } + + pub(crate) fn load_op(t: Type) -> Self { + if t.bits() <= 32 { + Self::LrW + } else { + Self::LrD + } + } + pub(crate) fn store_op(t: Type) -> Self { + if t.bits() <= 32 { + Self::ScW + } else { + Self::ScD + } + } + + /// extract + pub(crate) fn extract(rd: WritableReg, offset: Reg, rs: Reg, ty: Type) -> 
SmallInstVec { + let mut insts = SmallInstVec::new(); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: rd, + rs1: rs, + rs2: offset, + }); + // + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts + } + + /// like extract but sign extend the value. + /// suitable for smax,etc. + pub(crate) fn extract_sext( + rd: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: rd, + rs1: rs, + rs2: offset, + }); + // + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: true, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts + } + + pub(crate) fn unset( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + ty: Type, + ) -> SmallInstVec { + assert!(rd != tmp); + let mut insts = SmallInstVec::new(); + insts.extend(Inst::load_int_mask(tmp, ty)); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: tmp, + rs1: tmp.to_reg(), + rs2: offset, + }); + insts.push(Inst::construct_bit_not(tmp, tmp.to_reg())); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp.to_reg(), + }); + insts + } + + pub(crate) fn set( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + assert!(rd != tmp); + let mut insts = SmallInstVec::new(); + // make rs into tmp. + insts.push(Inst::Extend { + rd: tmp, + rn: rs, + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: tmp, + rs1: tmp.to_reg(), + rs2: offset, + }); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp.to_reg(), + }); + insts + } + + /// Merge reset part of rs into rd. + /// Call this function must make sure that other part of value is already in rd. + pub(crate) fn merge( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + let mut insts = Self::unset(rd, tmp, offset, ty); + insts.extend(Self::set(rd, tmp, offset, rs, ty)); + insts + } +} + +impl IntSelectOP { + #[inline] + pub(crate) fn from_ir_op(op: crate::ir::Opcode) -> Self { + match op { + crate::ir::Opcode::Smax => Self::Smax, + crate::ir::Opcode::Umax => Self::Umax, + crate::ir::Opcode::Smin => Self::Smin, + crate::ir::Opcode::Umin => Self::Umin, + _ => unreachable!(), + } + } + #[inline] + pub(crate) fn op_name(self) -> &'static str { + match self { + IntSelectOP::Smax => "smax", + IntSelectOP::Umax => "umax", + IntSelectOP::Smin => "smin", + IntSelectOP::Umin => "umin", + } + } + #[inline] + pub(crate) fn to_int_cc(self) -> IntCC { + match self { + IntSelectOP::Smax => IntCC::SignedGreaterThan, + IntSelectOP::Umax => IntCC::UnsignedGreaterThan, + IntSelectOP::Smin => IntCC::SignedLessThan, + IntSelectOP::Umin => IntCC::UnsignedLessThan, + } + } +} + +///Atomic Memory ordering. +#[derive(Copy, Clone, Debug)] +pub enum AMO { + Relax = 0b00, + Release = 0b01, + Aquire = 0b10, + SeqCst = 0b11, +} + +impl AMO { + pub(crate) fn to_static_str(self) -> &'static str { + match self { + AMO::Relax => "", + AMO::Release => ".rl", + AMO::Aquire => ".aq", + AMO::SeqCst => ".aqrl", + } + } + pub(crate) fn as_u32(self) -> u32 { + self as u32 + } +} + +impl Inst { + /// fence request bits. 
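+    ///
+    /// These correspond to the predecessor/successor fields of the RISC-V
+    /// `fence` instruction: `i` = device input, `o` = device output,
+    /// `r` = memory reads, `w` = memory writes; a full `fence iorw, iorw`
+    /// sets all four bits (0b1111) in both fields.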
+ pub(crate) const FENCE_REQ_I: u8 = 1 << 3; + pub(crate) const FENCE_REQ_O: u8 = 1 << 2; + pub(crate) const FENCE_REQ_R: u8 = 1 << 1; + pub(crate) const FENCE_REQ_W: u8 = 1 << 0; + pub(crate) fn fence_req_to_string(x: u8) -> String { + let mut s = String::default(); + if x & Self::FENCE_REQ_I != 0 { + s.push_str("i"); + } + if x & Self::FENCE_REQ_O != 0 { + s.push_str("o"); + } + if x & Self::FENCE_REQ_R != 0 { + s.push_str("r"); + } + if x & Self::FENCE_REQ_W != 0 { + s.push_str("w"); + } + s + } +} + +impl FloatRoundOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + FloatRoundOP::Nearest => "nearest", + FloatRoundOP::Ceil => "ceil", + FloatRoundOP::Floor => "floor", + FloatRoundOP::Trunc => "trunc", + } + } + + pub(crate) fn to_frm(self) -> FRM { + match self { + FloatRoundOP::Nearest => FRM::RNE, + FloatRoundOP::Ceil => FRM::RUP, + FloatRoundOP::Floor => FRM::RDN, + FloatRoundOP::Trunc => FRM::RTZ, + } + } +} + +impl FloatSelectOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + FloatSelectOP::Max => "max", + FloatSelectOP::Min => "min", + } + } + + pub(crate) fn to_fpuoprrr(self, ty: Type) -> FpuOPRRR { + match self { + FloatSelectOP::Max => { + if ty == F32 { + FpuOPRRR::FmaxS + } else { + FpuOPRRR::FmaxD + } + } + FloatSelectOP::Min => { + if ty == F32 { + FpuOPRRR::FminS + } else { + FpuOPRRR::FminD + } + } + } + } + // move qnan bits into int register. + pub(crate) fn snan_bits(self, rd: Writable, ty: Type) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1))); + let x = if ty == F32 { 22 } else { 51 }; + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(x), + }); + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(x), + }); + insts + } +} + +pub(crate) fn f32_bits(f: f32) -> u32 { + u32::from_le_bytes(f.to_le_bytes()) +} +pub(crate) fn f64_bits(f: f64) -> u64 { + u64::from_le_bytes(f.to_le_bytes()) +} + +/// +pub(crate) fn f32_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f32, f32) { + match (signed, out_bits) { + (true, 8) => (i8::min_value() as f32 - 1., i8::max_value() as f32 + 1.), + (true, 16) => (i16::min_value() as f32 - 1., i16::max_value() as f32 + 1.), + (true, 32) => (-2147483904.0, 2147483648.0), + (true, 64) => (-9223373136366403584.0, 9223372036854775808.0), + (false, 8) => (-1., u8::max_value() as f32 + 1.), + (false, 16) => (-1., u16::max_value() as f32 + 1.), + (false, 32) => (-1., 4294967296.0), + (false, 64) => (-1., 18446744073709551616.0), + _ => unreachable!(), + } +} + +pub(crate) fn f64_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f64, f64) { + match (signed, out_bits) { + (true, 8) => (i8::min_value() as f64 - 1., i8::max_value() as f64 + 1.), + (true, 16) => (i16::min_value() as f64 - 1., i16::max_value() as f64 + 1.), + (true, 32) => (-2147483649.0, 2147483648.0), + (true, 64) => (-9223372036854777856.0, 9223372036854775808.0), + (false, 8) => (-1., u8::max_value() as f64 + 1.), + (false, 16) => (-1., u16::max_value() as f64 + 1.), + (false, 32) => (-1., 4294967296.0), + (false, 64) => (-1., 18446744073709551616.0), + _ => unreachable!(), + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs new file mode 100644 index 000000000000..6e2b3f875465 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -0,0 +1,3226 @@ +//! Riscv64 ISA: binary code emission. 
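+//!
+//! Instruction words are assembled by or-ing fields into place; for example,
+//! an R-type ALU instruction is encoded as
+//! `opcode | rd << 7 | funct3 << 12 | rs1 << 15 | rs2 << 20 | funct7 << 25`,
+//! which is what `encode_r_type` and the manual shifts below compute.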
+ +use crate::binemit::StackMap; +use crate::ir::{self, RelSourceLoc, TrapCode}; +use crate::isa::zkasm::inst::*; +use crate::machinst::{AllocationConsumer, Reg, Writable}; +use crate::trace; +use cranelift_control::ControlPlane; +use regalloc2::Allocation; + +pub struct EmitInfo { + shared_flag: settings::Flags, + isa_flags: super::super::riscv_settings::Flags, +} + +impl EmitInfo { + pub(crate) fn new( + shared_flag: settings::Flags, + isa_flags: super::super::riscv_settings::Flags, + ) -> Self { + Self { + shared_flag, + isa_flags, + } + } +} + +/// load constant by put the constant in the code stream. +/// calculate the pc and using load instruction. +/// This is only allow used in the emit stage. +/// Because of those instruction must execute together. +/// see https://github.com/bytecodealliance/wasmtime/pull/5612 +#[derive(Clone, Copy)] +pub(crate) enum LoadConstant { + U32(u32), + U64(u64), +} + +impl LoadConstant { + fn to_le_bytes(self) -> Vec { + match self { + LoadConstant::U32(x) => Vec::from_iter(x.to_le_bytes().into_iter()), + LoadConstant::U64(x) => Vec::from_iter(x.to_le_bytes().into_iter()), + } + } + fn load_op(self) -> LoadOP { + match self { + LoadConstant::U32(_) => LoadOP::Lwu, + LoadConstant::U64(_) => LoadOP::Ld, + } + } + fn load_ty(self) -> Type { + match self { + LoadConstant::U32(_) => R32, + LoadConstant::U64(_) => R64, + } + } + + pub(crate) fn load_constant Writable>( + self, + rd: Writable, + alloc_tmp: &mut F, + ) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + // get current pc. + let pc = alloc_tmp(I64); + insts.push(Inst::Auipc { + rd: pc, + imm: Imm20 { bits: 0 }, + }); + // load + insts.push(Inst::Load { + rd, + op: self.load_op(), + flags: MemFlags::new(), + from: AMode::RegOffset(pc.to_reg(), 12, self.load_ty()), + }); + let data = self.to_le_bytes(); + // jump over. + insts.push(Inst::Jal { + dest: BranchTarget::ResolvedOffset(Inst::INSTRUCTION_SIZE + data.len() as i32), + }); + insts.push(Inst::RawData { data }); + insts + } + + // load and perform an extra add. + pub(crate) fn load_constant_and_add(self, rd: Writable, rs: Reg) -> SmallInstVec { + let mut insts = self.load_constant(rd, &mut |_| rd); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd, + rs1: rd.to_reg(), + rs2: rs, + }); + insts + } +} + +pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 { + u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() +} + +#[derive(Clone, Debug, PartialEq, Default)] +pub enum EmitVState { + #[default] + Unknown, + Known(VState), +} + +/// State carried between emissions of a sequence of instructions. +#[derive(Default, Clone, Debug)] +pub struct EmitState { + pub(crate) virtual_sp_offset: i64, + pub(crate) nominal_sp_to_fp: i64, + /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + stack_map: Option, + /// Current source-code location corresponding to instruction to be emitted. + cur_srcloc: RelSourceLoc, + /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and + /// optimized away at compiletime. See [cranelift_control]. + ctrl_plane: ControlPlane, + /// Vector State + /// Controls the current state of the vector unit at the emission point. 
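+    ///
+    /// Starts as `Unknown` and is reset to `Unknown` at every block boundary;
+    /// before emitting an instruction that expects a particular `VState`, it
+    /// is compared against that expectation and a `VecSetState` is inserted
+    /// only when the two differ.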
+ vstate: EmitVState, +} + +impl EmitState { + fn take_stack_map(&mut self) -> Option { + self.stack_map.take() + } + + fn clear_post_insn(&mut self) { + self.stack_map = None; + } + + fn cur_srcloc(&self) -> RelSourceLoc { + self.cur_srcloc + } +} + +impl MachInstEmitState for EmitState { + fn new( + abi: &Callee, + ctrl_plane: ControlPlane, + ) -> Self { + EmitState { + virtual_sp_offset: 0, + nominal_sp_to_fp: abi.frame_size() as i64, + stack_map: None, + cur_srcloc: RelSourceLoc::default(), + ctrl_plane, + vstate: EmitVState::Unknown, + } + } + + fn pre_safepoint(&mut self, stack_map: StackMap) { + self.stack_map = Some(stack_map); + } + + fn pre_sourceloc(&mut self, srcloc: RelSourceLoc) { + self.cur_srcloc = srcloc; + } + + fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { + &mut self.ctrl_plane + } + + fn take_ctrl_plane(self) -> ControlPlane { + self.ctrl_plane + } + + fn on_new_block(&mut self) { + // Reset the vector state. + self.vstate = EmitVState::Unknown; + } +} + +impl Inst { + /// construct a "imm - rs". + pub(crate) fn construct_imm_sub_rs(rd: Writable, imm: u64, rs: Reg) -> SmallInstVec { + let mut insts = Inst::load_constant_u64(rd, imm, &mut |_| rd); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd, + rs1: rd.to_reg(), + rs2: rs, + }); + insts + } + + /// Load int mask. + /// If ty is int then 0xff in rd. + pub(crate) fn load_int_mask(rd: Writable, ty: Type) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + assert!(ty.is_int() && ty.bits() <= 64); + match ty { + I64 => { + insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1))); + } + I32 | I16 => { + insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1))); + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + } + I8 => { + insts.push(Inst::load_imm12(rd, Imm12::from_bits(255))); + } + _ => unreachable!("ty:{:?}", ty), + } + insts + } + /// inverse all bit + pub(crate) fn construct_bit_not(rd: Writable, rs: Reg) -> Inst { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Xori, + rd, + rs, + imm12: Imm12::from_bits(-1), + } + } + + // emit a float is not a nan. + pub(crate) fn emit_not_nan(rd: Writable, rs: Reg, ty: Type) -> Inst { + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FeqS + } else { + FpuOPRRR::FeqD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + } + + pub(crate) fn emit_fabs(rd: Writable, rs: Reg, ty: Type) -> Inst { + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjxS + } else { + FpuOPRRR::FsgnjxD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + } + /// If a float is zero. 
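+    ///
+    /// Emits an `fclass` of `rs` into `tmp`, masks out the positive/negative
+    /// zero bits, and branches to `taken` when the value is non-zero (i.e. the
+    /// masked result is zero) and to `not_taken` otherwise.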
+ pub(crate) fn emit_if_float_not_zero( + tmp: Writable, + rs: Reg, + ty: Type, + taken: BranchTarget, + not_taken: BranchTarget, + ) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + let class_op = if ty == F32 { + FpuOPRR::FclassS + } else { + FpuOPRR::FclassD + }; + insts.push(Inst::FpuRR { + alu_op: class_op, + frm: None, + rd: tmp, + rs: rs, + }); + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(FClassResult::is_zero_bits() as i16), + }); + insts.push(Inst::CondBr { + taken, + not_taken, + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + }); + insts + } + pub(crate) fn emit_fneg(rd: Writable, rs: Reg, ty: Type) -> Inst { + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjnS + } else { + FpuOPRRR::FsgnjnD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + } + + pub(crate) fn lower_br_icmp( + cc: IntCC, + a: ValueRegs, + b: ValueRegs, + taken: BranchTarget, + not_taken: BranchTarget, + ty: Type, + ) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + if ty.bits() <= 64 { + let rs1 = a.only_reg().unwrap(); + let rs2 = b.only_reg().unwrap(); + let inst = Inst::CondBr { + taken, + not_taken, + kind: IntegerCompare { kind: cc, rs1, rs2 }, + }; + insts.push(inst); + return insts; + } + // compare i128 + let low = |cc: IntCC| -> IntegerCompare { + IntegerCompare { + rs1: a.regs()[0], + rs2: b.regs()[0], + kind: cc, + } + }; + let high = |cc: IntCC| -> IntegerCompare { + IntegerCompare { + rs1: a.regs()[1], + rs2: b.regs()[1], + kind: cc, + } + }; + match cc { + IntCC::Equal => { + // if high part not equal, + // then we can go to not_taken otherwise fallthrough. + insts.push(Inst::CondBr { + taken: not_taken, + not_taken: BranchTarget::zero(), + kind: high(IntCC::NotEqual), + }); + // the rest part. + insts.push(Inst::CondBr { + taken, + not_taken, + kind: low(IntCC::Equal), + }); + } + + IntCC::NotEqual => { + // if the high part not equal , + // we know the whole must be not equal, + // we can goto the taken part , otherwise fallthrought. + insts.push(Inst::CondBr { + taken, + not_taken: BranchTarget::zero(), // no branch + kind: high(IntCC::NotEqual), + }); + + insts.push(Inst::CondBr { + taken, + not_taken, + kind: low(IntCC::NotEqual), + }); + } + IntCC::SignedGreaterThanOrEqual + | IntCC::SignedLessThanOrEqual + | IntCC::UnsignedGreaterThanOrEqual + | IntCC::UnsignedLessThanOrEqual + | IntCC::SignedGreaterThan + | IntCC::SignedLessThan + | IntCC::UnsignedLessThan + | IntCC::UnsignedGreaterThan => { + // + insts.push(Inst::CondBr { + taken, + not_taken: BranchTarget::zero(), + kind: high(cc.without_equal()), + }); + // + insts.push(Inst::CondBr { + taken: not_taken, + not_taken: BranchTarget::zero(), + kind: high(IntCC::NotEqual), + }); + insts.push(Inst::CondBr { + taken, + not_taken, + kind: low(cc.unsigned()), + }); + } + } + insts + } + + /// Returns Some(VState) if this insturction is expecting a specific vector state + /// before emission. + fn expected_vstate(&self) -> Option<&VState> { + match self { + Inst::Nop0 + | Inst::Nop4 + | Inst::BrTable { .. } + | Inst::Auipc { .. } + | Inst::Lui { .. } + | Inst::LoadConst32 { .. } + | Inst::LoadConst64 { .. } + | Inst::AluRRR { .. } + | Inst::FpuRRR { .. } + | Inst::AluRRImm12 { .. } + | Inst::Load { .. } + | Inst::Store { .. } + | Inst::Args { .. } + | Inst::Ret { .. } + | Inst::Extend { .. } + | Inst::AdjustSp { .. } + | Inst::Call { .. } + | Inst::CallInd { .. } + | Inst::ReturnCall { .. 
} + | Inst::ReturnCallInd { .. } + | Inst::TrapIf { .. } + | Inst::Jal { .. } + | Inst::CondBr { .. } + | Inst::LoadExtName { .. } + | Inst::LoadAddr { .. } + | Inst::VirtualSPOffsetAdj { .. } + | Inst::Mov { .. } + | Inst::MovFromPReg { .. } + | Inst::Fence { .. } + | Inst::FenceI + | Inst::ECall + | Inst::EBreak + | Inst::Udf { .. } + | Inst::FpuRR { .. } + | Inst::FpuRRRR { .. } + | Inst::Jalr { .. } + | Inst::Atomic { .. } + | Inst::Select { .. } + | Inst::AtomicCas { .. } + | Inst::IntSelect { .. } + | Inst::Icmp { .. } + | Inst::SelectReg { .. } + | Inst::FcvtToInt { .. } + | Inst::RawData { .. } + | Inst::AtomicStore { .. } + | Inst::AtomicLoad { .. } + | Inst::AtomicRmwLoop { .. } + | Inst::TrapIfC { .. } + | Inst::Unwind { .. } + | Inst::DummyUse { .. } + | Inst::FloatRound { .. } + | Inst::FloatSelect { .. } + | Inst::Popcnt { .. } + | Inst::Rev8 { .. } + | Inst::Cltz { .. } + | Inst::Brev8 { .. } + | Inst::StackProbeLoop { .. } => None, + + // VecSetState does not expect any vstate, rather it updates it. + Inst::VecSetState { .. } => None, + + // `vmv` instructions copy a set of registers and ignore vstate. + Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None, + + Inst::VecAluRR { vstate, .. } | + Inst::VecAluRRR { vstate, .. } | + Inst::VecAluRRRR { vstate, .. } | + Inst::VecAluRImm5 { vstate, .. } | + Inst::VecAluRRImm5 { vstate, .. } | + Inst::VecAluRRRImm5 { vstate, .. } | + // TODO: Unit-stride loads and stores only need the AVL to be correct, not + // the full vtype. A future optimization could be to decouple these two when + // updating vstate. This would allow us to avoid emitting a VecSetState in + // some cases. + Inst::VecLoad { vstate, .. } + | Inst::VecStore { vstate, .. } => Some(vstate), + } + } +} + +impl MachInstEmit for Inst { + type State = EmitState; + type Info = EmitInfo; + + fn emit( + &self, + allocs: &[Allocation], + sink: &mut MachBuffer, + emit_info: &Self::Info, + state: &mut EmitState, + ) { + let mut allocs = AllocationConsumer::new(allocs); + + // Check if we need to update the vector state before emitting this instruction + if let Some(expected) = self.expected_vstate() { + if state.vstate != EmitVState::Known(expected.clone()) { + // Update the vector state. + Inst::VecSetState { + rd: writable_zero_reg(), + vstate: expected.clone(), + } + .emit(&[], sink, emit_info, state); + } + } + + // N.B.: we *must* not exceed the "worst-case size" used to compute + // where to insert islands, except when islands are explicitly triggered + // (with an `EmitIsland`). We check this in debug builds. This is `mut` + // to allow disabling the check for `JTSequence`, which is always + // emitted following an `EmitIsland`. + let mut start_off = sink.cur_offset(); + match self { + &Inst::Nop0 => { + // do nothing + } + // Addi x0, x0, 0 + &Inst::Nop4 => { + let x = Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: Writable::from_reg(zero_reg()), + rs: zero_reg(), + imm12: Imm12::zero(), + }; + x.emit(&[], sink, emit_info, state) + } + &Inst::RawData { ref data } => { + // Right now we only put a u32 or u64 in this instruction. + // It is not very long, no need to check if need `emit_island`. + // If data is very long , this is a bug because RawData is typecial + // use to load some data and rely on some positon in the code stream. + // and we may exceed `Inst::worst_case_size`. + // for more information see https://github.com/bytecodealliance/wasmtime/pull/5612. 
+ sink.put_data(&data[..]); + } + &Inst::Lui { rd, ref imm } => { + let rd = allocs.next_writable(rd); + let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12); + sink.put4(x); + } + &Inst::LoadConst32 { rd, imm } => { + let rd = allocs.next_writable(rd); + LoadConstant::U32(imm) + .load_constant(rd, &mut |_| rd) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + } + &Inst::LoadConst64 { rd, imm } => { + let rd = allocs.next_writable(rd); + LoadConstant::U64(imm) + .load_constant(rd, &mut |_| rd) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + } + &Inst::FpuRR { + frm, + alu_op, + rd, + rs, + } => { + let rs = allocs.next(rs); + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3(frm) << 12 + | reg_to_gpr_num(rs) << 15 + | alu_op.rs2_funct5() << 20 + | alu_op.funct7() << 25; + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && alu_op.is_convert_to_int() { + sink.add_trap(TrapCode::BadConversionToInteger); + } + sink.put4(x); + } + &Inst::FpuRRRR { + alu_op, + rd, + rs1, + rs2, + rs3, + frm, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rs3 = allocs.next(rs3); + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3(frm) << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + | alu_op.funct2() << 25 + | reg_to_gpr_num(rs3) << 27; + + sink.put4(x); + } + &Inst::FpuRRR { + alu_op, + frm, + rd, + rs1, + rs2, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + + let x: u32 = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | (alu_op.funct3(frm)) << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + | alu_op.funct7() << 25; + sink.put4(x); + } + &Inst::Unwind { ref inst } => { + sink.add_unwind(inst.clone()); + } + &Inst::DummyUse { reg } => { + allocs.next(reg); + } + &Inst::AluRRR { + alu_op, + rd, + rs1, + rs2, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + let (rs1, rs2) = if alu_op.reverse_rs() { + (rs2, rs1) + } else { + (rs1, rs2) + }; + + sink.put4(encode_r_type( + alu_op.op_code(), + rd, + alu_op.funct3(), + rs1, + rs2, + alu_op.funct7(), + )); + } + &Inst::AluRRImm12 { + alu_op, + rd, + rs, + imm12, + } => { + let rs = allocs.next(rs); + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3() << 12 + | reg_to_gpr_num(rs) << 15 + | alu_op.imm12(imm12) << 20; + sink.put4(x); + } + &Inst::Load { + rd, + op, + from, + flags, + } => { + let from = from.clone().with_allocs(&mut allocs); + let rd = allocs.next_writable(rd); + + let base = from.get_base_register(); + let offset = from.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + + let (addr, imm12) = match (base, offset_imm12) { + // If the offset fits into an imm12 we can directly encode it. + (Some(base), Some(imm12)) => (base, imm12), + // Otherwise load the address it into a reg and load from it. + _ => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: from }.emit(&[], sink, emit_info, state); + (tmp.to_reg(), Imm12::zero()) + } + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. 
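+                    // Record a HeapOutOfBounds trap at this offset so a fault on the load reports the right trap code.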
+ sink.add_trap(TrapCode::HeapOutOfBounds); + } + + sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); + } + &Inst::Store { op, src, flags, to } => { + let to = to.clone().with_allocs(&mut allocs); + let src = allocs.next(src); + + let base = to.get_base_register(); + let offset = to.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + + let (addr, imm12) = match (base, offset_imm12) { + // If the offset fits into an imm12 we can directly encode it. + (Some(base), Some(imm12)) => (base, imm12), + // Otherwise load the address it into a reg and load from it. + _ => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: to }.emit(&[], sink, emit_info, state); + (tmp.to_reg(), Imm12::zero()) + } + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); + } + + sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12)); + } + &Inst::Args { .. } => { + // Nothing: this is a pseudoinstruction that serves + // only to constrain registers at a certain point. + } + &Inst::Ret { + stack_bytes_to_pop, .. + } => { + if stack_bytes_to_pop != 0 { + Inst::AdjustSp { + amount: i64::from(stack_bytes_to_pop), + } + .emit(&[], sink, emit_info, state); + } + //jalr x0, x1, 0 + let x: u32 = (0b1100111) | (1 << 15); + sink.put4(x); + } + + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits: _to_bits, + } => { + let rn = allocs.next(rn); + let rd = allocs.next_writable(rd); + let mut insts = SmallInstVec::new(); + let shift_bits = (64 - from_bits) as i16; + let is_u8 = || from_bits == 8 && signed == false; + if is_u8() { + // special for u8. + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd, + rs: rn, + imm12: Imm12::from_bits(255), + }); + } else { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd, + rs: rn, + imm12: Imm12::from_bits(shift_bits), + }); + insts.push(Inst::AluRRImm12 { + alu_op: if signed { + AluOPRRI::Srai + } else { + AluOPRRI::Srli + }, + rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(shift_bits), + }); + } + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + &Inst::AdjustSp { amount } => { + if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_stack_reg(), + rs: stack_reg(), + imm12: imm, + } + .emit(&[], sink, emit_info, state); + } else { + let tmp = writable_spilltmp_reg(); + let mut insts = Inst::load_constant_u64(tmp, amount as u64, &mut |_| tmp); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_stack_reg(), + rs1: tmp.to_reg(), + rs2: stack_reg(), + }); + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + } + &Inst::Call { ref info } => { + // call + match info.dest { + ExternalName::User { .. } => { + if info.opcode.is_call() { + sink.add_call_site(info.opcode); + } + sink.add_reloc(Reloc::RiscvCall, &info.dest, 0); + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s); + } + Inst::construct_auipc_and_jalr( + Some(writable_link_reg()), + writable_link_reg(), + 0, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + ExternalName::LibCall(..) + | ExternalName::TestCase { .. } + | ExternalName::KnownSymbol(..) => { + // use indirect call. it is more simple. + // load ext name. 
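+                        // Materialize the callee address into the second spill temporary, then call through it with jalr below.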
+ Inst::LoadExtName { + rd: writable_spilltmp_reg2(), + name: Box::new(info.dest.clone()), + offset: 0, + } + .emit(&[], sink, emit_info, state); + + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + if info.opcode.is_call() { + sink.add_call_site(info.opcode); + } + // call + Inst::Jalr { + rd: writable_link_reg(), + base: spilltmp_reg2(), + offset: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + } + } + + let callee_pop_size = i64::from(info.callee_pop_size); + state.virtual_sp_offset -= callee_pop_size; + trace!( + "call adjusts virtual sp offset by {callee_pop_size} -> {}", + state.virtual_sp_offset + ); + } + &Inst::CallInd { ref info } => { + let rn = allocs.next(info.rn); + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + + if info.opcode.is_call() { + sink.add_call_site(info.opcode); + } + Inst::Jalr { + rd: writable_link_reg(), + base: rn, + offset: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + + let callee_pop_size = i64::from(info.callee_pop_size); + state.virtual_sp_offset -= callee_pop_size; + trace!( + "call adjusts virtual sp offset by {callee_pop_size} -> {}", + state.virtual_sp_offset + ); + } + + &Inst::ReturnCall { + ref callee, + ref info, + } => { + emit_return_call_common_sequence( + &mut allocs, + sink, + emit_info, + state, + info.new_stack_arg_size, + info.old_stack_arg_size, + &info.uses, + ); + + sink.add_call_site(ir::Opcode::ReturnCall); + sink.add_reloc(Reloc::RiscvCall, &callee, 0); + Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // `emit_return_call_common_sequence` emits an island if + // necessary, so we can safely disable the worst-case-size check + // in this case. + start_off = sink.cur_offset(); + } + + &Inst::ReturnCallInd { callee, ref info } => { + let callee = allocs.next(callee); + + emit_return_call_common_sequence( + &mut allocs, + sink, + emit_info, + state, + info.new_stack_arg_size, + info.old_stack_arg_size, + &info.uses, + ); + + Inst::Jalr { + rd: writable_zero_reg(), + base: callee, + offset: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + + // `emit_return_call_common_sequence` emits an island if + // necessary, so we can safely disable the worst-case-size check + // in this case. + start_off = sink.cur_offset(); + } + + &Inst::Jal { dest } => { + let code: u32 = 0b1101111; + match dest { + BranchTarget::Label(lable) => { + sink.use_label_at_offset(start_off, lable, LabelUse::Jal20); + sink.add_uncond_branch(start_off, start_off + 4, lable); + sink.put4(code); + } + BranchTarget::ResolvedOffset(offset) => { + let offset = offset as i64; + if offset != 0 { + if LabelUse::Jal20.offset_in_range(offset) { + let mut code = code.to_le_bytes(); + LabelUse::Jal20.patch_raw_offset(&mut code, offset); + sink.put_data(&code[..]); + } else { + Inst::construct_auipc_and_jalr( + None, + writable_spilltmp_reg(), + offset, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + } else { + // CondBr often generate Jal {dest : 0}, means otherwise no jump. 
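+                            // A zero offset is a plain fall-through, so nothing needs to be emitted.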
+ } + } + } + } + &Inst::CondBr { + taken, + not_taken, + mut kind, + } => { + kind.rs1 = allocs.next(kind.rs1); + kind.rs2 = allocs.next(kind.rs2); + match taken { + BranchTarget::Label(label) => { + let code = kind.emit(); + let code_inverse = kind.inverse().emit().to_le_bytes(); + sink.use_label_at_offset(start_off, label, LabelUse::B12); + sink.add_cond_branch(start_off, start_off + 4, label, &code_inverse); + sink.put4(code); + } + BranchTarget::ResolvedOffset(offset) => { + assert!(offset != 0); + if LabelUse::B12.offset_in_range(offset as i64) { + let code = kind.emit(); + let mut code = code.to_le_bytes(); + LabelUse::B12.patch_raw_offset(&mut code, offset as i64); + sink.put_data(&code[..]) + } else { + let mut code = kind.emit().to_le_bytes(); + // jump over the condbr , 4 bytes. + LabelUse::B12.patch_raw_offset(&mut code[..], 4); + sink.put_data(&code[..]); + Inst::construct_auipc_and_jalr( + None, + writable_spilltmp_reg(), + offset as i64, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + } + } + Inst::Jal { dest: not_taken }.emit(&[], sink, emit_info, state); + } + + &Inst::Mov { rd, rm, ty } => { + debug_assert_eq!(rd.to_reg().class(), rm.class()); + if rd.to_reg() == rm { + return; + } + + let rm = allocs.next(rm); + let rd = allocs.next_writable(rd); + + match rm.class() { + RegClass::Int => Inst::AluRRImm12 { + alu_op: AluOPRRI::Ori, + rd: rd, + rs: rm, + imm12: Imm12::zero(), + }, + RegClass::Float => Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjS + } else { + FpuOPRRR::FsgnjD + }, + frm: None, + rd: rd, + rs1: rm, + rs2: rm, + }, + RegClass::Vector => Inst::VecAluRRImm5 { + op: VecAluOpRRImm5::VmvrV, + vd: rd, + vs2: rm, + // Imm 0 means copy 1 register. + imm: Imm5::maybe_from_i8(0).unwrap(), + mask: VecOpMasking::Disabled, + // Vstate for this instruction is ignored. + vstate: VState::from_type(ty), + }, + } + .emit(&[], sink, emit_info, state); + } + + &Inst::MovFromPReg { rd, rm } => { + debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); + let rd = allocs.next_writable(rd); + let x = Inst::AluRRImm12 { + alu_op: AluOPRRI::Ori, + rd, + rs: Reg::from(rm), + imm12: Imm12::zero(), + }; + x.emit(&[], sink, emit_info, state); + } + + &Inst::BrTable { + index, + tmp1, + tmp2, + ref targets, + } => { + let index = allocs.next(index); + let tmp1 = allocs.next_writable(tmp1); + let tmp2 = allocs.next_writable(tmp2); + let ext_index = writable_spilltmp_reg(); + + // The default target is passed in as the 0th element of `targets` + // separate it here for clarity. + let default_target = targets[0]; + let targets = &targets[1..]; + + // We emit a bounds check on the index, if the index is larger than the number of + // jump table entries, we jump to the default block. Otherwise we compute a jump + // offset by multiplying the index by 8 (the size of each entry) and then jump to + // that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially. + // + // Build the following sequence: + // + // extend_index: + // zext.w ext_index, index + // bounds_check: + // li tmp, n_labels + // bltu ext_index, tmp, compute_target + // jump_to_default_block: + // auipc pc, 0 + // jalr zero, pc, default_block + // compute_target: + // auipc pc, 0 + // slli tmp, ext_index, 3 + // add pc, pc, tmp + // jalr zero, pc, 0x10 + // jump_table: + // ; This repeats for each entry in the jumptable + // auipc pc, 0 + // jalr zero, pc, block_target + + // Extend the index to 64 bits. 
+ // + // This prevents us branching on the top 32 bits of the index, which + // are undefined. + Inst::Extend { + rd: ext_index, + rn: index, + signed: false, + from_bits: 32, + to_bits: 64, + } + .emit(&[], sink, emit_info, state); + + // Bounds check. + // + // Check if the index passed in is larger than the number of jumptable + // entries that we have. If it is, we fallthrough to a jump into the + // default block. + Inst::load_constant_u32(tmp2, targets.len() as u64, &mut |_| tmp2) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::CondBr { + taken: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 3), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::UnsignedLessThan, + rs1: ext_index.to_reg(), + rs2: tmp2.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + sink.use_label_at_offset( + sink.cur_offset(), + default_target.as_label().unwrap(), + LabelUse::PCRel32, + ); + Inst::construct_auipc_and_jalr(None, tmp2, 0) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // Compute the jump table offset. + // We need to emit a PC relative offset, + + // Get the current PC. + Inst::Auipc { + rd: tmp1, + imm: Imm20::from_bits(0), + } + .emit(&[], sink, emit_info, state); + + // Multiply the index by 8, since that is the size in + // bytes of each jump table entry + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: ext_index.to_reg(), + imm12: Imm12::from_bits(3), + } + .emit(&[], sink, emit_info, state); + + // Calculate the base of the jump, PC + the offset from above. + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: tmp1, + rs1: tmp1.to_reg(), + rs2: tmp2.to_reg(), + } + .emit(&[], sink, emit_info, state); + + // Jump to the middle of the jump table. + // We add a 16 byte offset here, since we used 4 instructions + // since the AUIPC that was used to get the PC. + Inst::Jalr { + rd: writable_zero_reg(), + base: tmp1.to_reg(), + offset: Imm12::from_bits((4 * Inst::INSTRUCTION_SIZE) as i16), + } + .emit(&[], sink, emit_info, state); + + // Emit the jump table. + // + // Each entry is a aupc + jalr to the target block. We also start with a island + // if necessary. + + // Each entry in the jump table is 2 instructions, so 8 bytes. Check if + // we need to emit a jump table here to support that jump. + let distance = (targets.len() * 2 * Inst::INSTRUCTION_SIZE as usize) as u32; + if sink.island_needed(distance) { + sink.emit_island(&mut state.ctrl_plane); + } + + // Emit the jumps back to back + for target in targets.iter() { + sink.use_label_at_offset( + sink.cur_offset(), + target.as_label().unwrap(), + LabelUse::PCRel32, + ); + + Inst::construct_auipc_and_jalr(None, tmp2, 0) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + + // We've just emitted an island that is safe up to *here*. + // Mark it as such so that we don't needlessly emit additional islands. 
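+                // Resetting start_off also keeps the worst-case-size assertion at the end of emit() from firing on large jump tables.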
+ start_off = sink.cur_offset(); + } + + &Inst::VirtualSPOffsetAdj { amount } => { + crate::trace!( + "virtual sp offset adjusted by {} -> {}", + amount, + state.virtual_sp_offset + amount + ); + state.virtual_sp_offset += amount; + } + &Inst::Atomic { + op, + rd, + addr, + src, + amo, + } => { + let addr = allocs.next(addr); + let src = allocs.next(src); + let rd = allocs.next_writable(rd); + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() { + sink.add_trap(TrapCode::HeapOutOfBounds); + } + let x = op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | op.funct3() << 12 + | reg_to_gpr_num(addr) << 15 + | reg_to_gpr_num(src) << 20 + | op.funct7(amo) << 25; + + sink.put4(x); + } + &Inst::Fence { pred, succ } => { + let x = 0b0001111 + | 0b00000 << 7 + | 0b000 << 12 + | 0b00000 << 15 + | (succ as u32) << 20 + | (pred as u32) << 24; + + sink.put4(x); + } + &Inst::FenceI => sink.put4(0x0000100f), + &Inst::Auipc { rd, imm } => { + let rd = allocs.next_writable(rd); + let x = enc_auipc(rd, imm); + sink.put4(x); + } + + &Inst::LoadAddr { rd, mem } => { + let mem = mem.with_allocs(&mut allocs); + let rd = allocs.next_writable(rd); + + let base = mem.get_base_register(); + let offset = mem.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + + match (mem, base, offset_imm12) { + (_, Some(rs), Some(imm12)) => { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + } + .emit(&[], sink, emit_info, state); + } + (_, Some(rs), None) => { + LoadConstant::U64(offset as u64) + .load_constant_and_add(rd, rs) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + } + (AMode::Const(addr), None, _) => { + // Get an address label for the constant and recurse. + let label = sink.get_label_for_constant(addr); + Inst::LoadAddr { + rd, + mem: AMode::Label(label), + } + .emit(&[], sink, emit_info, state); + } + (AMode::Label(label), None, _) => { + // Get the current PC. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20); + let inst = Inst::Auipc { + rd, + imm: Imm20::from_bits(0), + }; + inst.emit(&[], sink, emit_info, state); + + // Emit an add to the address with a relocation. + // This later gets patched up with the correct offset. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs: rd.to_reg(), + imm12: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + } + (amode, _, _) => { + unimplemented!("LoadAddr: {:?}", amode); + } + } + } + + &Inst::Select { + ref dst, + condition, + ref x, + ref y, + ty: _ty, + } => { + let condition = allocs.next(condition); + let x = alloc_value_regs(x, &mut allocs); + let y = alloc_value_regs(y, &mut allocs); + let dst: Vec<_> = dst + .clone() + .into_iter() + .map(|r| allocs.next_writable(r)) + .collect(); + + let mut insts = SmallInstVec::new(); + let label_false = sink.get_label(); + insts.push(Inst::CondBr { + taken: BranchTarget::Label(label_false), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: condition, + rs2: zero_reg(), + }, + }); + // here is the true + // select the first value + insts.extend(gen_moves(&dst[..], x.regs())); + let label_jump_over = sink.get_label(); + insts.push(Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + }); + // here is false + insts + .drain(..) 
+ .for_each(|i: Inst| i.emit(&[], sink, emit_info, state)); + sink.bind_label(label_false, &mut state.ctrl_plane); + // select second value1 + insts.extend(gen_moves(&dst[..], y.regs())); + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); + } + &Inst::Jalr { rd, base, offset } => { + let rd = allocs.next_writable(rd); + let x = enc_jalr(rd, base, offset); + sink.put4(x); + } + &Inst::ECall => { + sink.put4(0x00000073); + } + &Inst::EBreak => { + sink.put4(0x00100073); + } + &Inst::Icmp { + cc, + rd, + ref a, + ref b, + ty, + } => { + let a = alloc_value_regs(a, &mut allocs); + let b = alloc_value_regs(b, &mut allocs); + let rd = allocs.next_writable(rd); + let label_true = sink.get_label(); + let label_false = sink.get_label(); + Inst::lower_br_icmp( + cc, + a, + b, + BranchTarget::Label(label_true), + BranchTarget::Label(label_false), + ty, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + sink.bind_label(label_true, &mut state.ctrl_plane); + Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 2), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_false, &mut state.ctrl_plane); + Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state); + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + ty, + } => { + let offset = allocs.next(offset); + let e = allocs.next(e); + let addr = allocs.next(addr); + let v = allocs.next(v); + let t0 = allocs.next_writable(t0); + let dst = allocs.next_writable(dst); + + // # addr holds address of memory location + // # e holds expected value + // # v holds desired value + // # dst holds return value + // cas: + // lr.w dst, (addr) # Load original value. + // bne dst, e, fail # Doesn’t match, so fail. + // sc.w t0, v, (addr) # Try to update. + // bnez t0 , cas # if store not ok,retry. + // fail: + let fail_label = sink.get_label(); + let cas_lebel = sink.get_label(); + sink.bind_label(cas_lebel, &mut state.ctrl_plane); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: dst, + addr, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + if ty.bits() < 32 { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } else if ty.bits() == 32 { + Inst::Extend { + rd: dst, + rn: dst.to_reg(), + signed: false, + from_bits: 32, + to_bits: 64, + } + .emit(&[], sink, emit_info, state); + } + Inst::CondBr { + taken: BranchTarget::Label(fail_label), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: e, + rs2: dst.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + let store_value = if ty.bits() < 32 { + // reload value to t0. + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: t0, + addr, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + // set reset part. + AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + t0.to_reg() + } else { + v + }; + Inst::Atomic { + op: AtomicOP::store_op(ty), + rd: t0, + addr, + src: store_value, + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + // check is our value stored. 
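+                // The store-conditional above wrote zero to t0 on success, so a non-zero value means the update lost the race and we retry.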
+ Inst::CondBr { + taken: BranchTarget::Label(cas_lebel), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: t0.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + sink.bind_label(fail_label, &mut state.ctrl_plane); + } + &Inst::AtomicRmwLoop { + offset, + op, + dst, + ty, + p, + x, + t0, + } => { + let offset = allocs.next(offset); + let p = allocs.next(p); + let x = allocs.next(x); + let t0 = allocs.next_writable(t0); + let dst = allocs.next_writable(dst); + let retry = sink.get_label(); + sink.bind_label(retry, &mut state.ctrl_plane); + // load old value. + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: dst, + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + // + + let store_value: Reg = match op { + crate::ir::AtomicRmwOp::Add + | crate::ir::AtomicRmwOp::Sub + | crate::ir::AtomicRmwOp::And + | crate::ir::AtomicRmwOp::Or + | crate::ir::AtomicRmwOp::Xor => { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::AluRRR { + alu_op: match op { + crate::ir::AtomicRmwOp::Add => AluOPRRR::Add, + crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub, + crate::ir::AtomicRmwOp::And => AluOPRRR::And, + crate::ir::AtomicRmwOp::Or => AluOPRRR::Or, + crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor, + _ => unreachable!(), + }, + rd: t0, + rs1: dst.to_reg(), + rs2: x, + } + .emit(&[], sink, emit_info, state); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + crate::ir::AtomicRmwOp::Nand => { + if ty.bits() < 32 { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: t0, + rs1: x, + rs2: dst.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::construct_bit_not(t0, t0.to_reg()).emit(&[], sink, emit_info, state); + if ty.bits() < 32 { + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } else { + t0.to_reg() + } + } + + crate::ir::AtomicRmwOp::Umin + | crate::ir::AtomicRmwOp::Umax + | crate::ir::AtomicRmwOp::Smin + | crate::ir::AtomicRmwOp::Smax => { + let label_select_dst = sink.get_label(); + let label_select_done = sink.get_label(); + if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax + { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + } else { + AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::lower_br_icmp( + match op { + crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan, + crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan, + crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan, + crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan, + _ => unreachable!(), + }, + ValueRegs::one(dst.to_reg()), + ValueRegs::one(x), + BranchTarget::Label(label_select_dst), + 
BranchTarget::zero(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + // here we select x. + Inst::gen_move(t0, x, I64).emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_select_done), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_select_dst, &mut state.ctrl_plane); + Inst::gen_move(t0, dst.to_reg(), I64).emit(&[], sink, emit_info, state); + sink.bind_label(label_select_done, &mut state.ctrl_plane); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + crate::ir::AtomicRmwOp::Xchg => { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + x, + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + }; + + Inst::Atomic { + op: AtomicOP::store_op(ty), + rd: t0, + addr: p, + src: store_value, + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + + // if store is not ok,retry. + Inst::CondBr { + taken: BranchTarget::Label(retry), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: t0.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + } + + &Inst::IntSelect { + op, + ref dst, + ref x, + ref y, + ty, + } => { + let x = alloc_value_regs(x, &mut allocs); + let y = alloc_value_regs(y, &mut allocs); + let dst: Vec<_> = dst.iter().map(|r| allocs.next_writable(*r)).collect(); + let label_true = sink.get_label(); + let label_false = sink.get_label(); + let label_done = sink.get_label(); + Inst::lower_br_icmp( + op.to_int_cc(), + x, + y, + BranchTarget::Label(label_true), + BranchTarget::Label(label_false), + ty, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + let gen_move = |dst: &Vec>, + val: &ValueRegs, + sink: &mut MachBuffer, + state: &mut EmitState| { + let mut insts = SmallInstVec::new(); + insts.push(Inst::Mov { + rd: dst[0], + rm: val.regs()[0], + ty: I64, + }); + if ty.bits() == 128 { + insts.push(Inst::Mov { + rd: dst[1], + rm: val.regs()[1], + ty, + }); + } + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + }; + //here is true , use x. 
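+                // The comparison branched here because it held, so copy x into the destination registers.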
+ sink.bind_label(label_true, &mut state.ctrl_plane); + gen_move(&dst, &x, sink, state); + Inst::gen_jump(label_done).emit(&[], sink, emit_info, state); + // here is false use y + sink.bind_label(label_false, &mut state.ctrl_plane); + gen_move(&dst, &y, sink, state); + sink.bind_label(label_done, &mut state.ctrl_plane); + } + + &Inst::SelectReg { + condition, + rd, + rs1, + rs2, + } => { + let mut condition = condition.clone(); + condition.rs1 = allocs.next(condition.rs1); + condition.rs2 = allocs.next(condition.rs2); + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + let label_true = sink.get_label(); + let label_jump_over = sink.get_label(); + let ty = Inst::canonical_type_for_rc(rs1.class()); + + sink.use_label_at_offset(sink.cur_offset(), label_true, LabelUse::B12); + let x = condition.emit(); + sink.put4(x); + // here is false , use rs2 + Inst::gen_move(rd, rs2, ty).emit(&[], sink, emit_info, state); + // and jump over + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here condition is true , use rs1 + sink.bind_label(label_true, &mut state.ctrl_plane); + Inst::gen_move(rd, rs1, ty).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); + } + &Inst::FcvtToInt { + is_sat, + rd, + rs, + is_signed, + in_type, + out_type, + tmp, + } => { + let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_jump_over = sink.get_label(); + // get if nan. + Inst::emit_not_nan(rd, rs, in_type).emit(&[], sink, emit_info, state); + // jump to nan. + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs2: zero_reg(), + rs1: rd.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + + if !is_sat { + let f32_bounds = f32_cvt_to_int_bounds(is_signed, out_type.bits() as u8); + let f64_bounds = f64_cvt_to_int_bounds(is_signed, out_type.bits() as u8); + if in_type == F32 { + Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.0), |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.0), |_| { + writable_spilltmp_reg() + }) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + let le_op = if in_type == F32 { + FpuOPRRR::FleS + } else { + FpuOPRRR::FleD + }; + + // rd := rs <= tmp + Inst::FpuRRR { + alu_op: le_op, + frm: None, + rd, + rs1: rs, + rs2: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::TrapIf { + test: rd.to_reg(), + trap_code: TrapCode::IntegerOverflow, + } + .emit(&[], sink, emit_info, state); + + if in_type == F32 { + Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.1), |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.1), |_| { + writable_spilltmp_reg() + }) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // rd := rs >= tmp + Inst::FpuRRR { + alu_op: le_op, + frm: None, + rd, + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + + Inst::TrapIf { + test: rd.to_reg(), + trap_code: TrapCode::IntegerOverflow, + } + .emit(&[], sink, emit_info, state); + } + // convert to int normally. 
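+                // Round towards zero so the conversion truncates, as expected for fcvt-to-int.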
+ Inst::FpuRR { + frm: Some(FRM::RTZ), + alu_op: FpuOPRR::float_convert_2_int_op(in_type, is_signed, out_type), + rd, + rs, + } + .emit(&[], sink, emit_info, state); + if out_type.bits() < 32 && is_signed { + // load value part mask. + Inst::load_constant_u32( + writable_spilltmp_reg(), + if 16 == out_type.bits() { + (u16::MAX >> 1) as u64 + } else { + // I8 + (u8::MAX >> 1) as u64 + }, + &mut |_| writable_spilltmp_reg2(), + ) + .into_iter() + .for_each(|x| x.emit(&[], sink, emit_info, state)); + // keep value part. + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg(), + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + // extact sign bit. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(31), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(if 16 == out_type.bits() { + 15 + } else { + // I8 + 7 + }), + } + .emit(&[], sink, emit_info, state); + // make result,sign bit and value part. + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + } + + // I already have the result,jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan , move 0 into rd register + sink.bind_label(label_nan, &mut state.ctrl_plane); + if is_sat { + Inst::load_imm12(rd, Imm12::from_bits(0)).emit(&[], sink, emit_info, state); + } else { + // here is ud2. + Inst::Udf { + trap_code: TrapCode::BadConversionToInteger, + } + .emit(&[], sink, emit_info, state); + } + // bind jump_over + sink.bind_label(label_jump_over, &mut state.ctrl_plane); + } + + &Inst::LoadExtName { + rd, + ref name, + offset, + } => { + let rd = allocs.next_writable(rd); + // get the current pc. + Inst::Auipc { + rd: rd, + imm: Imm20::from_bits(0), + } + .emit(&[], sink, emit_info, state); + // load the value. + Inst::Load { + rd: rd, + op: LoadOP::Ld, + flags: MemFlags::trusted(), + from: AMode::RegOffset( + rd.to_reg(), + 12, // auipc load and jal. + I64, + ), + } + .emit(&[], sink, emit_info, state); + // jump over. + Inst::Jal { + // jal and abs8 size for 12. 
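+                    // 12 = 4 bytes for this jal plus the 8-byte absolute address emitted right after it.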
+ dest: BranchTarget::offset(12), + } + .emit(&[], sink, emit_info, state); + + sink.add_reloc(Reloc::Abs8, name.as_ref(), offset); + sink.put8(0); + } + &Inst::TrapIfC { + rs1, + rs2, + cc, + trap_code, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let label_trap = sink.get_label(); + let label_jump_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_trap), + not_taken: BranchTarget::Label(label_jump_over), + kind: IntegerCompare { kind: cc, rs1, rs2 }, + } + .emit(&[], sink, emit_info, state); + // trap + sink.bind_label(label_trap, &mut state.ctrl_plane); + Inst::Udf { trap_code }.emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); + } + &Inst::TrapIf { test, trap_code } => { + let test = allocs.next(test); + let label_trap = sink.get_label(); + let label_jump_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_trap), + not_taken: BranchTarget::Label(label_jump_over), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: test, + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // trap + sink.bind_label(label_trap, &mut state.ctrl_plane); + Inst::Udf { + trap_code: trap_code, + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); + } + &Inst::Udf { trap_code } => { + sink.add_trap(trap_code); + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + sink.put_data(Inst::TRAP_OPCODE); + } + &Inst::AtomicLoad { rd, ty, p } => { + let p = allocs.next(p); + let rd = allocs.next_writable(rd); + // emit the fence. + Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + // load. + Inst::Load { + rd: rd, + op: LoadOP::from_type(ty), + flags: MemFlags::new(), + from: AMode::RegOffset(p, 0, ty), + } + .emit(&[], sink, emit_info, state); + Inst::Fence { + pred: Inst::FENCE_REQ_R, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + } + &Inst::AtomicStore { src, ty, p } => { + let src = allocs.next(src); + let p = allocs.next(p); + Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(p, 0, ty), + op: StoreOP::from_type(ty), + flags: MemFlags::new(), + src, + } + .emit(&[], sink, emit_info, state); + } + &Inst::FloatRound { + op, + rd, + int_tmp, + f_tmp, + rs, + ty, + } => { + // this code is port from glibc ceil floor ... implementation. + let rs = allocs.next(rs); + let int_tmp = allocs.next_writable(int_tmp); + let f_tmp = allocs.next_writable(f_tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_x = sink.get_label(); + let label_jump_over = sink.get_label(); + // check if is nan. 
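+                // If the not-NaN check below yields zero, the input is NaN and we branch to the NaN handler (which produces rs + rs).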
+ Inst::emit_not_nan(int_tmp, rs, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: int_tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + fn max_value_need_round(ty: Type) -> u64 { + match ty { + F32 => { + let x: u64 = 1 << f32::MANTISSA_DIGITS; + let x = x as f32; + let x = u32::from_le_bytes(x.to_le_bytes()); + x as u64 + } + F64 => { + let x: u64 = 1 << f64::MANTISSA_DIGITS; + let x = x as f64; + u64::from_le_bytes(x.to_le_bytes()) + } + _ => unreachable!(), + } + } + // load max value need to round. + if ty == F32 { + Inst::load_fp_constant32(f_tmp, max_value_need_round(ty) as u32, &mut |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(f_tmp, max_value_need_round(ty), &mut |_| { + writable_spilltmp_reg() + }) + } + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // get abs value. + Inst::emit_fabs(rd, rs, ty).emit(&[], sink, emit_info, state); + + // branch if f_tmp < rd + Inst::FpuRRR { + frm: None, + alu_op: if ty == F32 { + FpuOPRRR::FltS + } else { + FpuOPRRR::FltD + }, + rd: int_tmp, + rs1: f_tmp.to_reg(), + rs2: rd.to_reg(), + } + .emit(&[], sink, emit_info, state); + + Inst::CondBr { + taken: BranchTarget::Label(label_x), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: int_tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + + //convert to int. + Inst::FpuRR { + alu_op: FpuOPRR::float_convert_2_int_op(ty, true, I64), + frm: Some(op.to_frm()), + rd: int_tmp, + rs: rs, + } + .emit(&[], sink, emit_info, state); + //convert back. + Inst::FpuRR { + alu_op: FpuOPRR::int_convert_2_float_op(I64, true, ty), + frm: Some(op.to_frm()), + rd, + rs: int_tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + // copy sign. + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjS + } else { + FpuOPRRR::FsgnjD + }, + frm: None, + rd, + rs1: rd.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + // jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan. + sink.bind_label(label_nan, &mut state.ctrl_plane); + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FaddS + } else { + FpuOPRRR::FaddD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here select origin x. + sink.bind_label(label_x, &mut state.ctrl_plane); + Inst::gen_move(rd, rs, ty).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); + } + + &Inst::FloatSelect { + op, + rd, + tmp, + rs1, + rs2, + ty, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let tmp = allocs.next_writable(tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_jump_over = sink.get_label(); + // check if rs1 is nan. + Inst::emit_not_nan(tmp, rs1, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // check if rs2 is nan. 
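+                // Either operand being NaN diverts to the shared NaN path, which loads a fixed NaN bit pattern below.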
+ Inst::emit_not_nan(tmp, rs2, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // here rs1 and rs2 is not nan. + Inst::FpuRRR { + alu_op: op.to_fpuoprrr(ty), + frm: None, + rd: rd, + rs1: rs1, + rs2: rs2, + } + .emit(&[], sink, emit_info, state); + // special handle for +0 or -0. + { + // check is rs1 and rs2 all equal to zero. + let label_done = sink.get_label(); + { + // if rs1 == 0 + let mut insts = Inst::emit_if_float_not_zero( + tmp, + rs1, + ty, + BranchTarget::Label(label_done), + BranchTarget::zero(), + ); + insts.extend(Inst::emit_if_float_not_zero( + tmp, + rs2, + ty, + BranchTarget::Label(label_done), + BranchTarget::zero(), + )); + insts + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + Inst::FpuRR { + alu_op: FpuOPRR::move_f_to_x_op(ty), + frm: None, + rd: tmp, + rs: rs1, + } + .emit(&[], sink, emit_info, state); + Inst::FpuRR { + alu_op: FpuOPRR::move_f_to_x_op(ty), + frm: None, + rd: writable_spilltmp_reg(), + rs: rs2, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: if op == FloatSelectOP::Max { + AluOPRRR::And + } else { + AluOPRRR::Or + }, + rd: tmp, + rs1: tmp.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + // move back to rd. + Inst::FpuRR { + alu_op: FpuOPRR::move_x_to_f_op(ty), + frm: None, + rd, + rs: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + // + sink.bind_label(label_done, &mut state.ctrl_plane); + } + // we have the reuslt,jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan. + sink.bind_label(label_nan, &mut state.ctrl_plane); + op.snan_bits(tmp, ty) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + // move to rd. + Inst::FpuRR { + alu_op: FpuOPRR::move_x_to_f_op(ty), + frm: None, + rd, + rs: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); + } + &Inst::Popcnt { + sum, + tmp, + step, + rs, + ty, + } => { + let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let sum = allocs.next_writable(sum); + // load 0 to sum , init. + Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and add sum. 
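+                // AND the current probe bit into a scratch register; if the bit is set, bump the running count.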
+ { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_over), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); + } + &Inst::Rev8 { rs, rd, tmp, step } => { + let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let rd = allocs.next_writable(rd); + // init. + Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); + Inst::gen_move(tmp, rs, I64).emit(&[], sink, emit_info, state); + // load 56 to step. + Inst::load_imm12(step, Imm12::from_bits(56)).emit(&[], sink, emit_info, state); + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThan, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: writable_spilltmp_reg(), + rs: tmp.to_reg(), + imm12: Imm12::from_bits(255), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: writable_spilltmp_reg(), + rs1: spilltmp_reg(), + rs2: step.to_reg(), + } + .emit(&[], sink, emit_info, state); + + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + { + // reset step + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-8), + } + .emit(&[], sink, emit_info, state); + //reset tmp. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(8), + } + .emit(&[], sink, emit_info, state); + // loop. + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_done, &mut state.ctrl_plane); + } + &Inst::Cltz { + sum, + tmp, + step, + rs, + leading, + ty, + } => { + let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let sum = allocs.next_writable(sum); + // load 0 to sum , init. 
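+                // The loop scans one bit per iteration, starting from the most significant bit for leading zeros and the least significant bit for trailing zeros.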
+ Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + if leading { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + } + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and add sum. + { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: if leading { + AluOPRRI::Srli + } else { + AluOPRRI::Slli + }, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); + } + &Inst::Brev8 { + rs, + ty, + step, + tmp, + tmp2, + rd, + } => { + let rs = allocs.next(rs); + let step = allocs.next_writable(step); + let tmp = allocs.next_writable(tmp); + let tmp2 = allocs.next_writable(tmp2); + let rd = allocs.next_writable(rd); + Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + Inst::load_imm12(tmp2, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 8) as i16), + } + .emit(&[], sink, emit_info, state); + + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and set bit. 
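+                // If the probed source bit is set, OR the mirrored bit tracked in tmp2 into the result.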
+ { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_over), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp2.to_reg(), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + { + // reset tmp2 + // if (step %=8 == 0) then tmp2 = tmp2 >> 15 + // if (step %=8 != 0) then tmp2 = tmp2 << 1 + let label_over = sink.get_label(); + let label_sll_1 = sink.get_label(); + Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_bits(8)).emit( + &[], + sink, + emit_info, + state, + ); + Inst::AluRRR { + alu_op: AluOPRRR::Rem, + rd: writable_spilltmp_reg2(), + rs1: step.to_reg(), + rs2: spilltmp_reg2(), + } + .emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_sll_1), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: spilltmp_reg2(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits(15), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_over), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_sll_1, &mut state.ctrl_plane); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); + } + &Inst::StackProbeLoop { + guard_size, + probe_count, + tmp: guard_size_tmp, + } => { + let step = writable_spilltmp_reg(); + Inst::load_constant_u64( + step, + (guard_size as u64) * (probe_count as u64), + &mut |_| step, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::load_constant_u64(guard_size_tmp, guard_size as u64, &mut |_| guard_size_tmp) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + let loop_start = sink.get_label(); + let label_done = sink.get_label(); + sink.bind_label(loop_start, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::UnsignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: guard_size_tmp.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // compute address. + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: writable_spilltmp_reg2(), + rs1: stack_reg(), + rs2: step.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(spilltmp_reg2(), 0, I8), + op: StoreOP::Sb, + flags: MemFlags::new(), + src: zero_reg(), + } + .emit(&[], sink, emit_info, state); + // reset step. 
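+                // Step back by one guard-size increment and loop until the remaining distance is at most one guard page.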
+ Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: step, + rs1: step.to_reg(), + rs2: guard_size_tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(loop_start), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_done, &mut state.ctrl_plane); + } + &Inst::VecAluRRRImm5 { + op, + vd, + vd_src, + imm, + vs2, + ref mask, + .. + } => { + let vs2 = allocs.next(vs2); + let vd_src = allocs.next(vd_src); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + debug_assert_eq!(vd.to_reg(), vd_src); + + sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, mask)); + } + &Inst::VecAluRRRR { + op, + vd, + vd_src, + vs1, + vs2, + ref mask, + .. + } => { + let vs1 = allocs.next(vs1); + let vs2 = allocs.next(vs2); + let vd_src = allocs.next(vd_src); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + debug_assert_eq!(vd.to_reg(), vd_src); + + sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, mask)); + } + &Inst::VecAluRRR { + op, + vd, + vs1, + vs2, + ref mask, + .. + } => { + let vs1 = allocs.next(vs1); + let vs2 = allocs.next(vs2); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu(op, vd, vs1, vs2, mask)); + } + &Inst::VecAluRRImm5 { + op, + vd, + imm, + vs2, + ref mask, + .. + } => { + let vs2 = allocs.next(vs2); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, mask)); + } + &Inst::VecAluRR { + op, + vd, + vs, + ref mask, + .. + } => { + let vs = allocs.next(vs); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu_rr(op, vd, vs, mask)); + } + &Inst::VecAluRImm5 { + op, + vd, + imm, + ref mask, + .. + } => { + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu_r_imm(op, vd, imm, mask)); + } + &Inst::VecSetState { rd, ref vstate } => { + let rd = allocs.next_writable(rd); + + sink.put4(encode_vcfg_imm( + 0x57, + rd.to_reg(), + vstate.avl.unwrap_static(), + &vstate.vtype, + )); + + // Update the current vector emit state. + state.vstate = EmitVState::Known(vstate.clone()); + } + + &Inst::VecLoad { + eew, + to, + ref from, + ref mask, + flags, + .. + } => { + let from = from.clone().with_allocs(&mut allocs); + let to = allocs.next_writable(to); + let mask = mask.with_allocs(&mut allocs); + + // Vector Loads don't support immediate offsets, so we need to load it into a register. + let addr = match from { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); + tmp.to_reg() + } + } + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); + } + + sink.put4(encode_vmem_load( + 0x07, + to.to_reg(), + eew, + addr, + from.lumop(), + mask, + from.mop(), + from.nf(), + )); + } + + &Inst::VecStore { + eew, + ref to, + from, + ref mask, + flags, + .. 
+ } => { + let to = to.clone().with_allocs(&mut allocs); + let from = allocs.next(from); + let mask = mask.with_allocs(&mut allocs); + + // Vector Stores don't support immediate offsets, so we need to load it into a register. + let addr = match to { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); + tmp.to_reg() + } + } + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); + } + + sink.put4(encode_vmem_store( + 0x27, + from, + eew, + addr, + to.sumop(), + mask, + to.mop(), + to.nf(), + )); + } + }; + let end_off = sink.cur_offset(); + assert!( + (end_off - start_off) <= Inst::worst_case_size(), + "Inst:{:?} length:{} worst_case_size:{}", + self, + end_off - start_off, + Inst::worst_case_size() + ); + } + + fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String { + let mut allocs = AllocationConsumer::new(allocs); + self.print_with_state(state, &mut allocs) + } +} + +// helper function. +fn alloc_value_regs(orgin: &ValueRegs, alloc: &mut AllocationConsumer) -> ValueRegs { + match orgin.regs().len() { + 1 => ValueRegs::one(alloc.next(orgin.regs()[0])), + 2 => ValueRegs::two(alloc.next(orgin.regs()[0]), alloc.next(orgin.regs()[1])), + _ => unreachable!(), + } +} + +fn emit_return_call_common_sequence( + allocs: &mut AllocationConsumer<'_>, + sink: &mut MachBuffer, + emit_info: &EmitInfo, + state: &mut EmitState, + new_stack_arg_size: u32, + old_stack_arg_size: u32, + uses: &CallArgList, +) { + for u in uses { + let _ = allocs.next(u.vreg); + } + + // We are emitting a dynamic number of instructions and might need an + // island. We emit four instructions regardless of how many stack arguments + // we have, up to two instructions for the actual call, and then two + // instructions per word of stack argument space. + let new_stack_words = new_stack_arg_size / 8; + let insts = 4 + 2 + 2 * new_stack_words; + let space_needed = insts * u32::try_from(Inst::INSTRUCTION_SIZE).unwrap(); + if sink.island_needed(space_needed) { + let jump_around_label = sink.get_label(); + Inst::Jal { + dest: BranchTarget::Label(jump_around_label), + } + .emit(&[], sink, emit_info, state); + sink.emit_island(&mut state.ctrl_plane); + sink.bind_label(jump_around_label, &mut state.ctrl_plane); + } + + // Copy the new frame on top of our current frame. + // + // The current stack layout is the following: + // + // | ... | + // +---------------------+ + // | ... | + // | stack arguments | + // | ... | + // current | return address | + // frame | old FP | <-- FP + // | ... | + // | old stack slots | + // | ... | + // +---------------------+ + // | ... | + // new | new stack arguments | + // frame | ... | <-- SP + // +---------------------+ + // + // We need to restore the old FP, restore the return address from the stack + // to the link register, copy the new stack arguments over the old stack + // arguments, adjust SP to point to the new stack arguments, and then jump + // to the callee (which will push the old FP and RA again). 
Note that the + // actual jump happens outside this helper function. + + assert_eq!( + new_stack_arg_size % 8, + 0, + "size of new stack arguments must be 8-byte aligned" + ); + + // The delta from our frame pointer to the (eventual) stack pointer value + // when we jump to the tail callee. This is the difference in size of stack + // arguments as well as accounting for the two words we pushed onto the + // stack upon entry to this function (the return address and old frame + // pointer). + let fp_to_callee_sp = i64::from(old_stack_arg_size) - i64::from(new_stack_arg_size) + 16; + + let tmp1 = regs::writable_spilltmp_reg(); + let tmp2 = regs::writable_spilltmp_reg2(); + + // Restore the return address to the link register, and load the old FP into + // a temporary register. + // + // We can't put the old FP into the FP register until after we copy the + // stack arguments into place, since that uses address modes that are + // relative to our current FP. + // + // Note that the FP is saved in the function prologue for all non-leaf + // functions, even when `preserve_frame_pointers=false`. Note also that + // `return_call` instructions make it so that a function is considered + // non-leaf. Therefore we always have an FP to restore here. + + Inst::gen_load( + writable_link_reg(), + AMode::FPOffset(8, I64), + I64, + MemFlags::trusted(), + ) + .emit(&[], sink, emit_info, state); + Inst::gen_load(tmp1, AMode::FPOffset(0, I64), I64, MemFlags::trusted()).emit( + &[], + sink, + emit_info, + state, + ); + + // Copy the new stack arguments over the old stack arguments. + for i in (0..new_stack_words).rev() { + // Load the `i`th new stack argument word from the temporary stack + // space. + Inst::gen_load( + tmp2, + AMode::SPOffset(i64::from(i * 8), types::I64), + types::I64, + ir::MemFlags::trusted(), + ) + .emit(&[], sink, emit_info, state); + + // Store it to its final destination on the stack, overwriting our + // current frame. + Inst::gen_store( + AMode::FPOffset(fp_to_callee_sp + i64::from(i * 8), types::I64), + tmp2.to_reg(), + types::I64, + ir::MemFlags::trusted(), + ) + .emit(&[], sink, emit_info, state); + } + + // Initialize the SP for the tail callee, deallocating the temporary stack + // argument space and our current frame at the same time. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: regs::writable_stack_reg(), + rs: regs::fp_reg(), + imm12: Imm12::maybe_from_u64(fp_to_callee_sp as u64).unwrap(), + } + .emit(&[], sink, emit_info, state); + + // Move the old FP value from the temporary into the FP register. 
+ Inst::Mov { + ty: types::I64, + rd: regs::writable_fp_reg(), + rm: tmp1.to_reg(), + } + .emit(&[], sink, emit_info, state); + + state.virtual_sp_offset -= i64::from(new_stack_arg_size); + trace!( + "return_call[_ind] adjusts virtual sp offset by {} -> {}", + new_stack_arg_size, + state.virtual_sp_offset + ); +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit_tests.rs b/cranelift/codegen/src/isa/zkasm/inst/emit_tests.rs new file mode 100644 index 000000000000..41e8ea6f8a6f --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/emit_tests.rs @@ -0,0 +1,2338 @@ +#[allow(unused)] +use crate::ir::LibCall; +use crate::isa::zkasm::inst::*; +use crate::settings; +use alloc::vec::Vec; +use std::borrow::Cow; + +#[test] +fn test_zkasm_binemit() { + struct TestUnit { + inst: Inst, + assembly: &'static str, + code: TestEncoding, + } + + struct TestEncoding(Cow<'static, str>); + + impl From<&'static str> for TestEncoding { + fn from(value: &'static str) -> Self { + Self(value.into()) + } + } + + impl From for TestEncoding { + fn from(value: u32) -> Self { + let value = value.swap_bytes(); + let value = format!("{value:08X}"); + Self(value.into()) + } + } + + impl TestUnit { + fn new(inst: Inst, assembly: &'static str, code: impl Into) -> Self { + let code = code.into(); + Self { + inst, + assembly, + code, + } + } + } + + let mut insns = Vec::::with_capacity(500); + + insns.push(TestUnit::new( + Inst::Ret { + rets: vec![], + stack_bytes_to_pop: 0, + }, + "ret", + 0x00008067, + )); + insns.push(TestUnit::new( + Inst::Ret { + rets: vec![], + stack_bytes_to_pop: 16, + }, + "add sp, sp, #16 ; ret", + "1301010167800000", + )); + + insns.push(TestUnit::new( + Inst::Mov { + rd: writable_fa0(), + rm: fa1(), + ty: F32, + }, + "fmv.s fa0,fa1", + 0x20b58553, + )); + + insns.push(TestUnit::new( + Inst::Mov { + rd: writable_fa0(), + rm: fa1(), + ty: F64, + }, + "fmv.d fa0,fa1", + 0x22b58553, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Brev8, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "brev8 a1,a0", + 0x68755593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Rev8, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "rev8 a1,a0", + 0x6b855593, + )); + + // + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bclri, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "bclri a1,a0,5", + 0x48551593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bexti, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "bexti a1,a0,5", + 0x48555593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Binvi, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "binvi a1,a0,5", + 0x68551593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bseti, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "bseti a1,a0,5", + 0x28551593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Rori, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "rori a1,a0,5", + 0x60555593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Roriw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "roriw a1,a0,5", + 0x6055559b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SlliUw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "slli.uw a1,a0,5", + 0x855159b, 
+ )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Clz, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "clz a1,a0", + 0x60051593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Clzw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "clzw a1,a0", + 0x6005159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Cpop, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "cpop a1,a0", + 0x60251593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Cpopw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "cpopw a1,a0", + 0x6025159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Ctz, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "ctz a1,a0", + 0x60151593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Ctzw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "ctzw a1,a0", + 0x6015159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sextb, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "sext.b a1,a0", + 0x60451593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sexth, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "sext.h a1,a0", + 0x60551593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Zexth, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "zext.h a1,a0", + 0x80545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Orcb, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "orc.b a1,a0", + 0x28755593, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "zext.w a1,a0", + 0x80505bb, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Adduw, + rd: writable_a1(), + rs1: a0(), + rs2: a1(), + }, + "add.uw a1,a0,a1", + 0x08b505bb, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Andn, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "andn a1,a0,zero", + 0x400575b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bclr, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bclr a1,a0,zero", + 0x480515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bext, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bext a1,a0,zero", + 0x480555b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Binv, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "binv a1,a0,zero", + 0x680515b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bset, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bset a1,a0,zero", + 0x280515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmul, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmul a1,a0,zero", + 0xa0515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmulh, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmulh a1,a0,zero", + 0xa0535b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmulr, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmulr a1,a0,zero", + 0xa0525b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Max, + rd: 
writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "max a1,a0,zero", + 0xa0565b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Maxu, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "maxu a1,a0,zero", + 0xa0575b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Min, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "min a1,a0,zero", + 0xa0545b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Minu, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "minu a1,a0,zero", + 0xa0555b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Orn, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "orn a1,a0,zero", + 0x400565b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rol, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rol a1,a0,zero", + 0x600515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rolw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rolw a1,a0,zero", + 0x600515bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Ror, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "ror a1,a0,zero", + 0x600555b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rorw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rorw a1,a0,zero", + 0x600555bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh1add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh1add a1,a0,zero", + 0x200525b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh1adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh1add.uw a1,a0,zero", + 0x200525bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh2add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh2add a1,a0,zero", + 0x200545b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh2adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh2add.uw a1,a0,zero", + 0x200545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh3add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh3add a1,a0,zero", + 0x200565b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh3adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh3add.uw a1,a0,zero", + 0x200565bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Xnor, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "xnor a1,a0,zero", + 0x400545b3, + )); + + // Zbkb + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Pack, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "pack a1,a0,zero", + 0x080545b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Packw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "packw a1,a0,zero", + 0x080545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Packh, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "packh a1,a0,zero", + 0x080575b3, + )); + + // + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_fp_reg(), + rs1: fp_reg(), + rs2: zero_reg(), + }, + "add fp,fp,zero", + 0x40433, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_fp_reg(), + rs: stack_reg(), + imm12: Imm12::maybe_from_u64(100).unwrap(), 
+ }, + "addi fp,sp,100", + 0x6410413, + )); + insns.push(TestUnit::new( + Inst::Lui { + rd: writable_zero_reg(), + imm: Imm20::from_bits(120), + }, + "lui zero,120", + 0x78037, + )); + insns.push(TestUnit::new( + Inst::Auipc { + rd: writable_zero_reg(), + imm: Imm20::from_bits(120), + }, + "auipc zero,120", + 0x78017, + )); + + insns.push(TestUnit::new( + Inst::Jalr { + rd: writable_a0(), + base: a0(), + offset: Imm12::from_bits(100), + }, + "jalr a0,100(a0)", + 0x6450567, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lb, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I8), + }, + "lb a0,100(a1)", + 0x6458503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lh, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I16), + }, + "lh a0,100(a1)", + 0x6459503, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lw, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I32), + }, + "lw a0,100(a1)", + 0x645a503, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Ld, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "ld a0,100(a1)", + 0x645b503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: Writable::from_reg(fa0()), + op: LoadOP::Flw, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "flw fa0,100(a1)", + 0x645a507, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: Writable::from_reg(fa0()), + op: LoadOP::Fld, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "fld fa0,100(a1)", + 0x645b507, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I8), + op: StoreOP::Sb, + flags: MemFlags::new(), + src: a0(), + }, + "sb a0,100(sp)", + 0x6a10223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I16), + op: StoreOP::Sh, + flags: MemFlags::new(), + src: a0(), + }, + "sh a0,100(sp)", + 0x6a11223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I32), + op: StoreOP::Sw, + flags: MemFlags::new(), + src: a0(), + }, + "sw a0,100(sp)", + 0x6a12223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Sd, + flags: MemFlags::new(), + src: a0(), + }, + "sd a0,100(sp)", + 0x6a13223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Fsw, + flags: MemFlags::new(), + src: fa0(), + }, + "fsw fa0,100(sp)", + 0x6a12227, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Fsd, + flags: MemFlags::new(), + src: fa0(), + }, + "fsd fa0,100(sp)", + 0x6a13227, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "addi a0,a0,100", + 0x6450513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slti, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "slti a0,a0,100", + 0x6452513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SltiU, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "sltiu a0,a0,100", + 0x6453513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Xori, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "xori a0,a0,100", + 0x6454513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: 
AluOPRRI::Andi, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "andi a0,a0,100", + 0x6457513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "slli a0,a0,5", + 0x551513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srli a0,a0,5", + 0x555513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srai, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srai a0,a0,5", + 0x40555513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(120), + }, + "addiw a0,a0,120", + 0x785051b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slliw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "slliw a0,a0,5", + 0x55151b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SrliW, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srliw a0,a0,5", + 0x55551b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sraiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "sraiw a0,a0,5", + 0x4055551b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sraiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "sraiw a0,a0,5", + 0x4055551b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "add a0,a0,a1", + 0xb50533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sub a0,a0,a1", + 0x40b50533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sll a0,a0,a1", + 0xb51533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Slt, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "slt a0,a0,a1", + 0xb52533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::SltU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sltu a0,a0,a1", + 0xb53533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Xor, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "xor a0,a0,a1", + 0xb54533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "srl a0,a0,a1", + 0xb55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sra, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sra a0,a0,a1", + 0x40b55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "or a0,a0,a1", + 0xb56533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "and a0,a0,a1", + 0xb57533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Addw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "addw a0,a0,a1", + 0xb5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Subw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "subw a0,a0,a1", + 0x40b5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: 
AluOPRRR::Sllw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sllw a0,a0,a1", + 0xb5153b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Srlw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "srlw a0,a0,a1", + 0xb5553b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sraw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sraw a0,a0,a1", + 0x40b5553b, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mul, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mul a0,a0,a1", + 0x2b50533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulh, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulh a0,a0,a1", + 0x2b51533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulhsu, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulhsu a0,a0,a1", + 0x2b52533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulhu, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulhu a0,a0,a1", + 0x2b53533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Div, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "div a0,a0,a1", + 0x2b54533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::DivU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "divu a0,a0,a1", + 0x2b55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rem, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "rem a0,a0,a1", + 0x2b56533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::RemU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remu a0,a0,a1", + 0x2b57533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulw a0,a0,a1", + 0x2b5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Divw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "divw a0,a0,a1", + 0x2b5453b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Remw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remw a0,a0,a1", + 0x2b5653b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Remuw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remuw a0,a0,a1", + 0x2b5753b, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRRR::FaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fadd.s fa0,fa0,fa1,rne", + 0xb50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RTZ), + alu_op: FpuOPRRR::FsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsub.s fa0,fa0,fa1,rtz", + 0x8b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RUP), + alu_op: FpuOPRRR::FmulS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmul.s fa0,fa0,fa1,rup", + 0x10b53553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FdivS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fdiv.s fa0,fa0,fa1", + 0x18b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnj.s fa0,fa0,fa1", + 0x20b50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjnS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjn.s fa0,fa0,fa1", + 0x20b51553, + )); + + insns.push(TestUnit::new( + 
Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjxS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjx.s fa0,fa0,fa1", + 0x20b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FminS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmin.s fa0,fa0,fa1", + 0x28b50553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmaxS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmax.s fa0,fa0,fa1", + 0x28b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FeqS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "feq.s a0,fa0,fa1", + 0xa0b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FltS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "flt.s a0,fa0,fa1", + 0xa0b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FleS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "fle.s a0,fa0,fa1", + 0xa0b50553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fadd.d fa0,fa0,fa1", + 0x2b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsub.d fa0,fa0,fa1", + 0xab57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmulD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmul.d fa0,fa0,fa1", + 0x12b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FdivD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fdiv.d fa0,fa0,fa1", + 0x1ab57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnj.d fa0,fa0,fa1", + 0x22b50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjnD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjn.d fa0,fa0,fa1", + 0x22b51553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjxD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjx.d fa0,fa0,fa1", + 0x22b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FminD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmin.d fa0,fa0,fa1", + 0x2ab50553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmaxD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmax.d fa0,fa0,fa1", + 0x2ab51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FeqD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "feq.d a0,fa0,fa1", + 0xa2b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FltD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "flt.d a0,fa0,fa1", + 0xa2b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FleD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "fle.d a0,fa0,fa1", + 0xa2b50553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRR::FsqrtS, + rd: writable_fa0(), + rs: fa1(), + }, + "fsqrt.s fa0,fa1,rne", + 0x58058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWS, + 
rd: writable_a0(), + rs: fa1(), + }, + "fcvt.w.s a0,fa1", + 0xc005f553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWuS, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.wu.s a0,fa1", + 0xc015f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvXW, + rd: writable_a0(), + rs: fa1(), + }, + "fmv.x.w a0,fa1", + 0xe0058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FclassS, + rd: writable_a0(), + rs: fa1(), + }, + "fclass.s a0,fa1", + 0xe0059553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSw, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.w fa0,a0", + 0xd0057553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSwU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.wu fa0,a0", + 0xd0157553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvWX, + rd: writable_fa0(), + rs: a0(), + }, + "fmv.w.x fa0,a0", + 0xf0050553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLS, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.l.s a0,fa0", + 0xc0257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLuS, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.lu.s a0,fa0", + 0xc0357553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + + alu_op: FpuOPRR::FcvtSL, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.l fa0,a0", + 0xd0257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSLU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.lu fa0,a0", + 0xd0357553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FsqrtD, + rd: writable_fa0(), + rs: fa1(), + }, + "fsqrt.d fa0,fa1", + 0x5a05f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWD, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.w.d a0,fa1", + 0xc205f553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWuD, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.wu.d a0,fa1", + 0xc215f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvXD, + rd: writable_a0(), + rs: fa1(), + }, + "fmv.x.d a0,fa1", + 0xe2058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FclassD, + rd: writable_a0(), + rs: fa1(), + }, + "fclass.d a0,fa1", + 0xe2059553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSD, + rd: writable_fa0(), + rs: fa0(), + }, + "fcvt.s.d fa0,fa0", + 0x40157553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtDWU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.wu fa0,a0", + 0xd2150553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvDX, + rd: writable_fa0(), + rs: a0(), + }, + "fmv.d.x fa0,a0", + 0xf2050553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLD, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.l.d a0,fa0", + 0xc2257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLuD, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.lu.d a0,fa0", + 0xc2357553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtDL, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.l fa0,a0", + 0xd2257553, + )); 
+ insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtDLu, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.lu fa0,a0", + 0xd2357553, + )); + ////////////////////// + + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRRRR::FmaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmadd.s fa0,fa0,fa1,fa7,rne", + 0x88b50543, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FmsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmsub.s fa0,fa0,fa1,fa7", + 0x88b57547, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmsub.s fa0,fa0,fa1,fa7", + 0x88b5754b, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmadd.s fa0,fa0,fa1,fa7", + 0x88b5754f, + )); + + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FmaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmadd.d fa0,fa0,fa1,fa7", + 0x8ab57543, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + + alu_op: FpuOPRRRR::FmsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmsub.d fa0,fa0,fa1,fa7", + 0x8ab57547, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmsub.d fa0,fa0,fa1,fa7", + 0x8ab5754b, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmadd.d fa0,fa0,fa1,fa7", + 0x8ab5754f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::LrW, + rd: writable_a0(), + addr: a1(), + src: zero_reg(), + amo: AMO::Relax, + }, + "lr.w a0,(a1)", + 0x1005a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::ScW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Release, + }, + "sc.w.rl a0,a2,(a1)", + 0x1ac5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoswapW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Aquire, + }, + "amoswap.w.aq a0,a2,(a1)", + 0xcc5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoaddW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::SeqCst, + }, + "amoadd.w.aqrl a0,a2,(a1)", + 0x6c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoxorW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoxor.w a0,a2,(a1)", + 0x20c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoandW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoand.w a0,a2,(a1)", + 0x60c5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoorW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoor.w a0,a2,(a1)", + 0x40c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomin.w a0,a2,(a1)", + 0x80c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomax.w a0,a2,(a1)", + 0xa0c5a52f, + 
)); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominuW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amominu.w a0,a2,(a1)", + 0xc0c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxuW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomaxu.w a0,a2,(a1)", + 0xe0c5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::LrD, + rd: writable_a0(), + addr: a1(), + src: zero_reg(), + amo: AMO::Relax, + }, + "lr.d a0,(a1)", + 0x1005b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::ScD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "sc.d a0,a2,(a1)", + 0x18c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoswapD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoswap.d a0,a2,(a1)", + 0x8c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoaddD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoadd.d a0,a2,(a1)", + 0xc5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoxorD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoxor.d a0,a2,(a1)", + 0x20c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoandD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoand.d a0,a2,(a1)", + 0x60c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoorD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoor.d a0,a2,(a1)", + 0x40c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomin.d a0,a2,(a1)", + 0x80c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomax.d a0,a2,(a1)", + 0xa0c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominuD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amominu.d a0,a2,(a1)", + 0xc0c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxuD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomaxu.d a0,a2,(a1)", + 0xe0c5b52f, + )); + + ///////// + insns.push(TestUnit::new( + Inst::Fence { + pred: 1, + succ: 1 << 1, + }, + "fence w,r", + 0x120000f, + )); + insns.push(TestUnit::new(Inst::FenceI {}, "fence.i", 0x100f)); + insns.push(TestUnit::new(Inst::ECall {}, "ecall", 0x73)); + insns.push(TestUnit::new(Inst::EBreak {}, "ebreak", 0x100073)); + + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjS, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fmv.s fa0,fa1", + 0x20b58553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjD, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fmv.d fa0,fa1", + 0x22b58553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjnS, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fneg.s fa0,fa1", + 0x20b59553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjnD, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fneg.d fa0,fa1", + 0x22b59553, + )); + + let (flags, isa_flags) = make_test_flags(); + let emit_info = 
EmitInfo::new(flags, isa_flags); + + for unit in insns.iter() { + println!("Riscv64: {:?}, {}", unit.inst, unit.assembly); + // Check the printed text is as expected. + let actual_printing = unit + .inst + .print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[])); + assert_eq!(unit.assembly, actual_printing); + let mut buffer = MachBuffer::new(); + unit.inst + .emit(&[], &mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(&Default::default(), &mut Default::default()); + let actual_encoding = buffer.stringify_code_bytes(); + + assert_eq!(actual_encoding, unit.code.0); + } +} + +fn make_test_flags() -> (settings::Flags, super::super::riscv_settings::Flags) { + let b = settings::builder(); + let flags = settings::Flags::new(b.clone()); + let b2 = super::super::riscv_settings::builder(); + let isa_flags = super::super::riscv_settings::Flags::new(&flags, &b2); + (flags, isa_flags) +} + +#[derive(Debug)] +pub(crate) struct DebugRTypeInst { + op_code: u32, + rd: u32, + funct3: u32, + rs1: u32, + rs2: u32, + funct7: u32, +} + +impl DebugRTypeInst { + pub(crate) fn from_bs(x: &[u8]) -> Option { + if x.len() != 4 { + return None; + } + let a = [x[0], x[1], x[2], x[3]]; + Some(Self::from_u32(u32::from_le_bytes(a))) + } + + pub(crate) fn from_u32(x: u32) -> Self { + let op_code = x & 0b111_1111; + let x = x >> 7; + let rd = x & 0b1_1111; + let x = x >> 5; + let funct3 = x & 0b111; + let x = x >> 3; + let rs1 = x & 0b1_1111; + let x = x >> 5; + let rs2 = x & 0b1_1111; + let x = x >> 5; + let funct7 = x & 0b111_1111; + Self { + op_code, + rd, + funct3, + rs1, + rs2, + funct7, + } + } +} + +#[derive(Debug)] +pub(crate) struct DebugITypeInst { + op_code: u32, + rd: u32, + funct3: u32, + rs: u32, + imm12: u32, + shamt5: u32, + shamt6: u32, + funct7: u32, + funct6: u32, +} + +impl DebugITypeInst { + pub(crate) fn from_bs(x: &[u8]) -> Self { + let a = [x[0], x[1], x[2], x[3]]; + Self::from_u32(u32::from_le_bytes(a)) + } + pub(crate) fn from_u32(x: u32) -> Self { + let op_code = x & 0b111_1111; + let x = x >> 7; + let rd = x & 0b1_1111; + let x = x >> 5; + let funct3 = x & 0b111; + let x = x >> 3; + let rs = x & 0b1_1111; + let x = x >> 5; + let imm12 = x & 0b1111_1111_1111; + let shamt5 = imm12 & 0b1_1111; + let shamt6 = imm12 & 0b11_1111; + let funct7 = imm12 >> 5; + let funct6 = funct7 >> 1; + Self { + op_code, + rd, + funct3, + rs, + imm12, + shamt5, + shamt6, + funct7, + funct6, + } + } + fn print_b(self) { + println!("opcode:{:b}", self.op_code); + println!("rd:{}", self.rd); + println!("funct3:{:b}", self.funct3); + println!("rs:{}", self.rs); + println!("shamt5:{:b}", self.shamt5); + println!("shamt6:{:b}", self.shamt6); + println!("funct6:{:b}", self.funct6); + println!("funct7:{:b}", self.funct7); + } +} + +#[test] +fn xxx() { + let x = 1240847763; + let x = DebugITypeInst::from_u32(x); + x.print_b(); +} + +#[test] +fn zkasm_worst_case_instruction_size() { + let (flags, isa_flags) = make_test_flags(); + let emit_info = EmitInfo::new(flags, isa_flags); + + //there are all candidates potential generate a lot of bytes. 
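+    // Each candidate expands to a multi-instruction sequence; the assertion at
+    // the end of this test checks that the longest emitted sequence still fits
+    // within `Inst::worst_case_size()`.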
+ let mut candidates: Vec = vec![]; + + candidates.push(Inst::IntSelect { + dst: vec![writable_a0(), writable_a0()], + ty: I128, + op: IntSelectOP::Smax, + x: ValueRegs::two(x_reg(1), x_reg(2)), + y: ValueRegs::two(x_reg(3), x_reg(4)), + }); + + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I8, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I16, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F32, + out_type: I8, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F32, + out_type: I16, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I8, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I16, + is_sat: false, + tmp: writable_a1(), + }); + + candidates.push(Inst::FloatRound { + op: FloatRoundOP::Trunc, + int_tmp: writable_a0(), + f_tmp: writable_a0(), + rd: writable_fa0(), + rs: fa0(), + ty: F64, + }); + + candidates.push(Inst::FloatSelect { + op: FloatSelectOP::Max, + rd: writable_fa0(), + tmp: writable_a0(), + rs1: fa0(), + rs2: fa0(), + ty: F64, + }); + + let mut max: (u32, MInst) = (0, Inst::Nop0); + for i in candidates { + let mut buffer = MachBuffer::new(); + i.emit(&[], &mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(&Default::default(), &mut Default::default()); + let length = buffer.data().len() as u32; + if length > max.0 { + let length = buffer.data().len() as u32; + max = (length, i.clone()); + } + println!("insn:{:?} length: {}", i, length); + } + println!("calculate max size is {} , inst is {:?}", max.0, max.1); + assert!(max.0 <= Inst::worst_case_size()); +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/encode.rs b/cranelift/codegen/src/isa/zkasm/inst/encode.rs new file mode 100644 index 000000000000..69d18d9bae77 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/encode.rs @@ -0,0 +1,299 @@ +//! Contains the RISC-V instruction encoding logic. +//! +//! These formats are specified in the RISC-V specification in section 2.2. +//! See: https://riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf +//! +//! Some instructions especially in extensions have slight variations from +//! the base RISC-V specification. 
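+//!
+//! As a quick orientation to how the field-packing helpers below compose an
+//! instruction word, here is the R-type `add a0,a0,a1` encoding (0x00B50533,
+//! the same value exercised in `emit_tests.rs`) broken into its fields:
+//!
+//! ```text
+//! opcode(0x33) | rd(10)<<7 | funct3(0)<<12 | rs1(10)<<15 | rs2(11)<<20 | funct7(0)<<25
+//!     = 0x00B50533
+//! ```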
+ +use super::{Imm12, Imm5, UImm5, VType}; +use crate::isa::zkasm::inst::reg_to_gpr_num; +use crate::isa::zkasm::lower::isle::generated_code::{ + VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecAluOpRRRR, + VecElementWidth, VecOpCategory, VecOpMasking, +}; +use crate::machinst::isle::WritableReg; +use crate::Reg; + +fn unsigned_field_width(value: u32, width: u8) -> u32 { + debug_assert_eq!(value & (!0 << width), 0); + value +} + +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20------24-25-------31 +/// | Opcode | rd | funct3 | rs1 | rs2 | funct7 | +fn encode_r_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, rs2: u32, funct7: u32) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= unsigned_field_width(rd, 5) << 7; + bits |= unsigned_field_width(funct3, 3) << 12; + bits |= unsigned_field_width(rs1, 5) << 15; + bits |= unsigned_field_width(rs2, 5) << 20; + bits |= unsigned_field_width(funct7, 7) << 25; + bits +} + +/// Encode an R-type instruction. +pub fn encode_r_type( + opcode: u32, + rd: WritableReg, + funct3: u32, + rs1: Reg, + rs2: Reg, + funct7: u32, +) -> u32 { + encode_r_type_bits( + opcode, + reg_to_gpr_num(rd.to_reg()), + funct3, + reg_to_gpr_num(rs1), + reg_to_gpr_num(rs2), + funct7, + ) +} + +/// Encode an I-type instruction. +/// +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20------------------31 +/// | Opcode | rd | width | rs1 | Offset[11:0] | +pub fn encode_i_type(opcode: u32, rd: WritableReg, width: u32, rs1: Reg, offset: Imm12) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= reg_to_gpr_num(rd.to_reg()) << 7; + bits |= unsigned_field_width(width, 3) << 12; + bits |= reg_to_gpr_num(rs1) << 15; + bits |= unsigned_field_width(offset.as_u32(), 12) << 20; + bits +} + +/// Encode an S-type instruction. +/// +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20---24-25-------------31 +/// | Opcode | imm[4:0] | width | base | src | imm[11:5] | +pub fn encode_s_type(opcode: u32, width: u32, base: Reg, src: Reg, offset: Imm12) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= (offset.as_u32() & 0b11111) << 7; + bits |= unsigned_field_width(width, 3) << 12; + bits |= reg_to_gpr_num(base) << 15; + bits |= reg_to_gpr_num(src) << 20; + bits |= unsigned_field_width(offset.as_u32() >> 5, 7) << 25; + bits +} + +/// Encodes a Vector ALU instruction. +/// +/// Fields: +/// - opcode (7 bits) +/// - vd (5 bits) +/// - funct3 (3 bits) +/// - vs1 (5 bits) +/// - vs2 (5 bits) +/// - vm (1 bit) +/// - funct6 (6 bits) +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc +pub fn encode_valu( + op: VecAluOpRRR, + vd: WritableReg, + vs1: Reg, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + reg_to_gpr_num(vs1), + reg_to_gpr_num(vs2), + funct7, + ) +} + +/// Encodes a Vector ALU+Imm instruction. +/// This is just a Vector ALU instruction with an immediate in the VS1 field. 
+/// +/// Fields: +/// - opcode (7 bits) +/// - vd (5 bits) +/// - funct3 (3 bits) +/// - imm (5 bits) +/// - vs2 (5 bits) +/// - vm (1 bit) +/// - funct6 (6 bits) +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc +pub fn encode_valu_rr_imm( + op: VecAluOpRRImm5, + vd: WritableReg, + imm: Imm5, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + let imm = imm.bits() as u32; + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + imm, + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rrrr( + op: VecAluOpRRRR, + vd: WritableReg, + vs2: Reg, + vs1: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + reg_to_gpr_num(vs1), + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rrr_imm( + op: VecAluOpRRRImm5, + vd: WritableReg, + imm: Imm5, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + let imm = imm.bits() as u32; + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + imm, + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rr(op: VecAluOpRR, vd: WritableReg, vs: Reg, masking: VecOpMasking) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + + let (vs1, vs2) = if op.vs_is_vs2_encoded() { + (op.aux_encoding(), reg_to_gpr_num(vs)) + } else { + (reg_to_gpr_num(vs), op.aux_encoding()) + }; + + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + vs1, + vs2, + funct7, + ) +} + +pub fn encode_valu_r_imm( + op: VecAluOpRImm5, + vd: WritableReg, + imm: Imm5, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + + // This is true for this opcode, not sure if there are any other ones. + debug_assert_eq!(op, VecAluOpRImm5::VmvVI); + let vs1 = imm.bits() as u32; + let vs2 = op.aux_encoding(); + + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + vs1, + vs2, + funct7, + ) +} + +/// Encodes a Vector CFG Imm instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc +// TODO: Check if this is any of the known instruction types in the spec. +pub fn encode_vcfg_imm(opcode: u32, rd: Reg, imm: UImm5, vtype: &VType) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= reg_to_gpr_num(rd) << 7; + bits |= VecOpCategory::OPCFG.encode() << 12; + bits |= unsigned_field_width(imm.bits(), 5) << 15; + bits |= unsigned_field_width(vtype.encode(), 10) << 20; + bits |= 0b11 << 30; + bits +} + +/// Encodes a Vector Mem Unit Stride Load instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc +/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP +pub fn encode_vmem_load( + opcode: u32, + vd: Reg, + width: VecElementWidth, + rs1: Reg, + lumop: u32, + masking: VecOpMasking, + mop: u32, + nf: u32, +) -> u32 { + // Width is encoded differently to avoid a clash with the FP load/store sizes. 
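+    // The scalar FP loads in this opcode space use funct3 0b010 (`flw`) and
+    // 0b011 (`fld`), so the vector element widths are mapped onto 0b000 and
+    // 0b101..=0b111 instead (see the match below).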
+ let width = match width { + VecElementWidth::E8 => 0b000, + VecElementWidth::E16 => 0b101, + VecElementWidth::E32 => 0b110, + VecElementWidth::E64 => 0b111, + }; + + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= reg_to_gpr_num(vd) << 7; + bits |= width << 12; + bits |= reg_to_gpr_num(rs1) << 15; + bits |= unsigned_field_width(lumop, 5) << 20; + bits |= masking.encode() << 25; + bits |= unsigned_field_width(mop, 2) << 26; + + // The mew bit (inst[28]) when set is expected to be used to encode expanded + // memory sizes of 128 bits and above, but these encodings are currently reserved. + bits |= 0b0 << 28; + + bits |= unsigned_field_width(nf, 3) << 29; + bits +} + +/// Encodes a Vector Mem Unit Stride Load instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc +/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP +pub fn encode_vmem_store( + opcode: u32, + vs3: Reg, + width: VecElementWidth, + rs1: Reg, + sumop: u32, + masking: VecOpMasking, + mop: u32, + nf: u32, +) -> u32 { + // This is pretty much the same as the load instruction, just + // with different names on the fields. + encode_vmem_load(opcode, vs3, width, rs1, sumop, masking, mop, nf) +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/imms.rs b/cranelift/codegen/src/isa/zkasm/inst/imms.rs new file mode 100644 index 000000000000..2f9b544b15ed --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/imms.rs @@ -0,0 +1,250 @@ +//! Riscv64 ISA definitions: immediate constants. + +// Some variants are never constructed, but we still want them as options in the future. +use super::Inst; +#[allow(dead_code)] +use std::fmt::{Debug, Display, Formatter, Result}; + +#[derive(Copy, Clone, Debug, Default)] +pub struct Imm12 { + pub bits: i16, +} + +impl Imm12 { + pub(crate) const FALSE: Self = Self { bits: 0 }; + pub(crate) const TRUE: Self = Self { bits: 1 }; + pub fn maybe_from_u64(val: u64) -> Option { + let sign_bit = 1 << 11; + if val == 0 { + Some(Imm12 { bits: 0 }) + } else if (val & sign_bit) != 0 && (val >> 12) == 0xffff_ffff_ffff_f { + Some(Imm12 { + bits: (val & 0xffff) as i16, + }) + } else if (val & sign_bit) == 0 && (val >> 12) == 0 { + Some(Imm12 { + bits: (val & 0xffff) as i16, + }) + } else { + None + } + } + #[inline] + pub fn from_bits(bits: i16) -> Self { + Self { bits: bits & 0xfff } + } + /// Create a zero immediate of this format. + #[inline] + pub fn zero() -> Self { + Imm12 { bits: 0 } + } + #[inline] + pub fn as_i16(self) -> i16 { + self.bits + } + #[inline] + pub fn as_u32(&self) -> u32 { + (self.bits as u32) & 0xfff + } +} + +impl Into for Imm12 { + fn into(self) -> i64 { + self.bits as i64 + } +} + +impl Display for Imm12 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{:+}", self.bits) + } +} + +impl std::ops::Neg for Imm12 { + type Output = Self; + fn neg(self) -> Self::Output { + Self { bits: -self.bits } + } +} + +// singed +#[derive(Clone, Copy, Default)] +pub struct Imm20 { + /// The immediate bits. + pub bits: i32, +} + +impl Imm20 { + #[inline] + pub fn from_bits(bits: i32) -> Self { + Self { + bits: bits & 0xf_ffff, + } + } + #[inline] + pub fn as_u32(&self) -> u32 { + (self.bits as u32) & 0xf_ffff + } +} + +impl Debug for Imm20 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.bits) + } +} + +impl Display for Imm20 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.bits) + } +} + +/// An unsigned 5-bit immediate. 
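+///
+/// Used, for example, for the static AVL operand that `encode_vcfg_imm` packs
+/// into bits 15..=19 when emitting `VecSetState`.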
+#[derive(Clone, Copy, Debug, PartialEq)] +pub struct UImm5 { + value: u8, +} + +impl UImm5 { + /// Create an unsigned 5-bit immediate from u8. + pub fn maybe_from_u8(value: u8) -> Option { + if value < 32 { + Some(UImm5 { value }) + } else { + None + } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + u32::from(self.value) + } +} + +impl Display for UImm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +/// A Signed 5-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Imm5 { + value: i8, +} + +impl Imm5 { + /// Create an signed 5-bit immediate from an i8. + pub fn maybe_from_i8(value: i8) -> Option { + if value >= -16 && value <= 15 { + Some(Imm5 { value }) + } else { + None + } + } + + pub fn from_bits(value: u8) -> Imm5 { + assert_eq!(value & 0x1f, value); + let signed = ((value << 3) as i8) >> 3; + Imm5 { value: signed } + } + + /// Bits for encoding. + pub fn bits(&self) -> u8 { + self.value as u8 & 0x1f + } +} + +impl Display for Imm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +impl Inst { + pub(crate) fn imm_min() -> i64 { + let imm20_max: i64 = (1 << 19) << 12; + let imm12_max = 1 << 11; + -imm20_max - imm12_max + } + pub(crate) fn imm_max() -> i64 { + let imm20_max: i64 = ((1 << 19) - 1) << 12; + let imm12_max = (1 << 11) - 1; + imm20_max + imm12_max + } + + /// An imm20 immediate and an Imm12 immediate can generate a 32-bit immediate. + /// This helper produces an imm12, imm20, or both to generate the value. + /// + /// `value` must be between `imm_min()` and `imm_max()`, or else + /// this helper returns `None`. + pub(crate) fn generate_imm( + value: u64, + mut handle_imm: impl FnMut(Option, Option) -> R, + ) -> Option { + if let Some(imm12) = Imm12::maybe_from_u64(value) { + // can be load using single imm12. + let r = handle_imm(None, Some(imm12)); + return Some(r); + } + let value = value as i64; + if !(value >= Self::imm_min() && value <= Self::imm_max()) { + // not in range, return None. + return None; + } + const MOD_NUM: i64 = 4096; + let (imm20, imm12) = if value > 0 { + let mut imm20 = value / MOD_NUM; + let mut imm12 = value % MOD_NUM; + if imm12 >= 2048 { + imm12 -= MOD_NUM; + imm20 += 1; + } + assert!(imm12 >= -2048 && imm12 <= 2047); + (imm20, imm12) + } else { + // this is the abs value. + let value_abs = value.abs(); + let imm20 = value_abs / MOD_NUM; + let imm12 = value_abs % MOD_NUM; + let mut imm20 = -imm20; + let mut imm12 = -imm12; + if imm12 < -2048 { + imm12 += MOD_NUM; + imm20 -= 1; + } + (imm20, imm12) + }; + assert!(imm20 >= -(0x7_ffff + 1) && imm20 <= 0x7_ffff); + assert!(imm20 != 0 || imm12 != 0); + Some(handle_imm( + if imm20 != 0 { + Some(Imm20::from_bits(imm20 as i32)) + } else { + None + }, + if imm12 != 0 { + Some(Imm12::from_bits(imm12 as i16)) + } else { + None + }, + )) + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_imm12() { + let x = Imm12::zero(); + assert_eq!(0, x.as_u32()); + Imm12::maybe_from_u64(0xffff_ffff_ffff_ffff).unwrap(); + } + + #[test] + fn imm20_and_imm12() { + assert!(Inst::imm_max() == (i32::MAX - 2048) as i64); + assert!(Inst::imm_min() == i32::MIN as i64 - 2048); + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs new file mode 100644 index 000000000000..597d9d1cf22c --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -0,0 +1,2140 @@ +//! This module defines zkasm-specific machine instruction types. 
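+//!
+//! These definitions are currently a direct copy of the riscv64 backend and
+//! still use the RISC-V instruction encodings and register conventions.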
+ +// Some variants are not constructed, but we still want them as options in the future. +#![allow(dead_code)] +#![allow(non_camel_case_types)] + +use super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpMasking}; +use crate::binemit::{Addend, CodeOffset, Reloc}; +pub use crate::ir::condcodes::IntCC; +use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64}; + +pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel}; +use crate::isa::{CallConv, FunctionAlignment}; +use crate::machinst::*; +use crate::{settings, CodegenError, CodegenResult}; + +pub use crate::ir::condcodes::FloatCC; + +use alloc::vec::Vec; +use regalloc2::{PRegSet, RegClass, VReg}; +use smallvec::{smallvec, SmallVec}; +use std::boxed::Box; +use std::fmt::Write; +use std::string::{String, ToString}; + +pub mod regs; +pub use self::regs::*; +pub mod imms; +pub use self::imms::*; +pub mod args; +pub use self::args::*; +pub mod emit; +pub use self::emit::*; +pub mod vector; +pub use self::vector::*; +pub mod encode; +pub use self::encode::*; +pub mod unwind; + +use crate::isa::zkasm::abi::Riscv64MachineDeps; + +#[cfg(test)] +mod emit_tests; + +use std::fmt::{Display, Formatter}; + +pub(crate) type OptionReg = Option; +pub(crate) type OptionImm12 = Option; +pub(crate) type VecBranchTarget = Vec; +pub(crate) type OptionUimm5 = Option; +pub(crate) type OptionFloatRoundingMode = Option; +pub(crate) type VecU8 = Vec; +pub(crate) type VecWritableReg = Vec>; +//============================================================================= +// Instructions (top level): definition + +pub use crate::isa::zkasm::lower::isle::generated_code::{ + AluOPRRI, AluOPRRR, AtomicOP, FClassResult, FFlagsException, FloatRoundOP, FloatSelectOP, + FpuOPRR, FpuOPRRR, FpuOPRRRR, IntSelectOP, LoadOP, MInst as Inst, StoreOP, FRM, +}; +use crate::isa::zkasm::lower::isle::generated_code::{MInst, VecAluOpRRImm5, VecAluOpRRR}; + +type BoxCallInfo = Box; +type BoxCallIndInfo = Box; +type BoxReturnCallInfo = Box; + +/// Additional information for (direct) Call instructions, left out of line to lower the size of +/// the Inst enum. +#[derive(Clone, Debug)] +pub struct CallInfo { + pub dest: ExternalName, + pub uses: CallArgList, + pub defs: CallRetList, + pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, + pub clobbers: PRegSet, + pub callee_pop_size: u32, +} + +/// Additional information for CallInd instructions, left out of line to lower the size of the Inst +/// enum. +#[derive(Clone, Debug)] +pub struct CallIndInfo { + pub rn: Reg, + pub uses: CallArgList, + pub defs: CallRetList, + pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, + pub clobbers: PRegSet, + pub callee_pop_size: u32, +} + +/// Additional information for `return_call[_ind]` instructions, left out of +/// line to lower the size of the `Inst` enum. +#[derive(Clone, Debug)] +pub struct ReturnCallInfo { + pub uses: CallArgList, + pub opcode: Opcode, + pub old_stack_arg_size: u32, + pub new_stack_arg_size: u32, +} + +/// A branch target. Either unresolved (basic-block index) or resolved (offset +/// from end of current instruction). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BranchTarget { + /// An unresolved reference to a Label, as passed into + /// `lower_branch_group()`. + Label(MachLabel), + /// A fixed PC offset. + ResolvedOffset(i32), +} + +impl BranchTarget { + /// Return the target's label, if it is a label-based target. 
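+    ///
+    /// Returns `None` once the target has been resolved to a concrete offset.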
+ pub(crate) fn as_label(self) -> Option { + match self { + BranchTarget::Label(l) => Some(l), + _ => None, + } + } + /// offset zero. + #[inline] + pub(crate) fn zero() -> Self { + Self::ResolvedOffset(0) + } + #[inline] + pub(crate) fn offset(off: i32) -> Self { + Self::ResolvedOffset(off) + } + #[inline] + pub(crate) fn is_zero(self) -> bool { + match self { + BranchTarget::Label(_) => false, + BranchTarget::ResolvedOffset(off) => off == 0, + } + } + #[inline] + pub(crate) fn as_offset(self) -> Option { + match self { + BranchTarget::Label(_) => None, + BranchTarget::ResolvedOffset(off) => Some(off), + } + } +} + +impl Display for BranchTarget { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + BranchTarget::Label(l) => write!(f, "{}", l.to_string()), + BranchTarget::ResolvedOffset(off) => write!(f, "{}", off), + } + } +} + +pub(crate) fn enc_auipc(rd: Writable, imm: Imm20) -> u32 { + let x = 0b0010111 | reg_to_gpr_num(rd.to_reg()) << 7 | imm.as_u32() << 12; + x +} + +pub(crate) fn enc_jalr(rd: Writable, base: Reg, offset: Imm12) -> u32 { + let x = 0b1100111 + | reg_to_gpr_num(rd.to_reg()) << 7 + | 0b000 << 12 + | reg_to_gpr_num(base) << 15 + | offset.as_u32() << 20; + x +} + +/// rd and src must have the same length. +pub(crate) fn gen_moves(rd: &[Writable], src: &[Reg]) -> SmallInstVec { + assert!(rd.len() == src.len()); + assert!(rd.len() > 0); + let mut insts = SmallInstVec::new(); + for (dst, src) in rd.iter().zip(src.iter()) { + let ty = Inst::canonical_type_for_rc(dst.to_reg().class()); + insts.push(Inst::gen_move(*dst, *src, ty)); + } + insts +} + +impl Inst { + const INSTRUCTION_SIZE: i32 = 4; + + #[inline] + pub(crate) fn load_imm12(rd: Writable, imm: Imm12) -> Inst { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs: zero_reg(), + imm12: imm, + } + } + + /// Immediates can be loaded using lui and addi instructions. + fn load_const_imm Writable>( + rd: Writable, + value: u64, + alloc_tmp: &mut F, + ) -> Option> { + Inst::generate_imm(value, |imm20, imm12| { + let mut insts = SmallVec::new(); + + let rs = if let Some(imm) = imm20 { + let rd = if imm12.is_some() { alloc_tmp(I64) } else { rd }; + insts.push(Inst::Lui { rd, imm }); + rd.to_reg() + } else { + zero_reg() + }; + + if let Some(imm12) = imm12 { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + }) + } + + insts + }) + } + + pub(crate) fn load_constant_u32 Writable>( + rd: Writable, + value: u64, + alloc_tmp: &mut F, + ) -> SmallInstVec { + let insts = Inst::load_const_imm(rd, value, alloc_tmp); + insts.unwrap_or_else(|| { + smallvec![Inst::LoadConst32 { + rd, + imm: value as u32 + }] + }) + } + + pub fn load_constant_u64 Writable>( + rd: Writable, + value: u64, + alloc_tmp: &mut F, + ) -> SmallInstVec { + let insts = Inst::load_const_imm(rd, value, alloc_tmp); + insts.unwrap_or_else(|| smallvec![Inst::LoadConst64 { rd, imm: value }]) + } + + pub(crate) fn construct_auipc_and_jalr( + link: Option>, + tmp: Writable, + offset: i64, + ) -> [Inst; 2] { + Inst::generate_imm(offset as u64, |imm20, imm12| { + let a = Inst::Auipc { + rd: tmp, + imm: imm20.unwrap_or_default(), + }; + let b = Inst::Jalr { + rd: link.unwrap_or(writable_zero_reg()), + base: tmp.to_reg(), + offset: imm12.unwrap_or_default(), + }; + [a, b] + }) + .expect("code range is too big.") + } + + /// Create instructions that load a 32-bit floating-point constant. 
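+    ///
+    /// The bit pattern is first materialized in an integer temporary and then
+    /// transferred into the destination FP register with an x-to-f move.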
+ pub fn load_fp_constant32 Writable>( + rd: Writable, + const_data: u32, + mut alloc_tmp: F, + ) -> SmallVec<[Inst; 4]> { + let mut insts = SmallVec::new(); + let tmp = alloc_tmp(I64); + insts.extend(Self::load_constant_u32( + tmp, + const_data as u64, + &mut alloc_tmp, + )); + insts.push(Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::move_x_to_f_op(F32), + rd, + rs: tmp.to_reg(), + }); + insts + } + + /// Create instructions that load a 64-bit floating-point constant. + pub fn load_fp_constant64 Writable>( + rd: Writable, + const_data: u64, + mut alloc_tmp: F, + ) -> SmallVec<[Inst; 4]> { + let mut insts = SmallInstVec::new(); + let tmp = alloc_tmp(I64); + insts.extend(Self::load_constant_u64(tmp, const_data, &mut alloc_tmp)); + insts.push(Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::move_x_to_f_op(F64), + rd, + rs: tmp.to_reg(), + }); + insts + } + + /// Generic constructor for a load (zero-extending where appropriate). + pub fn gen_load(into_reg: Writable, mem: AMode, ty: Type, flags: MemFlags) -> Inst { + if ty.is_vector() { + Inst::VecLoad { + eew: VecElementWidth::from_type(ty), + to: into_reg, + from: VecAMode::UnitStride { base: mem }, + flags, + mask: VecOpMasking::Disabled, + vstate: VState::from_type(ty), + } + } else { + Inst::Load { + rd: into_reg, + op: LoadOP::from_type(ty), + from: mem, + flags, + } + } + } + + /// Generic constructor for a store. + pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { + if ty.is_vector() { + Inst::VecStore { + eew: VecElementWidth::from_type(ty), + to: VecAMode::UnitStride { base: mem }, + from: from_reg, + flags, + mask: VecOpMasking::Disabled, + vstate: VState::from_type(ty), + } + } else { + Inst::Store { + src: from_reg, + op: StoreOP::from_type(ty), + to: mem, + flags, + } + } + } +} + +//============================================================================= + +fn vec_mask_operands VReg>( + mask: &VecOpMasking, + collector: &mut OperandCollector<'_, F>, +) { + match mask { + VecOpMasking::Enabled { reg } => { + collector.reg_fixed_use(*reg, pv_reg(0).into()); + } + VecOpMasking::Disabled => {} + } +} +fn vec_mask_late_operands VReg>( + mask: &VecOpMasking, + collector: &mut OperandCollector<'_, F>, +) { + match mask { + VecOpMasking::Enabled { reg } => { + collector.reg_fixed_late_use(*reg, pv_reg(0).into()); + } + VecOpMasking::Disabled => {} + } +} + +fn zkasm_get_operands VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) { + match inst { + &Inst::Nop0 => {} + &Inst::Nop4 => {} + &Inst::BrTable { + index, tmp1, tmp2, .. + } => { + collector.reg_use(index); + collector.reg_early_def(tmp1); + collector.reg_early_def(tmp2); + } + &Inst::Auipc { rd, .. } => collector.reg_def(rd), + &Inst::Lui { rd, .. } => collector.reg_def(rd), + &Inst::LoadConst32 { rd, .. } => collector.reg_def(rd), + &Inst::LoadConst64 { rd, .. } => collector.reg_def(rd), + &Inst::AluRRR { rd, rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + &Inst::FpuRRR { rd, rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + &Inst::AluRRImm12 { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + &Inst::Load { rd, from, .. } => { + if let Some(r) = from.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_def(rd); + } + &Inst::Store { to, src, .. 
} => { + if let Some(r) = to.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_use(src); + } + + &Inst::Args { ref args } => { + for arg in args { + collector.reg_fixed_def(arg.vreg, arg.preg); + } + } + &Inst::Ret { ref rets, .. } => { + for ret in rets { + collector.reg_fixed_use(ret.vreg, ret.preg); + } + } + + &Inst::Extend { rd, rn, .. } => { + collector.reg_use(rn); + collector.reg_def(rd); + } + &Inst::AdjustSp { .. } => {} + &Inst::Call { ref info } => { + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } + collector.reg_clobbers(info.clobbers); + } + &Inst::CallInd { ref info } => { + if info.callee_callconv == CallConv::Tail { + // TODO(https://github.com/bytecodealliance/regalloc2/issues/145): + // This shouldn't be a fixed register constraint. + collector.reg_fixed_use(info.rn, x_reg(5)); + } else { + collector.reg_use(info.rn); + } + + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } + collector.reg_clobbers(info.clobbers); + } + &Inst::ReturnCall { + callee: _, + ref info, + } => { + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + } + &Inst::ReturnCallInd { ref info, callee } => { + collector.reg_use(callee); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + } + &Inst::TrapIf { test, .. } => { + collector.reg_use(test); + } + &Inst::Jal { .. } => {} + &Inst::CondBr { kind, .. } => { + collector.reg_use(kind.rs1); + collector.reg_use(kind.rs2); + } + &Inst::LoadExtName { rd, .. } => { + collector.reg_def(rd); + } + &Inst::LoadAddr { rd, mem } => { + if let Some(r) = mem.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_early_def(rd); + } + + &Inst::VirtualSPOffsetAdj { .. } => {} + &Inst::Mov { rd, rm, .. } => { + collector.reg_use(rm); + collector.reg_def(rd); + } + &Inst::MovFromPReg { rd, rm } => { + debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); + collector.reg_def(rd); + } + &Inst::Fence { .. } => {} + &Inst::FenceI => {} + &Inst::ECall => {} + &Inst::EBreak => {} + &Inst::Udf { .. } => {} + &Inst::FpuRR { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + &Inst::FpuRRRR { + rd, rs1, rs2, rs3, .. + } => { + collector.reg_uses(&[rs1, rs2, rs3]); + collector.reg_def(rd); + } + + &Inst::Jalr { rd, base, .. } => { + collector.reg_use(base); + collector.reg_def(rd); + } + &Inst::Atomic { rd, addr, src, .. } => { + collector.reg_use(addr); + collector.reg_use(src); + collector.reg_def(rd); + } + &Inst::Select { + ref dst, + condition, + x, + y, + .. + } => { + collector.reg_use(condition); + collector.reg_uses(x.regs()); + collector.reg_uses(y.regs()); + for d in dst.iter() { + collector.reg_early_def(d.clone()); + } + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + .. + } => { + collector.reg_uses(&[offset, e, addr, v]); + collector.reg_early_def(t0); + collector.reg_early_def(dst); + } + &Inst::IntSelect { + ref dst, + ref x, + ref y, + .. + } => { + collector.reg_uses(x.regs()); + collector.reg_uses(y.regs()); + for d in dst.iter() { + collector.reg_early_def(d.clone()); + } + } + + &Inst::Icmp { rd, a, b, .. 
} => { + collector.reg_uses(a.regs()); + collector.reg_uses(b.regs()); + collector.reg_def(rd); + } + + &Inst::SelectReg { + rd, + rs1, + rs2, + condition, + } => { + collector.reg_use(condition.rs1); + collector.reg_use(condition.rs2); + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + &Inst::FcvtToInt { rd, rs, tmp, .. } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(rd); + } + &Inst::RawData { .. } => {} + &Inst::AtomicStore { src, p, .. } => { + collector.reg_use(src); + collector.reg_use(p); + } + &Inst::AtomicLoad { rd, p, .. } => { + collector.reg_use(p); + collector.reg_def(rd); + } + &Inst::AtomicRmwLoop { + offset, + dst, + p, + x, + t0, + .. + } => { + collector.reg_uses(&[offset, p, x]); + collector.reg_early_def(t0); + collector.reg_early_def(dst); + } + &Inst::TrapIfC { rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + } + &Inst::Unwind { .. } => {} + &Inst::DummyUse { reg } => { + collector.reg_use(reg); + } + &Inst::FloatRound { + rd, + int_tmp, + f_tmp, + rs, + .. + } => { + collector.reg_use(rs); + collector.reg_early_def(int_tmp); + collector.reg_early_def(f_tmp); + collector.reg_early_def(rd); + } + &Inst::FloatSelect { + rd, tmp, rs1, rs2, .. + } => { + collector.reg_uses(&[rs1, rs2]); + collector.reg_early_def(tmp); + collector.reg_early_def(rd); + } + &Inst::Popcnt { + sum, step, rs, tmp, .. + } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(sum); + } + &Inst::Rev8 { rs, rd, tmp, step } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(rd); + } + &Inst::Cltz { + sum, step, tmp, rs, .. + } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(sum); + } + &Inst::Brev8 { + rs, + rd, + step, + tmp, + tmp2, + .. + } => { + collector.reg_use(rs); + collector.reg_early_def(step); + collector.reg_early_def(tmp); + collector.reg_early_def(tmp2); + collector.reg_early_def(rd); + } + &Inst::StackProbeLoop { .. } => { + // StackProbeLoop has a tmp register and StackProbeLoop used at gen_prologue. + // t3 will do the job. (t3 is caller-save register and not used directly by compiler like writable_spilltmp_reg) + // gen_prologue is called at emit stage. + // no need let reg alloc know. + } + &Inst::VecAluRRRR { + op, + vd, + vd_src, + vs1, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd_src.class(), RegClass::Vector); + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + debug_assert_eq!(vs1.class(), op.vs1_regclass()); + + collector.reg_late_use(vs1); + collector.reg_late_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 2); // `vd` == `vd_src`. + vec_mask_late_operands(mask, collector); + } + &Inst::VecAluRRRImm5 { + op, + vd, + vd_src, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd_src.class(), RegClass::Vector); + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + + // If the operation forbids source/destination overlap we need to + // ensure that the source and destination registers are different. + if op.forbids_src_dst_overlaps() { + collector.reg_late_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`. 
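+                // `vs2` is collected as a late use so that it cannot be assigned
+                // the same register as `vd`; the mask below is collected late for
+                // the same reason.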
+ vec_mask_late_operands(mask, collector); + } else { + collector.reg_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`. + vec_mask_operands(mask, collector); + } + } + &Inst::VecAluRRR { + op, + vd, + vs1, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + debug_assert_eq!(vs1.class(), op.vs1_regclass()); + + collector.reg_use(vs1); + collector.reg_use(vs2); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. + if op.forbids_src_dst_overlaps() { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + &Inst::VecAluRRImm5 { + op, + vd, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + + collector.reg_use(vs2); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. + if op.forbids_src_dst_overlaps() { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + &Inst::VecAluRR { + op, + vd, + vs, + ref mask, + .. + } => { + debug_assert_eq!(vd.to_reg().class(), op.dst_regclass()); + debug_assert_eq!(vs.class(), op.src_regclass()); + + collector.reg_use(vs); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. + if op.forbids_src_dst_overlaps() { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + &Inst::VecAluRImm5 { vd, ref mask, .. } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + + collector.reg_def(vd); + vec_mask_operands(mask, collector); + } + &Inst::VecSetState { rd, .. } => { + collector.reg_def(rd); + } + &Inst::VecLoad { + to, + ref from, + ref mask, + .. + } => { + if let Some(r) = from.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_def(to); + vec_mask_operands(mask, collector); + } + &Inst::VecStore { + ref to, + from, + ref mask, + .. + } => { + if let Some(r) = to.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_use(from); + vec_mask_operands(mask, collector); + } + } +} + +impl MachInst for Inst { + type LabelUse = LabelUse; + type ABIMachineSpec = Riscv64MachineDeps; + + // https://github.com/riscv/riscv-isa-manual/issues/850 + // all zero will cause invalid opcode. + const TRAP_OPCODE: &'static [u8] = &[0; 4]; + + fn gen_dummy_use(reg: Reg) -> Self { + Inst::DummyUse { reg } + } + + fn canonical_type_for_rc(rc: RegClass) -> Type { + match rc { + regalloc2::RegClass::Int => I64, + regalloc2::RegClass::Float => F64, + regalloc2::RegClass::Vector => I8X16, + } + } + + fn is_safepoint(&self) -> bool { + match self { + &Inst::Call { .. } + | &Inst::CallInd { .. } + | &Inst::TrapIf { .. } + | &Inst::Udf { .. } => true, + _ => false, + } + } + + fn get_operands VReg>(&self, collector: &mut OperandCollector<'_, F>) { + zkasm_get_operands(self, collector); + } + + fn is_move(&self) -> Option<(Writable, Reg)> { + match self { + Inst::Mov { rd, rm, .. 
} => Some((rd.clone(), rm.clone())), + _ => None, + } + } + + fn is_included_in_clobbers(&self) -> bool { + match self { + &Inst::Args { .. } => false, + _ => true, + } + } + + fn is_trap(&self) -> bool { + match self { + Self::Udf { .. } => true, + _ => false, + } + } + + fn is_args(&self) -> bool { + match self { + Self::Args { .. } => true, + _ => false, + } + } + + fn is_term(&self) -> MachTerminator { + match self { + &Inst::Jal { .. } => MachTerminator::Uncond, + &Inst::CondBr { .. } => MachTerminator::Cond, + &Inst::Jalr { .. } => MachTerminator::Uncond, + &Inst::Ret { .. } => MachTerminator::Ret, + &Inst::BrTable { .. } => MachTerminator::Indirect, + &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall, + _ => MachTerminator::None, + } + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + let x = Inst::Mov { + rd: to_reg, + rm: from_reg, + ty, + }; + x + } + + fn gen_nop(preferred_size: usize) -> Inst { + if preferred_size == 0 { + return Inst::Nop0; + } + // We can't give a NOP (or any insn) < 4 bytes. + assert!(preferred_size >= 4); + Inst::Nop4 + } + + fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { + match ty { + I8 => Ok((&[RegClass::Int], &[I8])), + I16 => Ok((&[RegClass::Int], &[I16])), + I32 => Ok((&[RegClass::Int], &[I32])), + I64 => Ok((&[RegClass::Int], &[I64])), + R32 => panic!("32-bit reftype pointer should never be seen on zkasm"), + R64 => Ok((&[RegClass::Int], &[R64])), + F32 => Ok((&[RegClass::Float], &[F32])), + F64 => Ok((&[RegClass::Float], &[F64])), + I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])), + _ if ty.is_vector() => { + debug_assert!(ty.bits() <= 512); + + // Here we only need to return a SIMD type with the same size as `ty`. + // We use these types for spills and reloads, so prefer types with lanes <= 31 + // since that fits in the immediate field of `vsetivli`. + const SIMD_TYPES: [[Type; 1]; 6] = [ + [types::I8X2], + [types::I8X4], + [types::I8X8], + [types::I8X16], + [types::I16X16], + [types::I32X16], + ]; + let idx = (ty.bytes().ilog2() - 1) as usize; + let ty = &SIMD_TYPES[idx][..]; + + Ok((&[RegClass::Vector], ty)) + } + _ => Err(CodegenError::Unsupported(format!( + "Unexpected SSA-value type: {}", + ty + ))), + } + } + + fn gen_jump(target: MachLabel) -> Inst { + Inst::Jal { + dest: BranchTarget::Label(target), + } + } + + fn worst_case_size() -> CodeOffset { + // calculate by test function zkasm_worst_case_instruction_size() + 116 + } + + fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { + RegClass::Int + } + + fn function_alignment() -> FunctionAlignment { + FunctionAlignment { + minimum: 4, + preferred: 4, + } + } +} + +//============================================================================= +// Pretty-printing of instructions. 
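+// Register names follow the standard RISC-V ABI mnemonics, mirroring the
+// riscv64 backend this file was copied from.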
+pub fn reg_name(reg: Reg) -> String { + match reg.to_real_reg() { + Some(real) => match real.class() { + RegClass::Int => match real.hw_enc() { + 0 => "zero".into(), + 1 => "ra".into(), + 2 => "sp".into(), + 3 => "gp".into(), + 4 => "tp".into(), + 5 => "t0".into(), + 6..=7 => format!("t{}", real.hw_enc() - 5), + 8 => "fp".into(), + 9 => "s1".into(), + 10..=17 => format!("a{}", real.hw_enc() - 10), + 18..=27 => format!("s{}", real.hw_enc() - 16), + 28..=31 => format!("t{}", real.hw_enc() - 25), + _ => unreachable!(), + }, + RegClass::Float => match real.hw_enc() { + 0..=7 => format!("ft{}", real.hw_enc() - 0), + 8..=9 => format!("fs{}", real.hw_enc() - 8), + 10..=17 => format!("fa{}", real.hw_enc() - 10), + 18..=27 => format!("fs{}", real.hw_enc() - 16), + 28..=31 => format!("ft{}", real.hw_enc() - 20), + _ => unreachable!(), + }, + RegClass::Vector => format!("v{}", real.hw_enc()), + }, + None => { + format!("{:?}", reg) + } + } +} + +impl Inst { + fn print_with_state( + &self, + _state: &mut EmitState, + allocs: &mut AllocationConsumer<'_>, + ) -> String { + let format_reg = |reg: Reg, allocs: &mut AllocationConsumer<'_>| -> String { + let reg = allocs.next(reg); + reg_name(reg) + }; + + let format_vec_amode = |amode: &VecAMode, allocs: &mut AllocationConsumer<'_>| -> String { + match amode { + VecAMode::UnitStride { base } => base.to_string_with_alloc(allocs), + } + }; + + let format_mask = |mask: &VecOpMasking, allocs: &mut AllocationConsumer<'_>| -> String { + match mask { + VecOpMasking::Enabled { reg } => format!(",{}.t", format_reg(*reg, allocs)), + VecOpMasking::Disabled => format!(""), + } + }; + + let format_regs = |regs: &[Reg], allocs: &mut AllocationConsumer<'_>| -> String { + let mut x = if regs.len() > 1 { + String::from("[") + } else { + String::default() + }; + regs.iter().for_each(|i| { + x.push_str(format_reg(i.clone(), allocs).as_str()); + if *i != *regs.last().unwrap() { + x.push_str(","); + } + }); + if regs.len() > 1 { + x.push_str("]"); + } + x + }; + let format_labels = |labels: &[MachLabel]| -> String { + if labels.len() == 0 { + return String::from("[_]"); + } + let mut x = String::from("["); + labels.iter().for_each(|l| { + x.push_str( + format!( + "{:?}{}", + l, + if l != labels.last().unwrap() { "," } else { "" }, + ) + .as_str(), + ); + }); + x.push_str("]"); + x + }; + + fn format_frm(rounding_mode: Option) -> String { + if let Some(r) = rounding_mode { + format!(",{}", r.to_static_str(),) + } else { + "".into() + } + } + + let mut empty_allocs = AllocationConsumer::default(); + match self { + &Inst::Nop0 => { + format!("##zero length nop") + } + &Inst::Nop4 => { + format!("##fixed 4-size nop") + } + &Inst::StackProbeLoop { + guard_size, + probe_count, + tmp, + } => { + let tmp = format_reg(tmp.to_reg(), allocs); + format!( + "inline_stack_probe##guard_size={} probe_count={} tmp={}", + guard_size, probe_count, tmp + ) + } + &Inst::FloatRound { + op, + rd, + int_tmp, + f_tmp, + rs, + ty, + } => { + let rs = format_reg(rs, allocs); + let int_tmp = format_reg(int_tmp.to_reg(), allocs); + let f_tmp = format_reg(f_tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "{} {},{}##int_tmp={} f_tmp={} ty={}", + op.op_name(), + rd, + rs, + int_tmp, + f_tmp, + ty + ) + } + &Inst::FloatSelect { + op, + rd, + tmp, + rs1, + rs2, + ty, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "f{}.{} {},{},{}##tmp={} 
ty={}", + op.op_name(), + if ty == F32 { "s" } else { "d" }, + rd, + rs1, + rs2, + tmp, + ty + ) + } + &Inst::AtomicStore { src, ty, p } => { + let src = format_reg(src, allocs); + let p = format_reg(p, allocs); + format!("atomic_store.{} {},({})", ty, src, p) + } + &Inst::DummyUse { reg } => { + let reg = format_reg(reg, allocs); + format!("dummy_use {}", reg) + } + + &Inst::AtomicLoad { rd, ty, p } => { + let p = format_reg(p, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("atomic_load.{} {},({})", ty, rd, p) + } + &Inst::AtomicRmwLoop { + offset, + op, + dst, + ty, + p, + x, + t0, + } => { + let offset = format_reg(offset, allocs); + let p = format_reg(p, allocs); + let x = format_reg(x, allocs); + let t0 = format_reg(t0.to_reg(), allocs); + let dst = format_reg(dst.to_reg(), allocs); + format!( + "atomic_rmw.{} {} {},{},({})##t0={} offset={}", + ty, op, dst, x, p, t0, offset + ) + } + + &Inst::RawData { ref data } => match data.len() { + 4 => { + let mut bytes = [0; 4]; + for i in 0..bytes.len() { + bytes[i] = data[i]; + } + format!(".4byte 0x{:x}", u32::from_le_bytes(bytes)) + } + 8 => { + let mut bytes = [0; 8]; + for i in 0..bytes.len() { + bytes[i] = data[i]; + } + format!(".8byte 0x{:x}", u64::from_le_bytes(bytes)) + } + _ => { + format!(".data {:?}", data) + } + }, + &Inst::Unwind { ref inst } => { + format!("unwind {:?}", inst) + } + &Inst::Brev8 { + rs, + ty, + step, + tmp, + tmp2, + rd, + } => { + let rs = format_reg(rs, allocs); + let step = format_reg(step.to_reg(), allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let tmp2 = format_reg(tmp2.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "brev8 {},{}##tmp={} tmp2={} step={} ty={}", + rd, rs, tmp, tmp2, step, ty + ) + } + &Inst::Popcnt { + sum, + step, + rs, + tmp, + ty, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let sum = format_reg(sum.to_reg(), allocs); + format!("popcnt {},{}##ty={} tmp={} step={}", sum, rs, ty, tmp, step) + } + &Inst::Rev8 { rs, rd, tmp, step } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("rev8 {},{}##step={} tmp={}", rd, rs, step, tmp) + } + &Inst::Cltz { + sum, + step, + rs, + tmp, + ty, + leading, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let sum = format_reg(sum.to_reg(), allocs); + format!( + "{} {},{}##ty={} tmp={} step={}", + if leading { "clz" } else { "ctz" }, + sum, + rs, + ty, + tmp, + step + ) + } + &Inst::FcvtToInt { + is_sat, + rd, + rs, + is_signed, + in_type, + out_type, + tmp, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "fcvt_to_{}int{}.{} {},{}##in_ty={} tmp={}", + if is_signed { "s" } else { "u" }, + if is_sat { "_sat" } else { "" }, + out_type, + rd, + rs, + in_type, + tmp + ) + } + &Inst::SelectReg { + rd, + rs1, + rs2, + ref condition, + } => { + let c_rs1 = format_reg(condition.rs1, allocs); + let c_rs2 = format_reg(condition.rs2, allocs); + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "select_reg {},{},{}##condition={}", + rd, + rs1, + rs2, + format!("({} {} {})", c_rs1, condition.kind.to_static_str(), c_rs2), + ) 
+ } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + ty, + } => { + let offset = format_reg(offset, allocs); + let e = format_reg(e, allocs); + let addr = format_reg(addr, allocs); + let v = format_reg(v, allocs); + let t0 = format_reg(t0.to_reg(), allocs); + let dst = format_reg(dst.to_reg(), allocs); + format!( + "atomic_cas.{} {},{},{},({})##t0={} offset={}", + ty, dst, e, v, addr, t0, offset, + ) + } + &Inst::Icmp { cc, rd, a, b, ty } => { + let a = format_regs(a.regs(), allocs); + let b = format_regs(b.regs(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{},{}##ty={}", cc.to_static_str(), rd, a, b, ty) + } + &Inst::IntSelect { + op, + ref dst, + x, + y, + ty, + } => { + let x = format_regs(x.regs(), allocs); + let y = format_regs(y.regs(), allocs); + let dst: Vec<_> = dst.iter().map(|r| r.to_reg()).collect(); + let dst = format_regs(&dst[..], allocs); + format!("{} {},{},{}##ty={}", op.op_name(), dst, x, y, ty,) + } + &Inst::BrTable { + index, + tmp1, + tmp2, + ref targets, + } => { + let targets: Vec<_> = targets.iter().map(|x| x.as_label().unwrap()).collect(); + format!( + "{} {},{}##tmp1={},tmp2={}", + "br_table", + format_reg(index, allocs), + format_labels(&targets[..]), + format_reg(tmp1.to_reg(), allocs), + format_reg(tmp2.to_reg(), allocs), + ) + } + &Inst::Auipc { rd, imm } => { + format!( + "{} {},{}", + "auipc", + format_reg(rd.to_reg(), allocs), + imm.bits + ) + } + &Inst::Jalr { rd, base, offset } => { + let base = format_reg(base, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}({})", "jalr", rd, offset.bits, base) + } + &Inst::Lui { rd, ref imm } => { + format!("{} {},{}", "lui", format_reg(rd.to_reg(), allocs), imm.bits) + } + &Inst::LoadConst32 { rd, imm } => { + let rd = format_reg(rd.to_reg(), allocs); + let mut buf = String::new(); + write!(&mut buf, "auipc {},0; ", rd).unwrap(); + write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap(); + write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 4).unwrap(); + write!(&mut buf, ".4byte 0x{:x}", imm).unwrap(); + buf + } + &Inst::LoadConst64 { rd, imm } => { + let rd = format_reg(rd.to_reg(), allocs); + let mut buf = String::new(); + write!(&mut buf, "auipc {},0; ", rd).unwrap(); + write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap(); + write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 8).unwrap(); + write!(&mut buf, ".8byte 0x{:x}", imm).unwrap(); + buf + } + &Inst::AluRRR { + alu_op, + rd, + rs1, + rs2, + } => { + let rs1_s = format_reg(rs1, allocs); + let rs2_s = format_reg(rs2, allocs); + let rd_s = format_reg(rd.to_reg(), allocs); + match alu_op { + AluOPRRR::Adduw if rs2 == zero_reg() => { + format!("zext.w {},{}", rd_s, rs1_s) + } + _ => { + format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s) + } + } + } + &Inst::FpuRR { + frm, + alu_op, + rd, + rs, + } => { + let rs = format_reg(rs, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}{}", alu_op.op_name(), rd, rs, format_frm(frm)) + } + &Inst::FpuRRR { + alu_op, + rd, + rs1, + rs2, + frm, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rd = format_reg(rd.to_reg(), allocs); + let rs1_is_rs2 = rs1 == rs2; + if rs1_is_rs2 && alu_op.is_copy_sign() { + // this is move instruction. 
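+                    // With equal sources, fsgnj/fsgnjn/fsgnjx are the canonical
+                    // expansions of the fmv/fneg/fabs pseudo-instructions, so print
+                    // those forms instead.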
+ format!( + "fmv.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else if rs1_is_rs2 && alu_op.is_copy_neg_sign() { + format!( + "fneg.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else if rs1_is_rs2 && alu_op.is_copy_xor_sign() { + format!( + "fabs.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else { + format!( + "{} {},{},{}{}", + alu_op.op_name(), + rd, + rs1, + rs2, + format_frm(frm) + ) + } + } + &Inst::FpuRRRR { + alu_op, + rd, + rs1, + rs2, + rs3, + frm, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rs3 = format_reg(rs3, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "{} {},{},{},{}{}", + alu_op.op_name(), + rd, + rs1, + rs2, + rs3, + format_frm(frm) + ) + } + &Inst::AluRRImm12 { + alu_op, + rd, + rs, + ref imm12, + } => { + let rs_s = format_reg(rs, allocs); + let rd = format_reg(rd.to_reg(), allocs); + + // Some of these special cases are better known as + // their pseudo-instruction version, so prefer printing those. + match (alu_op, rs, imm12) { + (AluOPRRI::Addi, rs, _) if rs == zero_reg() => { + return format!("li {},{}", rd, imm12.as_i16()); + } + (AluOPRRI::Addiw, _, imm12) if imm12.as_i16() == 0 => { + return format!("sext.w {},{}", rd, rs_s); + } + (AluOPRRI::Xori, _, imm12) if imm12.as_i16() == -1 => { + return format!("not {},{}", rd, rs_s); + } + (AluOPRRI::SltiU, _, imm12) if imm12.as_i16() == 1 => { + return format!("seqz {},{}", rd, rs_s); + } + (alu_op, _, _) if alu_op.option_funct12().is_some() => { + format!("{} {},{}", alu_op.op_name(), rd, rs_s) + } + (alu_op, _, imm12) => { + format!("{} {},{},{}", alu_op.op_name(), rd, rs_s, imm12.as_i16()) + } + } + } + &Inst::Load { + rd, + op, + from, + flags: _flags, + } => { + let base = from.to_string_with_alloc(allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}", op.op_name(), rd, base,) + } + &Inst::Store { + to, + src, + op, + flags: _flags, + } => { + let base = to.to_string_with_alloc(allocs); + let src = format_reg(src, allocs); + format!("{} {},{}", op.op_name(), src, base,) + } + &Inst::Args { ref args } => { + let mut s = "args".to_string(); + let mut empty_allocs = AllocationConsumer::default(); + for arg in args { + let preg = format_reg(arg.preg, &mut empty_allocs); + let def = format_reg(arg.vreg.to_reg(), allocs); + write!(&mut s, " {}={}", def, preg).unwrap(); + } + s + } + &Inst::Ret { + ref rets, + stack_bytes_to_pop, + } => { + let mut s = if stack_bytes_to_pop == 0 { + "ret".to_string() + } else { + format!("add sp, sp, #{stack_bytes_to_pop} ; ret") + }; + + let mut empty_allocs = AllocationConsumer::default(); + for ret in rets { + let preg = format_reg(ret.preg, &mut empty_allocs); + let vreg = format_reg(ret.vreg, allocs); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + + &MInst::Extend { + rd, + rn, + signed, + from_bits, + .. 
+ } => { + let rn = format_reg(rn, allocs); + let rd = format_reg(rd.to_reg(), allocs); + return if signed == false && from_bits == 8 { + format!("andi {rd},{rn}") + } else { + let op = if signed { "srai" } else { "srli" }; + let shift_bits = (64 - from_bits) as i16; + format!("slli {rd},{rn},{shift_bits}; {op} {rd},{rd},{shift_bits}") + }; + } + &MInst::AdjustSp { amount } => { + format!("{} sp,{:+}", "add", amount) + } + &MInst::Call { ref info } => format!("call {}", info.dest.display(None)), + &MInst::CallInd { ref info } => { + let rd = format_reg(info.rn, allocs); + format!("callind {}", rd) + } + &MInst::ReturnCall { + ref callee, + ref info, + } => { + let mut s = format!( + "return_call {callee:?} old_stack_arg_size:{} new_stack_arg_size:{}", + info.old_stack_arg_size, info.new_stack_arg_size + ); + for ret in &info.uses { + let preg = format_reg(ret.preg, &mut empty_allocs); + let vreg = format_reg(ret.vreg, allocs); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + &MInst::ReturnCallInd { callee, ref info } => { + let callee = format_reg(callee, allocs); + let mut s = format!( + "return_call_ind {callee} old_stack_arg_size:{} new_stack_arg_size:{}", + info.old_stack_arg_size, info.new_stack_arg_size + ); + for ret in &info.uses { + let preg = format_reg(ret.preg, &mut empty_allocs); + let vreg = format_reg(ret.vreg, allocs); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + &MInst::TrapIf { test, trap_code } => { + format!("trap_if {},{}", format_reg(test, allocs), trap_code,) + } + &MInst::TrapIfC { + rs1, + rs2, + cc, + trap_code, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + format!("trap_ifc {}##({} {} {})", trap_code, rs1, cc, rs2) + } + &MInst::Jal { dest, .. } => { + format!("{} {}", "j", dest) + } + &MInst::CondBr { + taken, + not_taken, + kind, + .. 
+ } => { + let rs1 = format_reg(kind.rs1, allocs); + let rs2 = format_reg(kind.rs2, allocs); + if not_taken.is_zero() && taken.as_label().is_none() { + let off = taken.as_offset().unwrap(); + format!("{} {},{},{}", kind.op_name(), rs1, rs2, off) + } else { + let x = format!( + "{} {},{},taken({}),not_taken({})", + kind.op_name(), + rs1, + rs2, + taken, + not_taken + ); + x + } + } + &MInst::Atomic { + op, + rd, + addr, + src, + amo, + } => { + let op_name = op.op_name(amo); + let addr = format_reg(addr, allocs); + let src = format_reg(src, allocs); + let rd = format_reg(rd.to_reg(), allocs); + if op.is_load() { + format!("{} {},({})", op_name, rd, addr) + } else { + format!("{} {},{},({})", op_name, rd, src, addr) + } + } + &MInst::LoadExtName { + rd, + ref name, + offset, + } => { + let rd = format_reg(rd.to_reg(), allocs); + format!("load_sym {},{}{:+}", rd, name.display(None), offset) + } + &MInst::LoadAddr { ref rd, ref mem } => { + let rs = mem.to_string_with_alloc(allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("load_addr {},{}", rd, rs) + } + &MInst::VirtualSPOffsetAdj { amount } => { + format!("virtual_sp_offset_adj {:+}", amount) + } + &MInst::Mov { rd, rm, ty } => { + let rd = format_reg(rd.to_reg(), allocs); + let rm = format_reg(rm, allocs); + + let op = match ty { + F32 => "fmv.s", + F64 => "fmv.d", + ty if ty.is_vector() => "vmv1r.v", + _ => "mv", + }; + + format!("{op} {rd},{rm}") + } + &MInst::MovFromPReg { rd, rm } => { + let rd = format_reg(rd.to_reg(), allocs); + debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); + let rm = reg_name(Reg::from(rm)); + format!("mv {},{}", rd, rm) + } + &MInst::Fence { pred, succ } => { + format!( + "fence {},{}", + Inst::fence_req_to_string(pred), + Inst::fence_req_to_string(succ), + ) + } + &MInst::FenceI => "fence.i".into(), + &MInst::Select { + ref dst, + condition, + ref x, + ref y, + ty, + } => { + let condition = format_reg(condition, allocs); + let x = format_regs(x.regs(), allocs); + let y = format_regs(y.regs(), allocs); + let dst: Vec<_> = dst.clone().into_iter().map(|r| r.to_reg()).collect(); + let dst = format_regs(&dst[..], allocs); + format!("select_{} {},{},{}##condition={}", ty, dst, x, y, condition) + } + &MInst::Udf { trap_code } => format!("udf##trap_code={}", trap_code), + &MInst::EBreak {} => String::from("ebreak"), + &MInst::ECall {} => String::from("ecall"), + &Inst::VecAluRRRR { + op, + vd, + vd_src, + vs1, + vs2, + ref mask, + ref vstate, + } => { + let vs1_s = format_reg(vs1, allocs); + let vs2_s = format_reg(vs2, allocs); + let vd_src_s = format_reg(vd_src, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + let vd_fmt = if vd_s != vd_src_s { + format!("{},{}", vd_s, vd_src_s) + } else { + vd_s + }; + + // Note: vs2 and vs1 here are opposite to the standard scalar ordering. + // This is noted in Section 10.1 of the RISC-V Vector spec. + format!("{op} {vd_fmt},{vs2_s},{vs1_s}{mask} {vstate}") + } + &Inst::VecAluRRRImm5 { + op, + vd, + imm, + vs2, + ref mask, + ref vstate, + .. + } => { + let vs2_s = format_reg(vs2, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + // Some opcodes interpret the immediate as unsigned, lets show the + // correct number here. 
+ let imm_s = if op.imm_is_unsigned() { + format!("{}", imm.bits()) + } else { + format!("{}", imm) + }; + + format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}") + } + &Inst::VecAluRRR { + op, + vd, + vs1, + vs2, + ref mask, + ref vstate, + } => { + let vs1_s = format_reg(vs1, allocs); + let vs2_s = format_reg(vs2, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + // Note: vs2 and vs1 here are opposite to the standard scalar ordering. + // This is noted in Section 10.1 of the RISC-V Vector spec. + match (op, vs2, vs1) { + (VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => { + format!("vneg.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => { + format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VfsgnjxVV, vs2, vs1) if vs2 == vs1 => { + format!("vfabs.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => { + format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}") + } + _ => format!("{op} {vd_s},{vs2_s},{vs1_s}{mask} {vstate}"), + } + } + &Inst::VecAluRRImm5 { + op, + vd, + imm, + vs2, + ref mask, + ref vstate, + } => { + let vs2_s = format_reg(vs2, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + // Some opcodes interpret the immediate as unsigned, lets show the + // correct number here. + let imm_s = if op.imm_is_unsigned() { + format!("{}", imm.bits()) + } else { + format!("{}", imm) + }; + + match (op, imm) { + (VecAluOpRRImm5::VxorVI, imm) if imm == Imm5::maybe_from_i8(-1).unwrap() => { + format!("vnot.v {vd_s},{vs2_s}{mask} {vstate}") + } + _ => format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}"), + } + } + &Inst::VecAluRR { + op, + vd, + vs, + ref mask, + ref vstate, + } => { + let vs_s = format_reg(vs, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + format!("{op} {vd_s},{vs_s}{mask} {vstate}") + } + &Inst::VecAluRImm5 { + op, + vd, + imm, + ref mask, + ref vstate, + } => { + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + format!("{op} {vd_s},{imm}{mask} {vstate}") + } + &Inst::VecSetState { rd, ref vstate } => { + let rd_s = format_reg(rd.to_reg(), allocs); + assert!(vstate.avl.is_static()); + format!("vsetivli {}, {}, {}", rd_s, vstate.avl, vstate.vtype) + } + Inst::VecLoad { + eew, + to, + from, + ref mask, + ref vstate, + .. + } => { + let base = format_vec_amode(from, allocs); + let vd = format_reg(to.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + format!("vl{eew}.v {vd},{base}{mask} {vstate}") + } + Inst::VecStore { + eew, + to, + from, + ref mask, + ref vstate, + .. + } => { + let dst = format_vec_amode(to, allocs); + let vs3 = format_reg(*from, allocs); + let mask = format_mask(mask, allocs); + + format!("vs{eew}.v {vs3},{dst}{mask} {vstate}") + } + } + } +} + +/// Different forms of label references for different instruction formats. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LabelUse { + /// 20-bit branch offset (unconditional branches). PC-rel, offset is + /// imm << 1. Immediate is 20 signed bits. Use in Jal instructions. + Jal20, + + /// The unconditional jump instructions all use PC-relative + /// addressing to help support position independent code. The JALR + /// instruction was defined to enable a two-instruction sequence to + /// jump anywhere in a 32-bit absolute address range. 
A LUI + /// instruction can first load rs1 with the upper 20 bits of a + /// target address, then JALR can add in the lower bits. Similarly, + /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative + /// address range. + PCRel32, + + /// All branch instructions use the B-type instruction format. The + /// 12-bit B-immediate encodes signed offsets in multiples of 2, and + /// is added to the current pc to give the target address. The + /// conditional branch range is ±4 KiB. + B12, + + /// Equivalent to the `R_RISCV_PCREL_HI20` relocation, Allows setting + /// the immediate field of an `auipc` instruction. + PCRelHi20, + + /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to + /// the final address, instead of the `PCREL_HI20` label. Allows setting + /// the immediate field of I Type instructions such as `addi` or `lw`. + /// + /// Since we currently don't support offsets in labels, this relocation has + /// an implicit offset of 4. + PCRelLo12I, +} + +impl MachInstLabelUse for LabelUse { + /// Alignment for veneer code. Every Riscv64 instruction must be + /// 4-byte-aligned. + const ALIGN: CodeOffset = 4; + + /// Maximum PC-relative range (positive), inclusive. + fn max_pos_range(self) -> CodeOffset { + match self { + LabelUse::Jal20 => ((1 << 19) - 1) * 2, + LabelUse::PCRelLo12I | LabelUse::PCRelHi20 | LabelUse::PCRel32 => { + Inst::imm_max() as CodeOffset + } + LabelUse::B12 => ((1 << 11) - 1) * 2, + } + } + + /// Maximum PC-relative range (negative). + fn max_neg_range(self) -> CodeOffset { + match self { + LabelUse::PCRel32 => Inst::imm_min().abs() as CodeOffset, + _ => self.max_pos_range() + 2, + } + } + + /// Size of window into code needed to do the patch. + fn patch_size(self) -> CodeOffset { + match self { + LabelUse::Jal20 | LabelUse::B12 | LabelUse::PCRelHi20 | LabelUse::PCRelLo12I => 4, + LabelUse::PCRel32 => 8, + } + } + + /// Perform the patch. + fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) { + assert!(use_offset % 4 == 0); + assert!(label_offset % 4 == 0); + let offset = (label_offset as i64) - (use_offset as i64); + + // re-check range + assert!( + offset >= -(self.max_neg_range() as i64) && offset <= (self.max_pos_range() as i64), + "{:?} offset '{}' use_offset:'{}' label_offset:'{}' must not exceed max range.", + self, + offset, + use_offset, + label_offset, + ); + self.patch_raw_offset(buffer, offset); + } + + /// Is a veneer supported for this label reference type? + fn supports_veneer(self) -> bool { + match self { + Self::Jal20 | Self::B12 => true, + _ => false, + } + } + + /// How large is the veneer, if supported? + fn veneer_size(self) -> CodeOffset { + match self { + Self::B12 | Self::Jal20 => 8, + _ => unreachable!(), + } + } + + /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return + /// an offset and label-use for the veneer's use of the original label. 
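+    ///
+    /// The veneer is an `auipc` + `jalr` pair that is later patched through a
+    /// `PCRel32` label use, so it can reach a roughly ±2 GiB range.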
+ fn generate_veneer( + self, + buffer: &mut [u8], + veneer_offset: CodeOffset, + ) -> (CodeOffset, LabelUse) { + let base = writable_spilltmp_reg(); + { + let x = enc_auipc(base, Imm20::from_bits(0)).to_le_bytes(); + buffer[0] = x[0]; + buffer[1] = x[1]; + buffer[2] = x[2]; + buffer[3] = x[3]; + } + { + let x = enc_jalr(writable_zero_reg(), base.to_reg(), Imm12::from_bits(0)).to_le_bytes(); + buffer[4] = x[0]; + buffer[5] = x[1]; + buffer[6] = x[2]; + buffer[7] = x[3]; + } + (veneer_offset, Self::PCRel32) + } + + fn from_reloc(reloc: Reloc, addend: Addend) -> Option { + match (reloc, addend) { + (Reloc::RiscvCall, _) => Some(Self::PCRel32), + _ => None, + } + } +} + +impl LabelUse { + fn offset_in_range(self, offset: i64) -> bool { + let min = -(self.max_neg_range() as i64); + let max = self.max_pos_range() as i64; + offset >= min && offset <= max + } + + fn patch_raw_offset(self, buffer: &mut [u8], offset: i64) { + let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]); + match self { + LabelUse::Jal20 => { + let offset = offset as u32; + let v = ((offset >> 12 & 0b1111_1111) << 12) + | ((offset >> 11 & 0b1) << 20) + | ((offset >> 1 & 0b11_1111_1111) << 21) + | ((offset >> 20 & 0b1) << 31); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v)); + } + LabelUse::PCRel32 => { + let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]); + Inst::generate_imm(offset as u64, |imm20, imm12| { + let imm20 = imm20.unwrap_or_default(); + let imm12 = imm12.unwrap_or_default(); + // Encode the OR-ed-in value with zero_reg(). The + // register parameter must be in the original + // encoded instruction and or'ing in zeroes does not + // change it. + buffer[0..4].clone_from_slice(&u32::to_le_bytes( + insn | enc_auipc(writable_zero_reg(), imm20), + )); + buffer[4..8].clone_from_slice(&u32::to_le_bytes( + insn2 | enc_jalr(writable_zero_reg(), zero_reg(), imm12), + )); + }) + // expect make sure we handled. + .expect("we have check the range before,this is a compiler error."); + } + + LabelUse::B12 => { + let offset = offset as u32; + let v = ((offset >> 11 & 0b1) << 7) + | ((offset >> 1 & 0b1111) << 8) + | ((offset >> 5 & 0b11_1111) << 25) + | ((offset >> 12 & 0b1) << 31); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v)); + } + + LabelUse::PCRelHi20 => { + // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses + // + // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the + // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an + // offset of 2048, we need to land at the next page and subtract instead. + let offset = offset as u32; + let hi20 = offset.wrapping_add(0x800) >> 12; + let insn = (insn & 0xFFF) | (hi20 << 12); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); + } + + LabelUse::PCRelLo12I => { + // `offset` is the offset from the current instruction to the target address. + // + // However we are trying to compute the offset to the target address from the previous instruction. + // The previous instruction should be the one that contains the PCRelHi20 relocation and + // stores/references the program counter (`auipc` usually). 
+ // + // Since we are trying to compute the offset from the previous instruction, we can + // represent it as offset = target_address - (current_instruction_address - 4) + // which is equivalent to offset = target_address - current_instruction_address + 4. + // + // Thus we need to add 4 to the offset here. + let lo12 = (offset + 4) as u32 & 0xFFF; + let insn = (insn & 0xFFFFF) | (lo12 << 20); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn label_use_max_range() { + assert!(LabelUse::B12.max_neg_range() == LabelUse::B12.max_pos_range() + 2); + assert!(LabelUse::Jal20.max_neg_range() == LabelUse::Jal20.max_pos_range() + 2); + assert!(LabelUse::PCRel32.max_pos_range() == (Inst::imm_max() as CodeOffset)); + assert!(LabelUse::PCRel32.max_neg_range() == (Inst::imm_min().abs() as CodeOffset)); + assert!(LabelUse::B12.max_pos_range() == ((1 << 11) - 1) * 2); + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs new file mode 100644 index 000000000000..429625e22ede --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -0,0 +1,223 @@ +//! Riscv64 ISA definitions: registers. +//! + +use crate::settings; + +use crate::machinst::{Reg, Writable}; + +use crate::machinst::RealReg; +use alloc::vec; +use alloc::vec::Vec; + +use regalloc2::VReg; +use regalloc2::{MachineEnv, PReg, RegClass}; + +// first argument of function call +#[inline] +pub fn a0() -> Reg { + x_reg(10) +} + +// second argument of function call +#[inline] +pub fn a1() -> Reg { + x_reg(11) +} + +// third argument of function call +#[inline] +pub fn a2() -> Reg { + x_reg(12) +} + +#[inline] +pub fn writable_a0() -> Writable { + Writable::from_reg(a0()) +} +#[inline] +pub fn writable_a1() -> Writable { + Writable::from_reg(a1()) +} +#[inline] +pub fn writable_a2() -> Writable { + Writable::from_reg(a2()) +} + +#[inline] +pub fn fa0() -> Reg { + f_reg(10) +} +#[inline] +pub fn writable_fa0() -> Writable { + Writable::from_reg(fa0()) +} +#[inline] +pub fn writable_fa1() -> Writable { + Writable::from_reg(fa1()) +} +#[inline] +pub fn fa1() -> Reg { + f_reg(11) +} + +#[inline] +pub fn fa7() -> Reg { + f_reg(17) +} + +/// Get a reference to the zero-register. +#[inline] +pub fn zero_reg() -> Reg { + x_reg(0) +} + +/// Get a writable reference to the zero-register (this discards a result). +#[inline] +pub fn writable_zero_reg() -> Writable { + Writable::from_reg(zero_reg()) +} +#[inline] +pub fn stack_reg() -> Reg { + x_reg(2) +} + +/// Get a writable reference to the stack-pointer register. +#[inline] +pub fn writable_stack_reg() -> Writable { + Writable::from_reg(stack_reg()) +} + +/// Get a reference to the link register (x1). +pub fn link_reg() -> Reg { + x_reg(1) +} + +/// Get a writable reference to the link register. +#[inline] +pub fn writable_link_reg() -> Writable { + Writable::from_reg(link_reg()) +} + +/// Get a reference to the frame pointer (x29). +#[inline] +pub fn fp_reg() -> Reg { + x_reg(8) +} + +/// Get a writable reference to the frame pointer. +#[inline] +pub fn writable_fp_reg() -> Writable { + Writable::from_reg(fp_reg()) +} + +/// Get a reference to the first temporary, sometimes "spill temporary", +/// register. This register is used in various ways as a temporary. +#[inline] +pub fn spilltmp_reg() -> Reg { + x_reg(31) +} + +/// Get a writable reference to the spilltmp reg. 
+#[inline]
+pub fn writable_spilltmp_reg() -> Writable<Reg> {
+    Writable::from_reg(spilltmp_reg())
+}
+
+/// Get a reference to the second spill temporary register (spilltmp2).
+#[inline]
+pub fn spilltmp_reg2() -> Reg {
+    x_reg(30)
+}
+
+/// Get a writable reference to the spilltmp2 reg.
+#[inline]
+pub fn writable_spilltmp_reg2() -> Writable<Reg> {
+    Writable::from_reg(spilltmp_reg2())
+}
+
+pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv {
+    let preferred_regs_by_class: [Vec<PReg>; 3] = {
+        let x_registers: Vec<PReg> = (5..=7)
+            .chain(10..=17)
+            .chain(28..=29)
+            .map(|i| PReg::new(i, RegClass::Int))
+            .collect();
+
+        let f_registers: Vec<PReg> = (0..=7)
+            .chain(10..=17)
+            .chain(28..=31)
+            .map(|i| PReg::new(i, RegClass::Float))
+            .collect();
+
+        let v_registers: Vec<PReg> = (0..=31).map(|i| PReg::new(i, RegClass::Vector)).collect();
+
+        [x_registers, f_registers, v_registers]
+    };
+
+    let non_preferred_regs_by_class: [Vec<PReg>; 3] = {
+        let x_registers: Vec<PReg> = (9..=9)
+            .chain(18..=27)
+            .map(|i| PReg::new(i, RegClass::Int))
+            .collect();
+
+        let f_registers: Vec<PReg> = (8..=9)
+            .chain(18..=27)
+            .map(|i| PReg::new(i, RegClass::Float))
+            .collect();
+
+        let v_registers = vec![];
+
+        [x_registers, f_registers, v_registers]
+    };
+
+    MachineEnv {
+        preferred_regs_by_class,
+        non_preferred_regs_by_class,
+        fixed_stack_slots: vec![],
+        scratch_by_class: [None, None, None],
+    }
+}
+
+#[inline]
+pub fn x_reg(enc: usize) -> Reg {
+    let p_reg = PReg::new(enc, RegClass::Int);
+    let v_reg = VReg::new(p_reg.index(), p_reg.class());
+    Reg::from(v_reg)
+}
+pub const fn px_reg(enc: usize) -> PReg {
+    PReg::new(enc, RegClass::Int)
+}
+
+#[inline]
+pub fn f_reg(enc: usize) -> Reg {
+    let p_reg = PReg::new(enc, RegClass::Float);
+    let v_reg = VReg::new(p_reg.index(), p_reg.class());
+    Reg::from(v_reg)
+}
+pub const fn pf_reg(enc: usize) -> PReg {
+    PReg::new(enc, RegClass::Float)
+}
+#[inline]
+pub(crate) fn real_reg_to_reg(x: RealReg) -> Reg {
+    let v_reg = VReg::new(x.hw_enc() as usize, x.class());
+    Reg::from(v_reg)
+}
+
+#[allow(dead_code)]
+pub(crate) fn x_reg_range(start: usize, end: usize) -> Vec<Writable<Reg>> {
+    let mut regs = vec![];
+    for i in start..=end {
+        regs.push(Writable::from_reg(x_reg(i)));
+    }
+    regs
+}
+
+#[inline]
+pub fn v_reg(enc: usize) -> Reg {
+    let p_reg = PReg::new(enc, RegClass::Vector);
+    let v_reg = VReg::new(p_reg.index(), p_reg.class());
+    Reg::from(v_reg)
+}
+pub const fn pv_reg(enc: usize) -> PReg {
+    PReg::new(enc, RegClass::Vector)
+}
diff --git a/cranelift/codegen/src/isa/zkasm/inst/unwind.rs b/cranelift/codegen/src/isa/zkasm/inst/unwind.rs
new file mode 100644
index 000000000000..1e2bb904db74
--- /dev/null
+++ b/cranelift/codegen/src/isa/zkasm/inst/unwind.rs
@@ -0,0 +1,2 @@
+#[cfg(feature = "unwind")]
+pub(crate) mod systemv;
diff --git a/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs
new file mode 100644
index 000000000000..ac5f587ac97d
--- /dev/null
+++ b/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs
@@ -0,0 +1,174 @@
+//! Unwind information for System V ABI (Riscv64).
+
+use crate::isa::zkasm::inst::regs;
+use crate::isa::unwind::systemv::RegisterMappingError;
+use crate::machinst::Reg;
+use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
+use regalloc2::RegClass;
+
+/// Creates a new zkasm common information entry (CIE).
+pub fn create_cie() -> CommonInformationEntry { + use gimli::write::CallFrameInstruction; + + let mut entry = CommonInformationEntry::new( + Encoding { + address_size: 8, + format: Format::Dwarf32, + version: 1, + }, + 4, // Code alignment factor + -8, // Data alignment factor + Register(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16), + ); + + // Every frame will start with the call frame address (CFA) at SP + let sp = Register(regs::stack_reg().to_real_reg().unwrap().hw_enc().into()); + entry.add_instruction(CallFrameInstruction::Cfa(sp, 0)); + + entry +} + +/// Map Cranelift registers to their corresponding Gimli registers. +pub fn map_reg(reg: Reg) -> Result { + let reg_offset = match reg.class() { + RegClass::Int => 0, + RegClass::Float => 32, + RegClass::Vector => 64, + }; + + let reg = reg.to_real_reg().unwrap().hw_enc() as u16; + Ok(Register(reg_offset + reg)) +} + +pub(crate) struct RegisterMapper; + +impl crate::isa::unwind::systemv::RegisterMapper for RegisterMapper { + fn map(&self, reg: Reg) -> Result { + Ok(map_reg(reg)?.0) + } + fn sp(&self) -> u16 { + regs::stack_reg().to_real_reg().unwrap().hw_enc() as u16 + } + fn fp(&self) -> Option { + Some(regs::fp_reg().to_real_reg().unwrap().hw_enc() as u16) + } + fn lr(&self) -> Option { + Some(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16) + } + fn lr_offset(&self) -> Option { + Some(8) + } +} + +#[cfg(test)] +mod tests { + use crate::cursor::{Cursor, FuncCursor}; + + use crate::ir::{ + types, AbiParam, Function, InstBuilder, Signature, StackSlotData, StackSlotKind, + UserFuncName, + }; + use crate::isa::{lookup, CallConv}; + use crate::settings::{builder, Flags}; + use crate::Context; + use gimli::write::Address; + use std::str::FromStr; + use target_lexicon::triple; + + #[test] + fn test_simple_func() { + let isa = lookup(triple!("zkasm")) + .expect("expect zkasm ISA") + .finish(Flags::new(builder())) + .expect("Creating compiler backend"); + + let mut context = Context::for_function(create_function( + CallConv::SystemV, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + )); + + let code = context + .compile(&*isa, &mut Default::default()) + .expect("expected compilation"); + + let fde = match code + .create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(1234)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 40, lsda: None, instructions: [(12, CfaOffset(16)), (12, Offset(Register(8), -16)), (12, Offset(Register(1), -8)), (16, CfaRegister(Register(8)))] }"); + } + + fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { + let mut func = + Function::with_name_signature(UserFuncName::user(0, 0), Signature::new(call_conv)); + + let block0 = func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().return_(&[]); + + if let Some(stack_slot) = stack_slot { + func.sized_stack_slots.push(stack_slot); + } + + func + } + + #[test] + fn test_multi_return_func() { + let isa = lookup(triple!("zkasm")) + .expect("expect zkasm ISA") + .finish(Flags::new(builder())) + .expect("Creating compiler backend"); + + let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV)); + + let code = context + .compile(&*isa, &mut Default::default()) + .expect("expected compilation"); + + let fde = match code + 
.create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(4321)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!( + format!("{:?}", fde), + "FrameDescriptionEntry { address: Constant(4321), length: 20, lsda: None, instructions: [] }" + ); + } + + fn create_multi_return_function(call_conv: CallConv) -> Function { + let mut sig = Signature::new(call_conv); + sig.params.push(AbiParam::new(types::I32)); + let mut func = Function::with_name_signature(UserFuncName::user(0, 0), sig); + + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, types::I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().brif(v0, block2, &[], block1, &[]); + + pos.insert_block(block1); + pos.ins().return_(&[]); + + pos.insert_block(block2); + pos.ins().return_(&[]); + + func + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/vector.rs b/cranelift/codegen/src/isa/zkasm/inst/vector.rs new file mode 100644 index 000000000000..afd248379875 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/vector.rs @@ -0,0 +1,996 @@ +use crate::isa::zkasm::inst::AllocationConsumer; +use crate::isa::zkasm::inst::EmitState; +use crate::isa::zkasm::lower::isle::generated_code::VecAluOpRRRR; +use crate::isa::zkasm::lower::isle::generated_code::{ + VecAMode, VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecAvl, + VecElementWidth, VecLmul, VecMaskMode, VecOpCategory, VecOpMasking, VecTailMode, +}; +use crate::machinst::RegClass; +use crate::Reg; +use core::fmt; + +use super::{Type, UImm5}; + +impl VecAvl { + pub fn _static(size: u32) -> Self { + VecAvl::Static { + size: UImm5::maybe_from_u8(size as u8).expect("Invalid size for AVL"), + } + } + + pub fn is_static(&self) -> bool { + match self { + VecAvl::Static { .. } => true, + } + } + + pub fn unwrap_static(&self) -> UImm5 { + match self { + VecAvl::Static { size } => *size, + } + } +} + +// TODO: Can we tell ISLE to derive this? +impl Copy for VecAvl {} + +// TODO: Can we tell ISLE to derive this? 
+impl PartialEq for VecAvl { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (VecAvl::Static { size: lhs }, VecAvl::Static { size: rhs }) => lhs == rhs, + } + } +} + +impl fmt::Display for VecAvl { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecAvl::Static { size } => write!(f, "{}", size), + } + } +} + +impl VecElementWidth { + pub fn from_type(ty: Type) -> Self { + Self::from_bits(ty.lane_bits()) + } + + pub fn from_bits(bits: u32) -> Self { + match bits { + 8 => VecElementWidth::E8, + 16 => VecElementWidth::E16, + 32 => VecElementWidth::E32, + 64 => VecElementWidth::E64, + _ => panic!("Invalid number of bits for VecElementWidth: {}", bits), + } + } + + pub fn bits(&self) -> u32 { + match self { + VecElementWidth::E8 => 8, + VecElementWidth::E16 => 16, + VecElementWidth::E32 => 32, + VecElementWidth::E64 => 64, + } + } + + pub fn encode(&self) -> u32 { + match self { + VecElementWidth::E8 => 0b000, + VecElementWidth::E16 => 0b001, + VecElementWidth::E32 => 0b010, + VecElementWidth::E64 => 0b011, + } + } +} + +impl fmt::Display for VecElementWidth { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "e{}", self.bits()) + } +} + +impl VecLmul { + pub fn encode(&self) -> u32 { + match self { + VecLmul::LmulF8 => 0b101, + VecLmul::LmulF4 => 0b110, + VecLmul::LmulF2 => 0b111, + VecLmul::Lmul1 => 0b000, + VecLmul::Lmul2 => 0b001, + VecLmul::Lmul4 => 0b010, + VecLmul::Lmul8 => 0b011, + } + } +} + +impl fmt::Display for VecLmul { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecLmul::LmulF8 => write!(f, "mf8"), + VecLmul::LmulF4 => write!(f, "mf4"), + VecLmul::LmulF2 => write!(f, "mf2"), + VecLmul::Lmul1 => write!(f, "m1"), + VecLmul::Lmul2 => write!(f, "m2"), + VecLmul::Lmul4 => write!(f, "m4"), + VecLmul::Lmul8 => write!(f, "m8"), + } + } +} + +impl VecTailMode { + pub fn encode(&self) -> u32 { + match self { + VecTailMode::Agnostic => 1, + VecTailMode::Undisturbed => 0, + } + } +} + +impl fmt::Display for VecTailMode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecTailMode::Agnostic => write!(f, "ta"), + VecTailMode::Undisturbed => write!(f, "tu"), + } + } +} + +impl VecMaskMode { + pub fn encode(&self) -> u32 { + match self { + VecMaskMode::Agnostic => 1, + VecMaskMode::Undisturbed => 0, + } + } +} + +impl fmt::Display for VecMaskMode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecMaskMode::Agnostic => write!(f, "ma"), + VecMaskMode::Undisturbed => write!(f, "mu"), + } + } +} + +/// Vector Type (VType) +/// +/// vtype provides the default type used to interpret the contents of the vector register file. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct VType { + pub sew: VecElementWidth, + pub lmul: VecLmul, + pub tail_mode: VecTailMode, + pub mask_mode: VecMaskMode, +} + +impl VType { + // https://github.com/riscv/riscv-v-spec/blob/master/vtype-format.adoc + pub fn encode(&self) -> u32 { + let mut bits = 0; + bits |= self.lmul.encode(); + bits |= self.sew.encode() << 3; + bits |= self.tail_mode.encode() << 6; + bits |= self.mask_mode.encode() << 7; + bits + } +} + +impl fmt::Display for VType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}, {}, {}, {}", + self.sew, self.lmul, self.tail_mode, self.mask_mode + ) + } +} + +/// Vector State (VState) +/// +/// VState represents the state of the vector unit that each instruction expects before execution. 
+/// Unlike VType or any of the other types here, VState is not a part of the RISC-V ISA. It is +/// used by our instruction emission code to ensure that the vector unit is in the correct state. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct VState { + pub avl: VecAvl, + pub vtype: VType, +} + +impl VState { + pub fn from_type(ty: Type) -> Self { + VState { + avl: VecAvl::_static(ty.lane_count()), + vtype: VType { + sew: VecElementWidth::from_type(ty), + lmul: VecLmul::Lmul1, + tail_mode: VecTailMode::Agnostic, + mask_mode: VecMaskMode::Agnostic, + }, + } + } +} + +impl fmt::Display for VState { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "#avl={}, #vtype=({})", self.avl, self.vtype) + } +} + +impl VecOpCategory { + pub fn encode(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#101-vector-arithmetic-instruction-encoding + match self { + VecOpCategory::OPIVV => 0b000, + VecOpCategory::OPFVV => 0b001, + VecOpCategory::OPMVV => 0b010, + VecOpCategory::OPIVI => 0b011, + VecOpCategory::OPIVX => 0b100, + VecOpCategory::OPFVF => 0b101, + VecOpCategory::OPMVX => 0b110, + VecOpCategory::OPCFG => 0b111, + } + } +} + +impl VecOpMasking { + pub fn encode(&self) -> u32 { + match self { + VecOpMasking::Enabled { .. } => 0, + VecOpMasking::Disabled => 1, + } + } + + pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + VecOpMasking::Enabled { reg } => VecOpMasking::Enabled { + reg: allocs.next(*reg), + }, + VecOpMasking::Disabled => VecOpMasking::Disabled, + } + } +} + +impl VecAluOpRRRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VmaccVX => 0b101101, + VecAluOpRRRR::VnmsacVV | VecAluOpRRRR::VnmsacVX => 0b101111, + VecAluOpRRRR::VfmaccVV | VecAluOpRRRR::VfmaccVF => 0b101100, + VecAluOpRRRR::VfnmaccVV | VecAluOpRRRR::VfnmaccVF => 0b101101, + VecAluOpRRRR::VfmsacVV | VecAluOpRRRR::VfmsacVF => 0b101110, + VecAluOpRRRR::VfnmsacVV | VecAluOpRRRR::VfnmsacVF => 0b101111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VnmsacVV => VecOpCategory::OPMVV, + VecAluOpRRRR::VmaccVX | VecAluOpRRRR::VnmsacVX => VecOpCategory::OPMVX, + VecAluOpRRRR::VfmaccVV + | VecAluOpRRRR::VfnmaccVV + | VecAluOpRRRR::VfmsacVV + | VecAluOpRRRR::VfnmsacVV => VecOpCategory::OPFVV, + VecAluOpRRRR::VfmaccVF + | VecAluOpRRRR::VfnmaccVF + | VecAluOpRRRR::VfmsacVF + | VecAluOpRRRR::VfnmsacVF => VecOpCategory::OPFVF, + } + } + + // vs1 is the only variable source, vs2 is fixed. 
+ pub fn vs1_regclass(&self) -> RegClass { + match self.category() { + VecOpCategory::OPMVV | VecOpCategory::OPFVV => RegClass::Vector, + VecOpCategory::OPMVX => RegClass::Int, + VecOpCategory::OPFVF => RegClass::Float, + _ => unreachable!(), + } + } +} + +impl fmt::Display for VecAluOpRRRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - 2); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRRImm5::VslideupVI => 0b001110, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRRImm5::VslideupVI => VecOpCategory::OPIVI, + } + } + + pub fn imm_is_unsigned(&self) -> bool { + match self { + VecAluOpRRRImm5::VslideupVI => true, + } + } + + /// Some instructions do not allow the source and destination registers to overlap. + pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRRImm5::VslideupVI => true, + } + } +} + +impl fmt::Display for VecAluOpRRRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - 2); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRR::VaddVV + | VecAluOpRRR::VaddVX + | VecAluOpRRR::VfaddVV + | VecAluOpRRR::VfaddVF => 0b000000, + VecAluOpRRR::VsubVV + | VecAluOpRRR::VsubVX + | VecAluOpRRR::VfsubVV + | VecAluOpRRR::VfsubVF => 0b000010, + VecAluOpRRR::VrsubVX => 0b000011, + VecAluOpRRR::VmulVV | VecAluOpRRR::VmulVX => 0b100101, + VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhVX => 0b100111, + VecAluOpRRR::VmulhuVV + | VecAluOpRRR::VmulhuVX + | VecAluOpRRR::VfmulVV + | VecAluOpRRR::VfmulVF => 0b100100, + VecAluOpRRR::VsmulVV | VecAluOpRRR::VsmulVX => 0b100111, + VecAluOpRRR::VsllVV | VecAluOpRRR::VsllVX => 0b100101, + VecAluOpRRR::VsrlVV | VecAluOpRRR::VsrlVX => 0b101000, + VecAluOpRRR::VsraVV | VecAluOpRRR::VsraVX => 0b101001, + VecAluOpRRR::VandVV | VecAluOpRRR::VandVX => 0b001001, + VecAluOpRRR::VorVV | VecAluOpRRR::VorVX => 0b001010, + VecAluOpRRR::VxorVV | VecAluOpRRR::VxorVX => 0b001011, + VecAluOpRRR::VminuVV | VecAluOpRRR::VminuVX | VecAluOpRRR::VredminuVS => 0b000100, + VecAluOpRRR::VminVV | VecAluOpRRR::VminVX => 0b000101, + VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxuVX | VecAluOpRRR::VredmaxuVS => 0b000110, + VecAluOpRRR::VmaxVV | VecAluOpRRR::VmaxVX => 0b000111, + VecAluOpRRR::VslidedownVX => 0b001111, + VecAluOpRRR::VfrsubVF => 0b100111, + VecAluOpRRR::VmergeVVM + | VecAluOpRRR::VmergeVXM + | VecAluOpRRR::VfmergeVFM + | VecAluOpRRR::VcompressVM => 0b010111, + VecAluOpRRR::VfdivVV + | VecAluOpRRR::VfdivVF + | VecAluOpRRR::VsadduVV + | VecAluOpRRR::VsadduVX => 0b100000, + VecAluOpRRR::VfrdivVF | VecAluOpRRR::VsaddVV | VecAluOpRRR::VsaddVX => 0b100001, + VecAluOpRRR::VfminVV => 0b000100, + VecAluOpRRR::VfmaxVV => 0b000110, + VecAluOpRRR::VssubuVV | VecAluOpRRR::VssubuVX => 0b100010, + VecAluOpRRR::VssubVV | 
VecAluOpRRR::VssubVX => 0b100011, + VecAluOpRRR::VfsgnjVV | VecAluOpRRR::VfsgnjVF => 0b001000, + VecAluOpRRR::VfsgnjnVV => 0b001001, + VecAluOpRRR::VfsgnjxVV => 0b001010, + VecAluOpRRR::VrgatherVV | VecAluOpRRR::VrgatherVX => 0b001100, + VecAluOpRRR::VwadduVV | VecAluOpRRR::VwadduVX => 0b110000, + VecAluOpRRR::VwaddVV | VecAluOpRRR::VwaddVX => 0b110001, + VecAluOpRRR::VwsubuVV | VecAluOpRRR::VwsubuVX => 0b110010, + VecAluOpRRR::VwsubVV | VecAluOpRRR::VwsubVX => 0b110011, + VecAluOpRRR::VwadduWV | VecAluOpRRR::VwadduWX => 0b110100, + VecAluOpRRR::VwaddWV | VecAluOpRRR::VwaddWX => 0b110101, + VecAluOpRRR::VwsubuWV | VecAluOpRRR::VwsubuWX => 0b110110, + VecAluOpRRR::VwsubWV | VecAluOpRRR::VwsubWX => 0b110111, + VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmseqVX + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfeqVF => 0b011000, + VecAluOpRRR::VmsneVV + | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmfleVV + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmandMM => 0b011001, + VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltuVX | VecAluOpRRR::VmorMM => 0b011010, + VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfltVF => 0b011011, + VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfneVF => 0b011100, + VecAluOpRRR::VmsleVV + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmnandMM => 0b011101, + VecAluOpRRR::VmsgtuVX | VecAluOpRRR::VmnorMM => 0b011110, + VecAluOpRRR::VmsgtVX | VecAluOpRRR::VmfgeVF => 0b011111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRR::VaddVV + | VecAluOpRRR::VsaddVV + | VecAluOpRRR::VsadduVV + | VecAluOpRRR::VsubVV + | VecAluOpRRR::VssubVV + | VecAluOpRRR::VssubuVV + | VecAluOpRRR::VsmulVV + | VecAluOpRRR::VsllVV + | VecAluOpRRR::VsrlVV + | VecAluOpRRR::VsraVV + | VecAluOpRRR::VandVV + | VecAluOpRRR::VorVV + | VecAluOpRRR::VxorVV + | VecAluOpRRR::VminuVV + | VecAluOpRRR::VminVV + | VecAluOpRRR::VmaxuVV + | VecAluOpRRR::VmaxVV + | VecAluOpRRR::VmergeVVM + | VecAluOpRRR::VrgatherVV + | VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmsneVV + | VecAluOpRRR::VmsltuVV + | VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleVV => VecOpCategory::OPIVV, + VecAluOpRRR::VwaddVV + | VecAluOpRRR::VwaddWV + | VecAluOpRRR::VwadduVV + | VecAluOpRRR::VwadduWV + | VecAluOpRRR::VwsubVV + | VecAluOpRRR::VwsubWV + | VecAluOpRRR::VwsubuVV + | VecAluOpRRR::VwsubuWV + | VecAluOpRRR::VmulVV + | VecAluOpRRR::VmulhVV + | VecAluOpRRR::VmulhuVV + | VecAluOpRRR::VredmaxuVS + | VecAluOpRRR::VredminuVS + | VecAluOpRRR::VcompressVM + | VecAluOpRRR::VmandMM + | VecAluOpRRR::VmorMM + | VecAluOpRRR::VmnandMM + | VecAluOpRRR::VmnorMM => VecOpCategory::OPMVV, + VecAluOpRRR::VwaddVX + | VecAluOpRRR::VwadduVX + | VecAluOpRRR::VwadduWX + | VecAluOpRRR::VwaddWX + | VecAluOpRRR::VwsubVX + | VecAluOpRRR::VwsubuVX + | VecAluOpRRR::VwsubuWX + | VecAluOpRRR::VwsubWX + | VecAluOpRRR::VmulVX + | VecAluOpRRR::VmulhVX + | VecAluOpRRR::VmulhuVX => VecOpCategory::OPMVX, + VecAluOpRRR::VaddVX + | VecAluOpRRR::VsaddVX + | VecAluOpRRR::VsadduVX + | VecAluOpRRR::VsubVX + | VecAluOpRRR::VssubVX + | VecAluOpRRR::VssubuVX + | VecAluOpRRR::VrsubVX + | VecAluOpRRR::VsmulVX + | VecAluOpRRR::VsllVX + | VecAluOpRRR::VsrlVX + | VecAluOpRRR::VsraVX + | VecAluOpRRR::VandVX + | VecAluOpRRR::VorVX + | VecAluOpRRR::VxorVX + | VecAluOpRRR::VminuVX + | VecAluOpRRR::VminVX + | VecAluOpRRR::VmaxuVX + | VecAluOpRRR::VmaxVX + | VecAluOpRRR::VslidedownVX + | VecAluOpRRR::VmergeVXM + | VecAluOpRRR::VrgatherVX + | VecAluOpRRR::VmseqVX 
+ | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmsltuVX + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmsgtuVX + | VecAluOpRRR::VmsgtVX => VecOpCategory::OPIVX, + VecAluOpRRR::VfaddVV + | VecAluOpRRR::VfsubVV + | VecAluOpRRR::VfmulVV + | VecAluOpRRR::VfdivVV + | VecAluOpRRR::VfmaxVV + | VecAluOpRRR::VfminVV + | VecAluOpRRR::VfsgnjVV + | VecAluOpRRR::VfsgnjnVV + | VecAluOpRRR::VfsgnjxVV + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfleVV => VecOpCategory::OPFVV, + VecAluOpRRR::VfaddVF + | VecAluOpRRR::VfsubVF + | VecAluOpRRR::VfrsubVF + | VecAluOpRRR::VfmulVF + | VecAluOpRRR::VfdivVF + | VecAluOpRRR::VfrdivVF + | VecAluOpRRR::VfmergeVFM + | VecAluOpRRR::VfsgnjVF + | VecAluOpRRR::VmfeqVF + | VecAluOpRRR::VmfneVF + | VecAluOpRRR::VmfltVF + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmfgeVF => VecOpCategory::OPFVF, + } + } + + // vs1 is the only variable source, vs2 is fixed. + pub fn vs1_regclass(&self) -> RegClass { + match self.category() { + VecOpCategory::OPIVV | VecOpCategory::OPFVV | VecOpCategory::OPMVV => RegClass::Vector, + VecOpCategory::OPIVX | VecOpCategory::OPMVX => RegClass::Int, + VecOpCategory::OPFVF => RegClass::Float, + _ => unreachable!(), + } + } + + /// Some instructions do not allow the source and destination registers to overlap. + pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRR::VrgatherVV + | VecAluOpRRR::VrgatherVX + | VecAluOpRRR::VcompressVM + | VecAluOpRRR::VwadduVV + | VecAluOpRRR::VwadduVX + | VecAluOpRRR::VwaddVV + | VecAluOpRRR::VwaddVX + | VecAluOpRRR::VwadduWV + | VecAluOpRRR::VwadduWX + | VecAluOpRRR::VwaddWV + | VecAluOpRRR::VwaddWX + | VecAluOpRRR::VwsubuVV + | VecAluOpRRR::VwsubuVX + | VecAluOpRRR::VwsubVV + | VecAluOpRRR::VwsubVX + | VecAluOpRRR::VwsubuWV + | VecAluOpRRR::VwsubuWX + | VecAluOpRRR::VwsubWV + | VecAluOpRRR::VwsubWX => true, + _ => false, + } + } +} + +impl fmt::Display for VecAluOpRRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let suffix_length = match self { + VecAluOpRRR::VmergeVVM | VecAluOpRRR::VmergeVXM | VecAluOpRRR::VfmergeVFM => 3, + _ => 2, + }; + + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - suffix_length); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRImm5::VaddVI => 0b000000, + VecAluOpRRImm5::VrsubVI => 0b000011, + VecAluOpRRImm5::VsllVI => 0b100101, + VecAluOpRRImm5::VsrlVI => 0b101000, + VecAluOpRRImm5::VsraVI => 0b101001, + VecAluOpRRImm5::VandVI => 0b001001, + VecAluOpRRImm5::VorVI => 0b001010, + VecAluOpRRImm5::VxorVI => 0b001011, + VecAluOpRRImm5::VslidedownVI => 0b001111, + VecAluOpRRImm5::VssrlVI => 0b101010, + VecAluOpRRImm5::VmergeVIM => 0b010111, + VecAluOpRRImm5::VsadduVI => 0b100000, + VecAluOpRRImm5::VsaddVI => 0b100001, + VecAluOpRRImm5::VrgatherVI => 0b001100, + VecAluOpRRImm5::VmvrV => 0b100111, + VecAluOpRRImm5::VnclipWI => 0b101111, + VecAluOpRRImm5::VnclipuWI => 0b101110, + VecAluOpRRImm5::VmseqVI => 0b011000, + VecAluOpRRImm5::VmsneVI => 0b011001, + VecAluOpRRImm5::VmsleuVI => 0b011100, + VecAluOpRRImm5::VmsleVI => 0b011101, + VecAluOpRRImm5::VmsgtuVI => 0b011110, + VecAluOpRRImm5::VmsgtVI => 
0b011111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRImm5::VaddVI + | VecAluOpRRImm5::VrsubVI + | VecAluOpRRImm5::VsllVI + | VecAluOpRRImm5::VsrlVI + | VecAluOpRRImm5::VsraVI + | VecAluOpRRImm5::VandVI + | VecAluOpRRImm5::VorVI + | VecAluOpRRImm5::VxorVI + | VecAluOpRRImm5::VssrlVI + | VecAluOpRRImm5::VslidedownVI + | VecAluOpRRImm5::VmergeVIM + | VecAluOpRRImm5::VsadduVI + | VecAluOpRRImm5::VsaddVI + | VecAluOpRRImm5::VrgatherVI + | VecAluOpRRImm5::VmvrV + | VecAluOpRRImm5::VnclipWI + | VecAluOpRRImm5::VnclipuWI + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => VecOpCategory::OPIVI, + } + } + + pub fn imm_is_unsigned(&self) -> bool { + match self { + VecAluOpRRImm5::VsllVI + | VecAluOpRRImm5::VsrlVI + | VecAluOpRRImm5::VssrlVI + | VecAluOpRRImm5::VsraVI + | VecAluOpRRImm5::VslidedownVI + | VecAluOpRRImm5::VrgatherVI + | VecAluOpRRImm5::VmvrV + | VecAluOpRRImm5::VnclipWI + | VecAluOpRRImm5::VnclipuWI => true, + VecAluOpRRImm5::VaddVI + | VecAluOpRRImm5::VrsubVI + | VecAluOpRRImm5::VandVI + | VecAluOpRRImm5::VorVI + | VecAluOpRRImm5::VxorVI + | VecAluOpRRImm5::VmergeVIM + | VecAluOpRRImm5::VsadduVI + | VecAluOpRRImm5::VsaddVI + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => false, + } + } + + /// Some instructions do not allow the source and destination registers to overlap. + pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRImm5::VrgatherVI => true, + _ => false, + } + } +} + +impl fmt::Display for VecAluOpRRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let suffix_length = match self { + VecAluOpRRImm5::VmergeVIM => 3, + _ => 2, + }; + + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - suffix_length); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => { + 0b010000 + } + VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => 0b010010, + VecAluOpRR::VfsqrtV => 0b010011, + VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0b010111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRR::VmvSX => VecOpCategory::OPMVX, + VecAluOpRR::VmvXS + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => VecOpCategory::OPMVV, + VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => VecOpCategory::OPFVF, + VecAluOpRR::VfmvFS | VecAluOpRR::VfsqrtV => VecOpCategory::OPFVV, + VecAluOpRR::VmvVV => VecOpCategory::OPIVV, + VecAluOpRR::VmvVX => VecOpCategory::OPIVX, + } + } + + /// Returns the auxiliary encoding field for the instruction, if any. 
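+    ///
+    /// For the single-source instructions handled here this value is a sub-opcode rather than a
+    /// register number: it fills whichever of the vs1/vs2 register fields does not carry the real
+    /// source operand (see `vs_is_vs2_encoded` below for which field that is).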
+ pub fn aux_encoding(&self) -> u32 { + match self { + // VRXUNARY0 + VecAluOpRR::VmvSX => 0b00000, + // VWXUNARY0 + VecAluOpRR::VmvXS => 0b00000, + // VRFUNARY0 + VecAluOpRR::VfmvSF => 0b00000, + // VWFUNARY0 + VecAluOpRR::VfmvFS => 0b00000, + // VFUNARY1 + VecAluOpRR::VfsqrtV => 0b00000, + // VXUNARY0 + VecAluOpRR::VzextVF8 => 0b00010, + VecAluOpRR::VsextVF8 => 0b00011, + VecAluOpRR::VzextVF4 => 0b00100, + VecAluOpRR::VsextVF4 => 0b00101, + VecAluOpRR::VzextVF2 => 0b00110, + VecAluOpRR::VsextVF2 => 0b00111, + // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: + // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. + VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0, + } + } + + /// Most of these opcodes have the source register encoded in the VS2 field and + /// the `aux_encoding` field in VS1. However some special snowflakes have it the + /// other way around. As far as I can tell only vmv.v.* are backwards. + pub fn vs_is_vs2_encoded(&self) -> bool { + match self { + VecAluOpRR::VmvXS + | VecAluOpRR::VfmvFS + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => true, + VecAluOpRR::VmvSX + | VecAluOpRR::VfmvSF + | VecAluOpRR::VmvVV + | VecAluOpRR::VmvVX + | VecAluOpRR::VfmvVF => false, + } + } + + pub fn dst_regclass(&self) -> RegClass { + match self { + VecAluOpRR::VfmvSF + | VecAluOpRR::VmvSX + | VecAluOpRR::VmvVV + | VecAluOpRR::VmvVX + | VecAluOpRR::VfmvVF + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => RegClass::Vector, + VecAluOpRR::VmvXS => RegClass::Int, + VecAluOpRR::VfmvFS => RegClass::Float, + } + } + + pub fn src_regclass(&self) -> RegClass { + match self { + VecAluOpRR::VmvXS + | VecAluOpRR::VfmvFS + | VecAluOpRR::VmvVV + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => RegClass::Vector, + VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float, + VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int, + } + } + + /// Some instructions do not allow the source and destination registers to overlap. 
+ pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => true, + _ => false, + } + } +} + +impl fmt::Display for VecAluOpRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + VecAluOpRR::VmvSX => "vmv.s.x", + VecAluOpRR::VmvXS => "vmv.x.s", + VecAluOpRR::VfmvSF => "vfmv.s.f", + VecAluOpRR::VfmvFS => "vfmv.f.s", + VecAluOpRR::VfsqrtV => "vfsqrt.v", + VecAluOpRR::VzextVF2 => "vzext.vf2", + VecAluOpRR::VzextVF4 => "vzext.vf4", + VecAluOpRR::VzextVF8 => "vzext.vf8", + VecAluOpRR::VsextVF2 => "vsext.vf2", + VecAluOpRR::VsextVF4 => "vsext.vf4", + VecAluOpRR::VsextVF8 => "vsext.vf8", + VecAluOpRR::VmvVV => "vmv.v.v", + VecAluOpRR::VmvVX => "vmv.v.x", + VecAluOpRR::VfmvVF => "vfmv.v.f", + }) + } +} + +impl VecAluOpRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRImm5::VmvVI => 0b010111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRImm5::VmvVI => VecOpCategory::OPIVI, + } + } + + /// Returns the auxiliary encoding field for the instruction, if any. + pub fn aux_encoding(&self) -> u32 { + match self { + // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: + // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. + VecAluOpRImm5::VmvVI => 0, + } + } +} + +impl fmt::Display for VecAluOpRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + VecAluOpRImm5::VmvVI => "vmv.v.i", + }) + } +} + +impl VecAMode { + pub fn get_base_register(&self) -> Option { + match self { + VecAMode::UnitStride { base, .. } => base.get_base_register(), + } + } + + pub fn get_allocatable_register(&self) -> Option { + match self { + VecAMode::UnitStride { base, .. } => base.get_allocatable_register(), + } + } + + pub(crate) fn with_allocs(self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + VecAMode::UnitStride { base } => VecAMode::UnitStride { + base: base.with_allocs(allocs), + }, + } + } + + pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 { + match self { + VecAMode::UnitStride { base, .. } => base.get_offset_with_state(state), + } + } + + /// `mop` field, described in Table 7 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn mop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b00, + } + } + + /// `lumop` field, described in Table 9 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn lumop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b00000, + } + } + + /// `sumop` field, described in Table 10 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn sumop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b00000, + } + } + + /// The `nf[2:0]` field encodes the number of fields in each segment. 
For regular vector loads and + /// stores, nf=0, indicating that a single value is moved between a vector register group and memory + /// at each element position. Larger values in the nf field are used to access multiple contiguous + /// fields within a segment as described in Section 7.8 Vector Load/Store Segment Instructions. + /// + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn nf(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b000, + } + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst_vector.isle b/cranelift/codegen/src/isa/zkasm/inst_vector.isle new file mode 100644 index 000000000000..cadf4911f989 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst_vector.isle @@ -0,0 +1,1773 @@ +;; Represents the possible widths of an element when used in an operation. +(type VecElementWidth (enum + (E8) + (E16) + (E32) + (E64) +)) + +;; Vector Register Group Multiplier (LMUL) +;; +;; The LMUL setting specifies how we should group registers together. LMUL can +;; also be a fractional value, reducing the number of bits used in a single +;; vector register. Fractional LMUL is used to increase the number of effective +;; usable vector register groups when operating on mixed-width values. +(type VecLmul (enum + (LmulF8) + (LmulF4) + (LmulF2) + (Lmul1) + (Lmul2) + (Lmul4) + (Lmul8) +)) + +;; Tail Mode +;; +;; The tail mode specifies how the tail elements of a vector register are handled. +(type VecTailMode (enum + ;; Tail Agnostic means that the tail elements are left in an undefined state. + (Agnostic) + ;; Tail Undisturbed means that the tail elements are left in their original values. + (Undisturbed) +)) + +;; Mask Mode +;; +;; The mask mode specifies how the masked elements of a vector register are handled. +(type VecMaskMode (enum + ;; Mask Agnostic means that the masked out elements are left in an undefined state. + (Agnostic) + ;; Mask Undisturbed means that the masked out elements are left in their original values. + (Undisturbed) +)) + +;; Application Vector Length (AVL) +;; +;; This setting specifies the number of elements that are going to be processed +;; in a single instruction. Note: We may end up processing fewer elements than +;; the AVL setting, if they don't fit in a single register. +(type VecAvl (enum + ;; Static AVL emits a `vsetivli` that uses a constant value + (Static (size UImm5)) + ;; TODO: Add a dynamic, register based AVL mode when we are able to properly test it +)) + +(type VType (primitive VType)) +(type VState (primitive VState)) + + +;; Vector Opcode Category +;; +;; These categories are used to determine the type of operands that are allowed in the +;; instruction. +(type VecOpCategory (enum + (OPIVV) + (OPFVV) + (OPMVV) + (OPIVI) + (OPIVX) + (OPFVF) + (OPMVX) + (OPCFG) +)) + +;; Vector Opcode Masking +;; +;; When masked, the instruction will only operate on the elements that are dictated by +;; the mask register. Currently this is always fixed to v0. 
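+;;
+;; Illustrative usage with placeholder names: `(rv_vadd_vv x y (unmasked) ty)` builds an unmasked
+;; add, while `(rv_vadd_vv x y (masked m) ty)` operates only on the lanes selected by the mask
+;; register `m` (in practice v0, as noted above). The `masked`/`unmasked` constructors are
+;; declared right below.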
+(type VecOpMasking (enum + (Enabled (reg Reg)) + (Disabled) +)) + +(decl pure masked (VReg) VecOpMasking) +(rule (masked reg) (VecOpMasking.Enabled reg)) + +(decl pure unmasked () VecOpMasking) +(rule (unmasked) (VecOpMasking.Disabled)) + +;; Register to Register ALU Ops +(type VecAluOpRRR (enum + ;; Vector-Vector Opcodes + (VaddVV) + (VsaddVV) + (VsadduVV) + (VwaddVV) + (VwaddWV) + (VwadduVV) + (VwadduWV) + (VsubVV) + (VwsubVV) + (VwsubWV) + (VwsubuVV) + (VwsubuWV) + (VssubVV) + (VssubuVV) + (VmulVV) + (VmulhVV) + (VmulhuVV) + (VsmulVV) + (VsllVV) + (VsrlVV) + (VsraVV) + (VandVV) + (VorVV) + (VxorVV) + (VmaxVV) + (VmaxuVV) + (VminVV) + (VminuVV) + (VfaddVV) + (VfsubVV) + (VfmulVV) + (VfdivVV) + (VfminVV) + (VfmaxVV) + (VfsgnjVV) + (VfsgnjnVV) + (VfsgnjxVV) + (VmergeVVM) + (VredmaxuVS) + (VredminuVS) + (VrgatherVV) + (VcompressVM) + (VmseqVV) + (VmsneVV) + (VmsltuVV) + (VmsltVV) + (VmsleuVV) + (VmsleVV) + (VmfeqVV) + (VmfneVV) + (VmfltVV) + (VmfleVV) + (VmandMM) + (VmorMM) + (VmnandMM) + (VmnorMM) + + + ;; Vector-Scalar Opcodes + (VaddVX) + (VsaddVX) + (VsadduVX) + (VwaddVX) + (VwaddWX) + (VwadduVX) + (VwadduWX) + (VsubVX) + (VrsubVX) + (VwsubVX) + (VwsubWX) + (VwsubuVX) + (VwsubuWX) + (VssubVX) + (VssubuVX) + (VmulVX) + (VmulhVX) + (VmulhuVX) + (VsmulVX) + (VsllVX) + (VsrlVX) + (VsraVX) + (VandVX) + (VorVX) + (VxorVX) + (VmaxVX) + (VmaxuVX) + (VminVX) + (VminuVX) + (VslidedownVX) + (VfaddVF) + (VfsubVF) + (VfrsubVF) + (VfmulVF) + (VfdivVF) + (VfsgnjVF) + (VfrdivVF) + (VmergeVXM) + (VfmergeVFM) + (VrgatherVX) + (VmseqVX) + (VmsneVX) + (VmsltuVX) + (VmsltVX) + (VmsleuVX) + (VmsleVX) + (VmsgtuVX) + (VmsgtVX) + (VmfeqVF) + (VmfneVF) + (VmfltVF) + (VmfleVF) + (VmfgtVF) + (VmfgeVF) +)) + + + +;; Register-Imm ALU Ops that modify the destination register +(type VecAluOpRRRImm5 (enum + (VslideupVI) +)) + +;; Register-Register ALU Ops that modify the destination register +(type VecAluOpRRRR (enum + ;; Vector-Vector Opcodes + (VmaccVV) + (VnmsacVV) + (VfmaccVV) + (VfnmaccVV) + (VfmsacVV) + (VfnmsacVV) + + ;; Vector-Scalar Opcodes + (VmaccVX) + (VnmsacVX) + (VfmaccVF) + (VfnmaccVF) + (VfmsacVF) + (VfnmsacVF) +)) + +;; Register-Imm ALU Ops +(type VecAluOpRRImm5 (enum + ;; Regular VI Opcodes + (VaddVI) + (VsaddVI) + (VsadduVI) + (VrsubVI) + (VsllVI) + (VsrlVI) + (VsraVI) + (VandVI) + (VorVI) + (VxorVI) + (VssrlVI) + (VslidedownVI) + (VmergeVIM) + (VrgatherVI) + ;; This opcode represents multiple instructions `vmv1r`/`vmv2r`/`vmv4r`/etc... + ;; The immediate field specifies how many registers should be copied. + (VmvrV) + (VnclipWI) + (VnclipuWI) + (VmseqVI) + (VmsneVI) + (VmsleuVI) + (VmsleVI) + (VmsgtuVI) + (VmsgtVI) +)) + +;; Imm only ALU Ops +(type VecAluOpRImm5 (enum + (VmvVI) +)) + +;; These are all of the special cases that have weird encodings. They are all +;; single source, single destination instructions, and usually use one of +;; the two source registers as auxiliary encoding space. +(type VecAluOpRR (enum + (VmvSX) + (VmvXS) + (VfmvSF) + (VfmvFS) + ;; vmv.v* is special in that vs2 must be v0 (and is ignored) otherwise the instruction is illegal. + (VmvVV) + (VmvVX) + (VfmvVF) + (VfsqrtV) + (VsextVF2) + (VsextVF4) + (VsextVF8) + (VzextVF2) + (VzextVF4) + (VzextVF8) +)) + +;; Returns the canonical destination type for a VecAluOpRRImm5. 
+(decl pure vec_alu_rr_dst_type (VecAluOpRR) Type) +(extern constructor vec_alu_rr_dst_type vec_alu_rr_dst_type) + + +;; Vector Addressing Mode +(type VecAMode (enum + ;; Vector unit-stride operations access elements stored contiguously in memory + ;; starting from the base effective address. + (UnitStride + (base AMode)) + ;; TODO: Constant Stride + ;; TODO: Indexed Operations +)) + + +;; Builds a static VState matching a SIMD type. +;; The VState is guaranteed to be static with AVL set to the number of lanes. +;; Element size is set to the size of the type. +;; LMUL is set to 1. +;; Tail mode is set to agnostic. +;; Mask mode is set to agnostic. +(decl pure vstate_from_type (Type) VState) +(extern constructor vstate_from_type vstate_from_type) +(convert Type VState vstate_from_type) + +;; Alters the LMUL of a VState to mf2 +(decl pure vstate_mf2 (VState) VState) +(extern constructor vstate_mf2 vstate_mf2) + +;; Extracts an element width from a SIMD type. +(decl pure element_width_from_type (Type) VecElementWidth) +(rule (element_width_from_type ty) + (if-let $I8 (lane_type ty)) + (VecElementWidth.E8)) +(rule (element_width_from_type ty) + (if-let $I16 (lane_type ty)) + (VecElementWidth.E16)) +(rule (element_width_from_type ty) + (if-let $I32 (lane_type ty)) + (VecElementWidth.E32)) +(rule (element_width_from_type ty) + (if-let $F32 (lane_type ty)) + (VecElementWidth.E32)) +(rule (element_width_from_type ty) + (if-let $I64 (lane_type ty)) + (VecElementWidth.E64)) +(rule (element_width_from_type ty) + (if-let $F64 (lane_type ty)) + (VecElementWidth.E64)) + +(decl pure min_vec_reg_size () u64) +(extern constructor min_vec_reg_size min_vec_reg_size) + +;; An extractor that matches any type that is known to fit in a single vector +;; register. +(decl ty_vec_fits_in_register (Type) Type) +(extern extractor ty_vec_fits_in_register ty_vec_fits_in_register) + +;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; As noted in the RISC-V Vector Extension Specification, rs2 is the first +;; source register and rs1 is the second source register. This is the opposite +;; of the usual RISC-V register order. +;; See Section 10.1 of the RISC-V Vector Extension Specification. + + +;; Helper for emitting `MInst.VecAluRRRR` instructions. +;; These instructions modify the destination register. +(decl vec_alu_rrrr (VecAluOpRRRR VReg VReg Reg VecOpMasking VState) VReg) +(rule (vec_alu_rrrr op vd_src vs2 vs1 mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRRR op vd vd_src vs2 vs1 mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRRImm5` instructions. +;; These instructions modify the destination register. +(decl vec_alu_rrr_imm5 (VecAluOpRRRImm5 VReg VReg Imm5 VecOpMasking VState) VReg) +(rule (vec_alu_rrr_imm5 op vd_src vs2 imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRRImm5 op vd vd_src vs2 imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRRImm5` instructions where the immediate +;; is zero extended instead of sign extended. +(decl vec_alu_rrr_uimm5 (VecAluOpRRRImm5 VReg VReg UImm5 VecOpMasking VState) VReg) +(rule (vec_alu_rrr_uimm5 op vd_src vs2 imm mask vstate) + (vec_alu_rrr_imm5 op vd_src vs2 (uimm5_bitcast_to_imm5 imm) mask vstate)) + +;; Helper for emitting `MInst.VecAluRRR` instructions. 
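+;; The per-instruction `rv_*` helpers further down (e.g. `rv_vadd_vv`, `rv_vsub_vx`) are thin
+;; wrappers around this constructor that only fix the `VecAluOpRRR` opcode.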
+(decl vec_alu_rrr (VecAluOpRRR Reg Reg VecOpMasking VState) Reg) +(rule (vec_alu_rrr op vs2 vs1 mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRR op vd vs2 vs1 mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions. +(decl vec_alu_rr_imm5 (VecAluOpRRImm5 Reg Imm5 VecOpMasking VState) Reg) +(rule (vec_alu_rr_imm5 op vs2 imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRImm5 op vd vs2 imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions where the immediate +;; is zero extended instead of sign extended. +(decl vec_alu_rr_uimm5 (VecAluOpRRImm5 Reg UImm5 VecOpMasking VState) Reg) +(rule (vec_alu_rr_uimm5 op vs2 imm mask vstate) + (vec_alu_rr_imm5 op vs2 (uimm5_bitcast_to_imm5 imm) mask vstate)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions that use the Imm5 as +;; auxiliary encoding space. +(decl vec_alu_rr (VecAluOpRR Reg VecOpMasking VState) Reg) +(rule (vec_alu_rr op vs mask vstate) + (let ((vd WritableReg (temp_writable_reg (vec_alu_rr_dst_type op))) + (_ Unit (emit (MInst.VecAluRR op vd vs mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRImm5` instructions. +(decl vec_alu_r_imm5 (VecAluOpRImm5 Imm5 VecOpMasking VState) Reg) +(rule (vec_alu_r_imm5 op imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRImm5 op vd imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecLoad` instructions. +(decl vec_load (VecElementWidth VecAMode MemFlags VecOpMasking VState) Reg) +(rule (vec_load eew from flags mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecLoad eew vd from flags mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecStore` instructions. +(decl vec_store (VecElementWidth VecAMode VReg MemFlags VecOpMasking VState) InstOutput) +(rule (vec_store eew to from flags mask vstate) + (side_effect + (SideEffectNoResult.Inst (MInst.VecStore eew to from flags mask vstate)))) + +;; Helper for emitting the `vadd.vv` instruction. +(decl rv_vadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vadd.vx` instruction. +(decl rv_vadd_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vadd_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VaddVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vadd.vi` instruction. +(decl rv_vadd_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vadd_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VaddVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vsadd.vv` instruction. +(decl rv_vsadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsadd.vx` instruction. +(decl rv_vsadd_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsadd_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsaddVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsadd.vi` instruction. +(decl rv_vsadd_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vsadd_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VsaddVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vsaddu.vv` instruction. 
+(decl rv_vsaddu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsaddu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsadduVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsaddu.vx` instruction. +(decl rv_vsaddu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsaddu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsadduVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsaddu.vi` instruction. +(decl rv_vsaddu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vsaddu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VsadduVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vwadd.vv` instruction. +;; +;; Widening integer add, 2*SEW = SEW + SEW +(decl rv_vwadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwadd.vx` instruction. +;; +;; Widening integer add, 2*SEW = SEW + SEW +(decl rv_vwadd_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwadd_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwaddVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwadd.wv` instruction. +;; +;; Widening integer add, 2*SEW = 2*SEW + SEW +(decl rv_vwadd_wv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwadd_wv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwaddWV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwadd.wx` instruction. +;; +;; Widening integer add, 2*SEW = 2*SEW + SEW +(decl rv_vwadd_wx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwadd_wx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwaddWX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwaddu.vv` instruction. +;; +;; Widening unsigned integer add, 2*SEW = SEW + SEW +(decl rv_vwaddu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwaddu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwadduVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwaddu.vv` instruction. +;; +;; Widening unsigned integer add, 2*SEW = SEW + SEW +(decl rv_vwaddu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwaddu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwadduVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwaddu.wv` instruction. +;; +;; Widening integer add, 2*SEW = 2*SEW + SEW +(decl rv_vwaddu_wv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwaddu_wv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwadduWV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwaddu.wx` instruction. +;; +;; Widening integer add, 2*SEW = 2*SEW + SEW +(decl rv_vwaddu_wx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwaddu_wx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwadduWX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsub.vv` instruction. +(decl rv_vsub_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsub_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsubVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsub.vx` instruction. +(decl rv_vsub_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsub_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsubVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrsub.vx` instruction. +(decl rv_vrsub_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vrsub_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsub.vv` instruction. 
+;; +;; Widening integer sub, 2*SEW = SEW + SEW +(decl rv_vwsub_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwsub_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsub.vx` instruction. +;; +;; Widening integer sub, 2*SEW = SEW + SEW +(decl rv_vwsub_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwsub_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsub.wv` instruction. +;; +;; Widening integer sub, 2*SEW = 2*SEW + SEW +(decl rv_vwsub_wv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwsub_wv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubWV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsub.wx` instruction. +;; +;; Widening integer sub, 2*SEW = 2*SEW + SEW +(decl rv_vwsub_wx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwsub_wx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubWX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsubu.vv` instruction. +;; +;; Widening unsigned integer sub, 2*SEW = SEW + SEW +(decl rv_vwsubu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwsubu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsubu.vv` instruction. +;; +;; Widening unsigned integer sub, 2*SEW = SEW + SEW +(decl rv_vwsubu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwsubu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsubu.wv` instruction. +;; +;; Widening integer sub, 2*SEW = 2*SEW + SEW +(decl rv_vwsubu_wv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwsubu_wv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubuWV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsubu.wx` instruction. +;; +;; Widening integer sub, 2*SEW = 2*SEW + SEW +(decl rv_vwsubu_wx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwsubu_wx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubuWX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vssub.vv` instruction. +(decl rv_vssub_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vssub_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vssub.vx` instruction. +(decl rv_vssub_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vssub_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vssubu.vv` instruction. +(decl rv_vssubu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vssubu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vssubu.vx` instruction. +(decl rv_vssubu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vssubu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vneg.v` pseudo-instruction. +(decl rv_vneg_v (VReg VecOpMasking VState) VReg) +(rule (rv_vneg_v vs2 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 (zero_reg) mask vstate)) + +;; Helper for emitting the `vrsub.vi` instruction. +(decl rv_vrsub_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vrsub_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VrsubVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmul.vv` instruction. 
+(decl rv_vmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmul.vx` instruction. +(decl rv_vmul_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmul_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulh.vv` instruction. +(decl rv_vmulh_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmulh_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulh.vx` instruction. +(decl rv_vmulh_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmulh_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulhu.vv` instruction. +(decl rv_vmulhu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmulhu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulhu.vx` instruction. +(decl rv_vmulhu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmulhu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsmul.vv` instruction. +;; +;; Signed saturating and rounding fractional multiply +;; # vd[i] = clip(roundoff_signed(vs2[i]*vs1[i], SEW-1)) +(decl rv_vsmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsmul.vx` instruction. +;; +;; Signed saturating and rounding fractional multiply +;; # vd[i] = clip(roundoff_signed(vs2[i]*x[rs1], SEW-1)) +(decl rv_vsmul_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsmul_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsmulVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmacc.vv` instruction. +;; +;; Integer multiply-add, overwrite addend +;; # vd[i] = +(vs1[i] * vs2[i]) + vd[i] +(decl rv_vmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmacc.vx` instruction. +;; +;; Integer multiply-add, overwrite addend +;; # vd[i] = +(x[rs1] * vs2[i]) + vd[i] +(decl rv_vmacc_vx (VReg VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmacc_vx vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VmaccVX) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vnmsac.vv` instruction. +;; +;; Integer multiply-sub, overwrite minuend +;; # vd[i] = -(vs1[i] * vs2[i]) + vd[i] +(decl rv_vnmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vnmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VnmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vnmsac.vx` instruction. +;; +;; Integer multiply-sub, overwrite minuend +;; # vd[i] = -(x[rs1] * vs2[i]) + vd[i] +(decl rv_vnmsac_vx (VReg VReg XReg VecOpMasking VState) VReg) +(rule (rv_vnmsac_vx vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VnmsacVX) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `sll.vv` instruction. +(decl rv_vsll_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsll_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsllVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `sll.vx` instruction. 
+(decl rv_vsll_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsll_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsllVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsll.vi` instruction. +(decl rv_vsll_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsll_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsllVI) vs2 imm mask vstate)) + +;; Helper for emitting the `srl.vv` instruction. +(decl rv_vsrl_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsrl_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsrlVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `srl.vx` instruction. +(decl rv_vsrl_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsrl_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsrlVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsrl.vi` instruction. +(decl rv_vsrl_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsrl_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsrlVI) vs2 imm mask vstate)) + +;; Helper for emitting the `sra.vv` instruction. +(decl rv_vsra_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsra_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsraVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `sra.vx` instruction. +(decl rv_vsra_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsra_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsraVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsra.vi` instruction. +(decl rv_vsra_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsra_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsraVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vand.vv` instruction. +(decl rv_vand_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vand_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VandVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vand.vx` instruction. +(decl rv_vand_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vand_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VandVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vand.vi` instruction. +(decl rv_vand_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vand_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VandVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vor.vv` instruction. +(decl rv_vor_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vor_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VorVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vor.vx` instruction. +(decl rv_vor_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vor_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VorVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vor.vi` instruction. +(decl rv_vor_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vor_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VorVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vxor.vv` instruction. +(decl rv_vxor_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vxor_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VxorVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vxor.vx` instruction. +(decl rv_vxor_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vxor_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VxorVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vxor.vi` instruction. +(decl rv_vxor_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vxor_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VxorVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vssrl.vi` instruction. 
+;; +;; vd[i] = (unsigned(vs2[i]) >> imm) + r +;; +;; `r` here is the rounding mode currently selected. +(decl rv_vssrl_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vssrl_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VssrlVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vnot.v` instruction. +;; This is just a mnemonic for `vxor.vi vd, vs, -1` +(decl rv_vnot_v (VReg VecOpMasking VState) VReg) +(rule (rv_vnot_v vs2 mask vstate) + (if-let neg1 (imm5_from_i8 -1)) + (rv_vxor_vi vs2 neg1 mask vstate)) + +;; Helper for emitting the `vmax.vv` instruction. +(decl rv_vmax_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmax_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmax.vx` instruction. +(decl rv_vmax_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmax_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmin.vv` instruction. +(decl rv_vmin_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmin_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmin.vx` instruction. +(decl rv_vmin_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmin_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmaxu.vv` instruction. +(decl rv_vmaxu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmaxu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmaxu.vx` instruction. +(decl rv_vmaxu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmaxu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vminu.vv` instruction. +(decl rv_vminu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vminu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vminu.vx` instruction. +(decl rv_vminu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vminu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfadd.vv` instruction. +(decl rv_vfadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfadd.vf` instruction. +(decl rv_vfadd_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfadd_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfaddVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsub.vv` instruction. +(decl rv_vfsub_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfsub_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsubVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsub.vf` instruction. +(decl rv_vfsub_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfsub_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsubVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfrsub.vf` instruction. +(decl rv_vfrsub_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfrsub_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfrsubVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmul.vv` instruction. +(decl rv_vfmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmul.vf` instruction. 
+(decl rv_vfmul_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmul_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmulVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmacc.vv` instruction. +;; +;; FP multiply-accumulate, overwrites addend +;; # vd[i] = +(vs1[i] * vs2[i]) + vd[i] +(decl rv_vfmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmacc.vf` instruction. +;; +;; FP multiply-accumulate, overwrites addend +;; # vd[i] = +(f[rs1] * vs2[i]) + vd[i] +(decl rv_vfmacc_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmacc_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmaccVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmacc.vv` instruction. +;; +;; FP negate-(multiply-accumulate), overwrites subtrahend +;; # vd[i] = -(vs1[i] * vs2[i]) - vd[i] +(decl rv_vfnmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfnmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmacc.vf` instruction. +;; +;; FP negate-(multiply-accumulate), overwrites subtrahend +;; # vd[i] = -(f[rs1] * vs2[i]) - vd[i] +(decl rv_vfnmacc_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfnmacc_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmaccVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmsac.vv` instruction. +;; +;; FP multiply-subtract-accumulator, overwrites subtrahend +;; # vd[i] = +(vs1[i] * vs2[i]) - vd[i] +(decl rv_vfmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmsac.vf` instruction. +;; +;; FP multiply-subtract-accumulator, overwrites subtrahend +;; # vd[i] = +(f[rs1] * vs2[i]) - vd[i] +(decl rv_vfmsac_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmsac_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmsacVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmsac.vv` instruction. +;; +;; FP negate-(multiply-subtract-accumulator), overwrites minuend +;; # vd[i] = -(vs1[i] * vs2[i]) + vd[i] +(decl rv_vfnmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfnmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmsac.vf` instruction. +;; +;; FP negate-(multiply-subtract-accumulator), overwrites minuend +;; # vd[i] = -(f[rs1] * vs2[i]) + vd[i] +(decl rv_vfnmsac_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfnmsac_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmsacVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfdiv.vv` instruction. +(decl rv_vfdiv_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfdiv_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfdivVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfdiv.vf` instruction. +(decl rv_vfdiv_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfdiv_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfdivVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfrdiv.vf` instruction. +(decl rv_vfrdiv_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfrdiv_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfrdivVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmin.vv` instruction. 
+(decl rv_vfmin_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfmin_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfminVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfmax.vv` instruction.
+(decl rv_vfmax_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfmax_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfmaxVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfsgnj.vv` ("Floating Point Sign Injection") instruction.
+;; The output of this instruction is `vs2` with the sign bit from `vs1`
+(decl rv_vfsgnj_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnj_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfsgnj.vf` ("Floating Point Sign Injection") instruction.
+(decl rv_vfsgnj_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnj_vf vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjVF) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfsgnjn.vv` ("Floating Point Sign Injection Negated") instruction.
+;; The output of this instruction is `vs2` with the negated sign bit from `vs1`
+(decl rv_vfsgnjn_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnjn_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjnVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfneg.v` instruction.
+;; This instruction is a mnemonic for `vfsgnjn.vv vd, vs, vs`
+(decl rv_vfneg_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vfneg_v vs mask vstate) (rv_vfsgnjn_vv vs vs mask vstate))
+
+;; Helper for emitting the `vfsgnjx.vv` ("Floating Point Sign Injection Exclusive") instruction.
+;; The output of this instruction is `vs2` with the XOR of the sign bits from `vs2` and `vs1`.
+;; When `vs2 == vs1` this implements `fabs`
+(decl rv_vfsgnjx_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnjx_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjxVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfabs.v` instruction.
+;; This instruction is a mnemonic for `vfsgnjx.vv vd, vs, vs`
+(decl rv_vfabs_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vfabs_v vs mask vstate) (rv_vfsgnjx_vv vs vs mask vstate))
+
+;; Helper for emitting the `vfsqrt.v` instruction.
+;; This instruction computes the element-wise square root of the source vector.
+(decl rv_vfsqrt_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vfsqrt_v vs mask vstate)
+  (vec_alu_rr (VecAluOpRR.VfsqrtV) vs mask vstate))
+
+;; Helper for emitting the `vslidedown.vx` instruction.
+;; `vslidedown` moves all elements in the vector down by n elements.
+;; The topmost elements are up to the tail policy.
+(decl rv_vslidedown_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vslidedown_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VslidedownVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vslidedown.vi` instruction.
+;; Unlike other `vi` instructions the immediate is zero extended.
+(decl rv_vslidedown_vi (VReg UImm5 VecOpMasking VState) VReg)
+(rule (rv_vslidedown_vi vs2 imm mask vstate)
+  (vec_alu_rr_uimm5 (VecAluOpRRImm5.VslidedownVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vslideup.vi` instruction.
+;; Unlike other `vi` instructions the immediate is zero extended.
+;; This is implemented as a 2 source operand instruction, since it only
+;; partially modifies the destination register.
+(decl rv_vslideup_vvi (VReg VReg UImm5 VecOpMasking VState) VReg)
+(rule (rv_vslideup_vvi vd vs2 imm mask vstate)
+  (vec_alu_rrr_uimm5 (VecAluOpRRRImm5.VslideupVI) vd vs2 imm mask vstate))
+
+;; Helper for emitting the `vmv.x.s` instruction.
+;; This instruction copies the first element of the source vector to the destination X register.
+;; Masked versions of this instruction are not supported.
+(decl rv_vmv_xs (VReg VState) XReg)
+(rule (rv_vmv_xs vs vstate)
+  (vec_alu_rr (VecAluOpRR.VmvXS) vs (unmasked) vstate))
+
+;; Helper for emitting the `vfmv.f.s` instruction.
+;; This instruction copies the first element of the source vector to the destination F register.
+;; Masked versions of this instruction are not supported.
+(decl rv_vfmv_fs (VReg VState) FReg)
+(rule (rv_vfmv_fs vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfmvFS) vs (unmasked) vstate))
+
+;; Helper for emitting the `vmv.s.x` instruction.
+;; This instruction copies the source X register into the first element of the destination vector.
+;; Masked versions of this instruction are not supported.
+(decl rv_vmv_sx (XReg VState) VReg)
+(rule (rv_vmv_sx vs vstate)
+  (vec_alu_rr (VecAluOpRR.VmvSX) vs (unmasked) vstate))
+
+;; Helper for emitting the `vfmv.s.f` instruction.
+;; This instruction copies the source F register into the first element of the destination vector.
+;; Masked versions of this instruction are not supported.
+(decl rv_vfmv_sf (FReg VState) VReg)
+(rule (rv_vfmv_sf vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfmvSF) vs (unmasked) vstate))
+
+;; Helper for emitting the `vmv.v.x` instruction.
+;; This instruction splats the X register into all elements of the destination vector.
+;; Masked versions of this instruction are called `vmerge`
+(decl rv_vmv_vx (XReg VState) VReg)
+(rule (rv_vmv_vx vs vstate)
+  (vec_alu_rr (VecAluOpRR.VmvVX) vs (unmasked) vstate))
+
+;; Helper for emitting the `vfmv.v.f` instruction.
+;; This instruction splats the F register into all elements of the destination vector.
+;; Masked versions of this instruction are called `vmerge`
+(decl rv_vfmv_vf (FReg VState) VReg)
+(rule (rv_vfmv_vf vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfmvVF) vs (unmasked) vstate))
+
+;; Helper for emitting the `vmv.v.i` instruction.
+;; This instruction splats the immediate value into all elements of the destination vector.
+;; Masked versions of this instruction are called `vmerge`
+(decl rv_vmv_vi (Imm5 VState) VReg)
+(rule (rv_vmv_vi imm vstate)
+  (vec_alu_r_imm5 (VecAluOpRImm5.VmvVI) imm (unmasked) vstate))
+
+;; Helper for emitting the `vmerge.vvm` instruction.
+;; This instruction merges the elements of the two source vectors into the destination vector
+;; based on a mask. Elements are taken from the first source vector if the mask bit is clear,
+;; and from the second source vector if the mask bit is set. This instruction is always masked.
+;;
+;; vd[i] = v0.mask[i] ? vs1[i] : vs2[i]
+(decl rv_vmerge_vvm (VReg VReg VReg VState) VReg)
+(rule (rv_vmerge_vvm vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmergeVVM) vs2 vs1 (masked mask) vstate))
+
+;; Helper for emitting the `vmerge.vxm` instruction.
+;; Elements are taken from the first source vector if the mask bit is clear, and from the X
+;; register if the mask bit is set. This instruction is always masked.
+;;
+;; vd[i] = v0.mask[i] ? x[rs1] : vs2[i]
+(decl rv_vmerge_vxm (VReg XReg VReg VState) VReg)
+(rule (rv_vmerge_vxm vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmergeVXM) vs2 vs1 (masked mask) vstate))
+
+;; Helper for emitting the `vfmerge.vfm` instruction.
+;; Elements are taken from the first source vector if the mask bit is clear, and from the F +;; register if the mask bit is set. This instruction is always masked. +;; +;; vd[i] = v0.mask[i] ? f[rs1] : vs2[i] +(decl rv_vfmerge_vfm (VReg FReg VReg VState) VReg) +(rule (rv_vfmerge_vfm vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmergeVFM) vs2 vs1 (masked mask) vstate)) + +;; Helper for emitting the `vmerge.vim` instruction. +;; Elements are taken from the first source vector if the mask bit is clear, and from the +;; immediate value if the mask bit is set. This instruction is always masked. +;; +;; vd[i] = v0.mask[i] ? imm : vs2[i] +(decl rv_vmerge_vim (VReg Imm5 VReg VState) VReg) +(rule (rv_vmerge_vim vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmergeVIM) vs2 imm (masked mask) vstate)) + + +;; Helper for emitting the `vredminu.vs` instruction. +;; +;; vd[0] = minu( vs1[0] , vs2[*] ) +(decl rv_vredminu_vs (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vredminu_vs vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VredminuVS) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vredmaxu.vs` instruction. +;; +;; vd[0] = maxu( vs1[0] , vs2[*] ) +(decl rv_vredmaxu_vs (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vredmaxu_vs vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VredmaxuVS) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vv` instruction. +;; +;; vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; +(decl rv_vrgather_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vrgather_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrgatherVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vx` instruction. +;; +;; vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] +(decl rv_vrgather_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vrgather_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrgatherVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vi` instruction. +(decl rv_vrgather_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vrgather_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VrgatherVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vcompress.vm` instruction. +;; +;; The vector compress instruction allows elements selected by a vector mask +;; register from a source vector register group to be packed into contiguous +;; elements at the start of the destination vector register group. +;; +;; The mask register is specified through vs1 +(decl rv_vcompress_vm (VReg VReg VState) VReg) +(rule (rv_vcompress_vm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VcompressVM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmseq.vv` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vx` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vi` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmseq_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmseqVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsne.vv` (Vector Mask Set If Not Equal) instruction. 
+(decl rv_vmsne_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsne_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsneVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsne.vx` (Vector Mask Set If Not Equal) instruction. +(decl rv_vmsne_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsne_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsneVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsne.vi` (Vector Mask Set If Not Equal) instruction. +(decl rv_vmsne_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsne_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsneVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsltu.vv` (Vector Mask Set If Less Than, Unsigned) instruction. +(decl rv_vmsltu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsltu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsltu.vx` (Vector Mask Set If Less Than, Unsigned) instruction. +(decl rv_vmsltu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsltu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmslt.vv` (Vector Mask Set If Less Than) instruction. +(decl rv_vmslt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmslt_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmslt.vx` (Vector Mask Set If Less Than) instruction. +(decl rv_vmslt_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmslt_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsleu.vv` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. +(decl rv_vmsleu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsleu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsleu.vx` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. +(decl rv_vmsleu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsleu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsleu.vi` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. +(decl rv_vmsleu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsleu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleuVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsle.vv` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsle_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsle.vx` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsle_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsle.vi` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsle_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgt.vv` (Vector Mask Set If Greater Than, Unsigned) instruction. +;; This is an alias for `vmsltu.vv` with the operands inverted. 
+(decl rv_vmsgtu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vv vs2 vs1 mask vstate) (rv_vmsltu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgtu.vx` (Vector Mask Set If Greater Than, Unsigned) instruction. +(decl rv_vmsgtu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgtu.vi` (Vector Mask Set If Greater Than, Unsigned) instruction. +(decl rv_vmsgtu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtuVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgt.vv` (Vector Mask Set If Greater Than) instruction. +;; This is an alias for `vmslt.vv` with the operands inverted. +(decl rv_vmsgt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vv vs2 vs1 mask vstate) (rv_vmslt_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgt.vx` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgt.vi` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgt_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgeu.vv` (Vector Mask Set If Greater Than or Equal, Unsigned) instruction. +;; This is an alias for `vmsleu.vv` with the operands inverted. +(decl rv_vmsgeu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgeu_vv vs2 vs1 mask vstate) (rv_vmsleu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsge.vv` (Vector Mask Set If Greater Than or Equal) instruction. +;; This is an alias for `vmsle.vv` with the operands inverted. +(decl rv_vmsge_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsge_vv vs2 vs1 mask vstate) (rv_vmsle_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfeq.vv` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfeq.vf` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vv` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vf` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vv` (Vector Mask Set If Float Less Than) instruction. +(decl rv_vmflt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmflt_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfltVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vf` (Vector Mask Set If Float Less Than) instruction. 
+(decl rv_vmflt_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmflt_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfltVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfle.vv` (Vector Mask Set If Float Less Than Or Equal) instruction. +(decl rv_vmfle_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfle_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfleVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfle.vf` (Vector Mask Set If Float Less Than Or Equal) instruction. +(decl rv_vmfle_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfle_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfleVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfgt.vv` (Vector Mask Set If Float Greater Than) instruction. +;; This is an alias for `vmflt.vv` with the operands inverted. +(decl rv_vmfgt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfgt_vv vs2 vs1 mask vstate) (rv_vmflt_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfgt.vf` (Vector Mask Set If Float Greater Than) instruction. +(decl rv_vmfgt_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfgt_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfgtVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfge.vv` (Vector Mask Set If Float Greater Than Or Equal) instruction. +;; This is an alias for `vmfle.vv` with the operands inverted. +(decl rv_vmfge_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfge_vv vs2 vs1 mask vstate) (rv_vmfle_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfge.vf` (Vector Mask Set If Float Greater Than Or Equal) instruction. +(decl rv_vmfge_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfge_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfgeVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vzext.vf2` instruction. +;; Zero-extend SEW/2 source to SEW destination +(decl rv_vzext_vf2 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf2 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF2) vs mask vstate)) + +;; Helper for emitting the `vzext.vf4` instruction. +;; Zero-extend SEW/4 source to SEW destination +(decl rv_vzext_vf4 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf4 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF4) vs mask vstate)) + +;; Helper for emitting the `vzext.vf8` instruction. +;; Zero-extend SEW/8 source to SEW destination +(decl rv_vzext_vf8 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf8 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF8) vs mask vstate)) + +;; Helper for emitting the `vsext.vf2` instruction. +;; Sign-extend SEW/2 source to SEW destination +(decl rv_vsext_vf2 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf2 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF2) vs mask vstate)) + +;; Helper for emitting the `vsext.vf4` instruction. +;; Sign-extend SEW/4 source to SEW destination +(decl rv_vsext_vf4 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf4 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF4) vs mask vstate)) + +;; Helper for emitting the `vsext.vf8` instruction. +;; Sign-extend SEW/8 source to SEW destination +(decl rv_vsext_vf8 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf8 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF8) vs mask vstate)) + +;; Helper for emitting the `vnclip.wi` instruction. 
+;; +;; vd[i] = clip(roundoff_signed(vs2[i], uimm)) +(decl rv_vnclip_wi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vnclip_wi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipWI) vs2 imm mask vstate)) + +;; Helper for emitting the `vnclipu.wi` instruction. +;; +;; vd[i] = clip(roundoff_unsigned(vs2[i], uimm)) +(decl rv_vnclipu_wi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vnclipu_wi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipuWI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmand.mm` (Mask Bitwise AND) instruction. +;; +;; vd.mask[i] = vs2.mask[i] && vs1.mask[i] +(decl rv_vmand_mm (VReg VReg VState) VReg) +(rule (rv_vmand_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmandMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmor.mm` (Mask Bitwise OR) instruction. +;; +;; vd.mask[i] = vs2.mask[i] || vs1.mask[i] +(decl rv_vmor_mm (VReg VReg VState) VReg) +(rule (rv_vmor_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmorMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmnand.mm` (Mask Bitwise NAND) instruction. +;; +;; vd.mask[i] = !(vs2.mask[i] && vs1.mask[i]) +(decl rv_vmnand_mm (VReg VReg VState) VReg) +(rule (rv_vmnand_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmnandMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmnot.m` (Mask Bitwise NOT) instruction. +;; This is an alias for `vmnand.mm vd, vs, vs` +;; +;; vd.mask[i] = !vs.mask[i] +(decl rv_vmnot_m (VReg VState) VReg) +(rule (rv_vmnot_m vs vstate) (rv_vmnand_mm vs vs vstate)) + +;; Helper for emitting the `vmnor.mm` (Mask Bitwise NOR) instruction. +;; +;; vd.mask[i] = !(vs2.mask[i] || vs1.mask[i]) +(decl rv_vmnor_mm (VReg VReg VState) VReg) +(rule (rv_vmnor_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmnorMM) vs2 vs1 (unmasked) vstate)) + +;;;; Multi-Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_extractlane (Type VReg u8) Reg) + +;; When extracting lane 0 for floats, we can use `vfmv.f.s` directly. +(rule 3 (gen_extractlane (ty_vec_fits_in_register ty) src 0) + (if (ty_vector_float ty)) + (rv_vfmv_fs src ty)) + +;; When extracting lane 0 for integers, we can use `vmv.x.s` directly. +(rule 2 (gen_extractlane (ty_vec_fits_in_register ty) src 0) + (if (ty_vector_not_float ty)) + (rv_vmv_xs src ty)) + +;; In the general case, we must first use a `vslidedown` to place the correct lane +;; in index 0, and then use the appropriate `vmv` instruction. +;; If the index fits into a 5-bit immediate, we can emit a `vslidedown.vi`. +(rule 1 (gen_extractlane (ty_vec_fits_in_register ty) src (uimm5_from_u8 idx)) + (gen_extractlane ty (rv_vslidedown_vi src idx (unmasked) ty) 0)) + +;; Otherwise lower it into an X register. +(rule 0 (gen_extractlane (ty_vec_fits_in_register ty) src idx) + (gen_extractlane ty (rv_vslidedown_vx src (imm $I64 idx) (unmasked) ty) 0)) + + +;; Build a vector mask from a u64 +;; TODO(#6571): We should merge this with the `vconst` rules, and take advantage of +;; the other existing `vconst` rules. +(decl gen_vec_mask (u64) VReg) + +;; When the immediate fits in a 5-bit immediate, we can use `vmv.v.i` directly. +(rule 1 (gen_vec_mask (imm5_from_u64 imm)) + (rv_vmv_vi imm (vstate_from_type $I64X2))) + +;; Materialize the mask into an X register, and move it into the bottom of +;; the vector register. +(rule 0 (gen_vec_mask mask) + (rv_vmv_sx (imm $I64 mask) (vstate_from_type $I64X2))) + + +;; Loads a `VCodeConstant` value into a vector register. 
For some special `VCodeConstant`s +;; we can use a dedicated instruction, otherwise we load the value from the pool. +;; +;; Type is the preferred type to use when loading the constant. +(decl gen_constant (Type VCodeConstant) VReg) + +;; The fallback case is to load the constant from the pool. +(rule (gen_constant ty n) + (vec_load + (element_width_from_type ty) + (VecAMode.UnitStride (gen_const_amode n)) + (mem_flags_trusted) + (unmasked) + ty)) + + +;; Emits a vslidedown instruction that moves half the lanes down. +(decl gen_slidedown_half (Type VReg) VReg) + +;; If the lane count can fit in a 5-bit immediate, we can use `vslidedown.vi`. +(rule 1 (gen_slidedown_half (ty_vec_fits_in_register ty) src) + (if-let (uimm5_from_u64 amt) (u64_udiv (ty_lane_count ty) 2)) + (rv_vslidedown_vi src amt (unmasked) ty)) + +;; Otherwise lower it into an X register. +(rule 0 (gen_slidedown_half (ty_vec_fits_in_register ty) src) + (if-let amt (u64_udiv (ty_lane_count ty) 2)) + (rv_vslidedown_vx src (imm $I64 amt) (unmasked) ty)) + + +;; Expands a mask into SEW wide lanes. Enabled lanes are set to all ones, disabled +;; lanes are set to all zeros. +(decl gen_expand_mask (Type VReg) VReg) +(rule (gen_expand_mask ty mask) + (if-let zero (imm5_from_i8 0)) + (if-let neg1 (imm5_from_i8 -1)) + (rv_vmerge_vim (rv_vmv_vi zero ty) neg1 mask ty)) + + +;; Builds a vector mask corresponding to the IntCC operation. +;; TODO: We are still missing some rules here for immediates. See #6623 +(decl gen_icmp_mask (Type IntCC Value Value) VReg) + +;; IntCC.Equal + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x y) + (rv_vmseq_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (splat y)) + (rv_vmseq_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (splat x) y) + (rv_vmseq_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (replicated_imm5 y)) + (rv_vmseq_vi x y (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (replicated_imm5 x) y) + (rv_vmseq_vi y x (unmasked) ty)) + +;; IntCC.NotEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x y) + (rv_vmsne_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (splat y)) + (rv_vmsne_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (splat x) y) + (rv_vmsne_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (replicated_imm5 y)) + (rv_vmsne_vi x y (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (replicated_imm5 x) y) + (rv_vmsne_vi y x (unmasked) ty)) + +;; IntCC.UnsignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x y) + (rv_vmsltu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x (splat y)) + (rv_vmsltu_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) (splat x) y) + (rv_vmsgtu_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) (replicated_imm5 x) y) + (rv_vmsgtu_vi y x (unmasked) ty)) + +;; IntCC.SignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x y) + (rv_vmslt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x 
(splat y)) + (rv_vmslt_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) (splat x) y) + (rv_vmsgt_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) (replicated_imm5 x) y) + (rv_vmsgt_vi y x (unmasked) ty)) + +;; IntCC.UnsignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x y) + (rv_vmsleu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (splat y)) + (rv_vmsleu_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (replicated_imm5 y)) + (rv_vmsleu_vi x y (unmasked) ty)) + +;; IntCC.SignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x y) + (rv_vmsle_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (splat y)) + (rv_vmsle_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (replicated_imm5 y)) + (rv_vmsle_vi x y (unmasked) ty)) + +;; IntCC.UnsignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x y) + (rv_vmsgtu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (splat y)) + (rv_vmsgtu_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) (splat x) y) + (rv_vmsltu_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (replicated_imm5 y)) + (rv_vmsgtu_vi x y (unmasked) ty)) + +;; IntCC.SignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x y) + (rv_vmsgt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (splat y)) + (rv_vmsgt_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) (splat x) y) + (rv_vmslt_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (replicated_imm5 y)) + (rv_vmsgt_vi x y (unmasked) ty)) + +;; IntCC.UnsignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) x y) + (rv_vmsgeu_vv x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) (splat x) y) + (rv_vmsleu_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) (replicated_imm5 x) y) + (rv_vmsleu_vi y x (unmasked) ty)) + +;; IntCC.SignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) x y) + (rv_vmsge_vv x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) (splat x) y) + (rv_vmsle_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) (replicated_imm5 x) y) + (rv_vmsle_vi y x (unmasked) ty)) + + + +;; Builds a vector mask corresponding to the FloatCC operation. 
+(decl gen_fcmp_mask (Type FloatCC Value Value) VReg) + +;; FloatCC.Equal + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x y) + (rv_vmfeq_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x (splat y)) + (rv_vmfeq_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) (splat x) y) + (rv_vmfeq_vf y x (unmasked) ty)) + +;; FloatCC.NotEqual +;; Note: This is UnorderedNotEqual. It is the only unoredered comparison that is not named as such. + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x y) + (rv_vmfne_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x (splat y)) + (rv_vmfne_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) (splat x) y) + (rv_vmfne_vf y x (unmasked) ty)) + +;; FloatCC.LessThan + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x y) + (rv_vmflt_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x (splat y)) + (rv_vmflt_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) (splat x) y) + (rv_vmfgt_vf y x (unmasked) ty)) + +;; FloatCC.LessThanOrEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x y) + (rv_vmfle_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x (splat y)) + (rv_vmfle_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) (splat x) y) + (rv_vmfge_vf y x (unmasked) ty)) + +;; FloatCC.GreaterThan + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x y) + (rv_vmfgt_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x (splat y)) + (rv_vmfgt_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) (splat x) y) + (rv_vmflt_vf y x (unmasked) ty)) + +;; FloatCC.GreaterThanOrEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x y) + (rv_vmfge_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x (splat y)) + (rv_vmfge_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) (splat x) y) + (rv_vmfle_vf y x (unmasked) ty)) + +;; FloatCC.Ordered + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Ordered) x y) + (rv_vmand_mm + (gen_fcmp_mask ty (FloatCC.Equal) x x) + (gen_fcmp_mask ty (FloatCC.Equal) y y) + ty)) + +;; FloatCC.Unordered + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Unordered) x y) + (rv_vmor_mm + (gen_fcmp_mask ty (FloatCC.NotEqual) x x) + (gen_fcmp_mask ty (FloatCC.NotEqual) y y) + ty)) + +;; FloatCC.OrderedNotEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.OrderedNotEqual) x y) + (rv_vmor_mm + (gen_fcmp_mask ty (FloatCC.LessThan) x y) + (gen_fcmp_mask ty (FloatCC.LessThan) y x) + ty)) + +;; FloatCC.UnorderedOrEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrEqual) x y) + (rv_vmnor_mm + (gen_fcmp_mask ty (FloatCC.LessThan) x y) + (gen_fcmp_mask ty (FloatCC.LessThan) y x) + ty)) + +;; FloatCC.UnorderedOrGreaterThan + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThan) x y) + (rv_vmnot_m (gen_fcmp_mask ty 
(FloatCC.LessThanOrEqual) x y) ty)) + +;; FloatCC.UnorderedOrGreaterThanOrEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThanOrEqual) x y) + (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThan) x y) ty)) + +;; FloatCC.UnorderedOrLessThan + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThan) x y) + (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThanOrEqual) x y) ty)) + +;; FloatCC.UnorderedOrLessThanOrEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThanOrEqual) x y) + (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThan) x y) ty)) diff --git a/cranelift/codegen/src/isa/zkasm/lower.isle b/cranelift/codegen/src/isa/zkasm/lower.isle new file mode 100644 index 000000000000..9e466c624fd2 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/lower.isle @@ -0,0 +1,2082 @@ +;; zkasm instruction selection and CLIF-to-MachInst lowering. + +;; The main lowering constructor term: takes a clif `Inst` and returns the +;; register(s) within which the lowered instruction's result values live. +(decl partial lower (Inst) InstOutput) + +;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty (iconst (u64_from_imm64 n)))) + (imm ty n)) + +;; ;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_vec_fits_in_register ty) (vconst n))) + (gen_constant ty (const_to_vconst n))) + +;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f32const (u32_from_ieee32 n))) + (imm $F32 n)) + +;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f64const (u64_from_ieee64 n))) + (imm $F64 n)) + +;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty (null))) + (imm ty 0)) + + +;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Base case, simply adding things in registers. +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x y))) + (rv_add x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x (imm12_from_value y)))) + (alu_rr_imm12 (select_addi ty) x y)) + +(rule 2 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd (imm12_from_value x) y))) + (alu_rr_imm12 (select_addi ty) y x)) + +;; Special case when one of the operands is uextended +;; Needs `Zba` +(rule 3 (lower (has_type $I64 (iadd x (uextend y @ (value_type $I32))))) + (if-let $true (has_zba)) + (rv_adduw y x)) + +(rule 4 (lower (has_type $I64 (iadd (uextend x @ (value_type $I32)) y))) + (if-let $true (has_zba)) + (rv_adduw x y)) + +;; Add with const shift. We have a few of these instructions with `Zba`. +(decl pure partial match_shnadd (Imm64) AluOPRRR) +(rule (match_shnadd (u64_from_imm64 1)) (AluOPRRR.Sh1add)) +(rule (match_shnadd (u64_from_imm64 2)) (AluOPRRR.Sh2add)) +(rule (match_shnadd (u64_from_imm64 3)) (AluOPRRR.Sh3add)) + +(rule 3 (lower (has_type $I64 (iadd x (ishl y (maybe_uextend (iconst n)))))) + (if-let $true (has_zba)) + (if-let shnadd (match_shnadd n)) + (alu_rrr shnadd y x)) + +(rule 4 (lower (has_type $I64 (iadd (ishl x (maybe_uextend (iconst n))) y))) + (if-let $true (has_zba)) + (if-let shnadd (match_shnadd n)) + (alu_rrr shnadd x y)) + + +;; Add with uextended const shift. We have a few of these instructions with `Zba`. +;; +;; !!! Important !!! 
+;; These rules only work for (ishl (uextend _) _) and not for (uextend (ishl _ _))!
+;; Getting this wrong means a potential miscalculation of the result, since the inner
+;; 32-bit shift can drop high bits that the zero-extend-then-shift sequence would keep.
+;; Additionally, we can only ensure that this is correct if the uextend is from 32 to 64 bits.
+(decl pure partial match_shnadd_uw (Imm64) AluOPRRR)
+(rule (match_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw))
+(rule (match_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw))
+(rule (match_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw))
+
+(rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y @ (value_type $I32)) (maybe_uextend (iconst n))))))
+  (if-let $true (has_zba))
+  (if-let shnadd_uw (match_shnadd_uw n))
+  (alu_rrr shnadd_uw y x))
+
+(rule 6 (lower (has_type $I64 (iadd (ishl (uextend x @ (value_type $I32)) (maybe_uextend (iconst n))) y)))
+  (if-let $true (has_zba))
+  (if-let shnadd_uw (match_shnadd_uw n))
+  (alu_rrr shnadd_uw x y))
+
+;; I128 cases
+(rule 7 (lower (has_type $I128 (iadd x y)))
+  (let ((low XReg (rv_add (value_regs_get x 0) (value_regs_get y 0)))
+        ;; compute carry.
+        (carry XReg (rv_sltu low (value_regs_get y 0)))
+        ;;
+        (high_tmp XReg (rv_add (value_regs_get x 1) (value_regs_get y 1)))
+        ;; add carry.
+        (high XReg (rv_add high_tmp carry)))
+    (value_regs low high)))
+
+;; SIMD Vectors
+(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (iadd x y)))
+  (rv_vadd_vv x y (unmasked) ty))
+
+(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (splat y))))
+  (rv_vadd_vx x y (unmasked) ty))
+
+(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (splat (sextend y @ (value_type sext_ty))))))
+  (if-let half_ty (ty_half_width ty))
+  (if-let $true (ty_equal (lane_type half_ty) sext_ty))
+  (rv_vwadd_wx x y (unmasked) (vstate_mf2 half_ty)))
+
+(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (splat (uextend y @ (value_type uext_ty))))))
+  (if-let half_ty (ty_half_width ty))
+  (if-let $true (ty_equal (lane_type half_ty) uext_ty))
+  (rv_vwaddu_wx x y (unmasked) (vstate_mf2 half_ty)))
+
+(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (replicated_imm5 y))))
+  (rv_vadd_vi x y (unmasked) ty))
+
+
+(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd (splat x) y)))
+  (rv_vadd_vx y x (unmasked) ty))
+
+(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (splat (sextend x @ (value_type sext_ty))) y)))
+  (if-let half_ty (ty_half_width ty))
+  (if-let $true (ty_equal (lane_type half_ty) sext_ty))
+  (rv_vwadd_wx y x (unmasked) (vstate_mf2 half_ty)))
+
+(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (splat (uextend x @ (value_type uext_ty))) y)))
+  (if-let half_ty (ty_half_width ty))
+  (if-let $true (ty_equal (lane_type half_ty) uext_ty))
+  (rv_vwaddu_wx y x (unmasked) (vstate_mf2 half_ty)))
+
+(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (replicated_imm5 x) y)))
+  (rv_vadd_vi y x (unmasked) ty))
+
+;; Signed Widening Low Additions
+
+(rule 9 (lower (has_type (ty_vec_fits_in_register _) (iadd x (swiden_low y @ (value_type in_ty)))))
+  (rv_vwadd_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+(rule 12 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_low x @ (value_type in_ty)) y)))
+  (rv_vwadd_wv y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_low x @ (value_type in_ty))
+                                                            (swiden_low y))))
+  (rv_vwadd_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_low x @ (value_type 
in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwadd_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 14 (lower (has_type (ty_vec_fits_in_register _) (iadd (splat (sextend x @ (value_type sext_ty))) + (swiden_low y @ (value_type in_ty))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwadd_vx y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Signed Widening High Additions +;; These are the same as the low additions, but we first slide down the inputs. + +(rule 9 (lower (has_type (ty_vec_fits_in_register _) (iadd x (swiden_high y @ (value_type in_ty))))) + (rv_vwadd_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 12 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_high x @ (value_type in_ty)) y))) + (rv_vwadd_wv y (gen_slidedown_half in_ty x) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_high x @ (value_type in_ty)) + (swiden_high y)))) + (rv_vwadd_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_high x @ (value_type in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwadd_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 14 (lower (has_type (ty_vec_fits_in_register _) (iadd (splat (sextend x @ (value_type sext_ty))) + (swiden_high y @ (value_type in_ty))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwadd_vx (gen_slidedown_half in_ty y) x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening Low Additions + +(rule 9 (lower (has_type (ty_vec_fits_in_register _) (iadd x (uwiden_low y @ (value_type in_ty))))) + (rv_vwaddu_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 12 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_low x @ (value_type in_ty)) y))) + (rv_vwaddu_wv y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_low x @ (value_type in_ty)) + (uwiden_low y)))) + (rv_vwaddu_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_low x @ (value_type in_ty)) + (splat (uextend y @ (value_type uext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwaddu_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 14 (lower (has_type (ty_vec_fits_in_register _) (iadd (splat (uextend x @ (value_type uext_ty))) + (uwiden_low y @ (value_type in_ty))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwaddu_vx y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening High Additions +;; These are the same as the low additions, but we first slide down the inputs. 
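+;;
+;; As a rough sketch, assuming an i16x8 input widened into an i32x4 result (register names
+;; here are purely illustrative), the emitted RVV sequence looks something like:
+;;
+;;     vslidedown.vi v_hi, v_in, 4       # gen_slidedown_half: lanes 4..7 move into lanes 0..3
+;;     vwaddu.wv     v_out, v_acc, v_hi  # widening add at the half-width vstate (vstate_mf2)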
+
+(rule 9 (lower (has_type (ty_vec_fits_in_register _) (iadd x (uwiden_high y @ (value_type in_ty)))))
+  (rv_vwaddu_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+(rule 12 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_high x @ (value_type in_ty)) y)))
+  (rv_vwaddu_wv y (gen_slidedown_half in_ty x) (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_high x @ (value_type in_ty))
+                                                            (uwiden_high y))))
+  (rv_vwaddu_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_high x @ (value_type in_ty))
+                                                            (splat (uextend y @ (value_type uext_ty))))))
+  (if-let $true (ty_equal (lane_type in_ty) uext_ty))
+  (rv_vwaddu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+(rule 14 (lower (has_type (ty_vec_fits_in_register _) (iadd (splat (uextend y @ (value_type uext_ty)))
+                                                            (uwiden_high x @ (value_type in_ty)))))
+  (if-let $true (ty_equal (lane_type in_ty) uext_ty))
+  (rv_vwaddu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+;; Signed Widening Mixed High/Low Additions
+
+(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_low x @ (value_type in_ty))
+                                                            (swiden_high y))))
+  (rv_vwadd_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_high x @ (value_type in_ty))
+                                                            (swiden_low y))))
+  (rv_vwadd_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+;; Unsigned Widening Mixed High/Low Additions
+
+(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_low x @ (value_type in_ty))
+                                                            (uwiden_high y))))
+  (rv_vwaddu_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_high x @ (value_type in_ty))
+                                                            (uwiden_low y))))
+  (rv_vwaddu_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty))))
+
+;; Fused Multiply Accumulate Rules `vmacc`
+;;
+;; I don't think we can use `vmadd`/`vnmsub` here since they just modify the multiplication
+;; register instead of the addition one. The actual pattern matched seems to be
+;; exactly the same.
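+;;
+;; For context, a rough sketch of the difference (per the RISC-V vector spec; the operand
+;; names below are just the spec's names, not part of these rules):
+;;   vmacc.vv vd, vs1, vs2   # vd[i] = +(vs1[i] * vs2[i]) + vd[i]   (the addend vd is overwritten)
+;;   vmadd.vv vd, vs1, vs2   # vd[i] = (vs1[i] * vd[i]) + vs2[i]    (the multiplicand vd is overwritten)
+;; Since the `(iadd x (imul y z))` patterns below pass the addend in as `vd`, `vmacc` (and
+;; `vnmsac` for the subtracting forms) is the variant that maps onto them directly.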
+ +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul y z)))) + (rv_vmacc_vv x y z (unmasked) ty)) + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul y (splat z))))) + (rv_vmacc_vx x y z (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul (splat y) z)))) + (rv_vmacc_vx x z y (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul x y) z))) + (rv_vmacc_vv z x y (unmasked) ty)) + +(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul x (splat y)) z))) + (rv_vmacc_vx z x y (unmasked) ty)) + +(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul (splat x) y) z))) + (rv_vmacc_vx z y x (unmasked) ty)) + +;; Fused Multiply Subtract Rules `vnmsac` + +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul y z))))) + (rv_vnmsac_vv x y z (unmasked) ty)) + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul y (splat z)))))) + (rv_vnmsac_vx x y z (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul (splat y) z))))) + (rv_vnmsac_vx x z y (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul x y)) z))) + (rv_vnmsac_vv z x y (unmasked) ty)) + +(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul x (splat y))) z))) + (rv_vnmsac_vx z x y (unmasked) ty)) + +(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul (splat x) y)) z))) + (rv_vnmsac_vx z y x (unmasked) ty)) + +;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;; +(rule + (lower (has_type (fits_in_64 ty) (uadd_overflow_trap x y tc))) + (let ((res ValueRegs (lower_uadd_overflow x y ty)) + (_ InstOutput (gen_trapif (value_regs_get res 1) tc))) + (value_regs_get res 0))) + + +;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Base case, simply subtracting things in registers. 
+ +(rule (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y))) + (rv_sub x y)) + +(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (isub x y))) + (rv_subw x y)) + +(rule 2 (lower (has_type $I128 (isub x y))) + (i128_sub x y)) + +;; SIMD Vectors +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (isub x y))) + (rv_vsub_vv x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (isub x (splat y)))) + (rv_vsub_vx x y (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (isub x (splat (sextend y @ (value_type sext_ty)))))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) sext_ty)) + (rv_vwsub_wx x y (unmasked) (vstate_mf2 half_ty))) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (isub x (splat (uextend y @ (value_type uext_ty)))))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) uext_ty)) + (rv_vwsubu_wx x y (unmasked) (vstate_mf2 half_ty))) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (isub (splat x) y))) + (rv_vrsub_vx y x (unmasked) ty)) + +(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (isub (replicated_imm5 x) y))) + (rv_vrsub_vi y x (unmasked) ty)) + + +;; Signed Widening Low Subtractions + +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (swiden_low y @ (value_type in_ty))))) + (rv_vwsub_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_low x @ (value_type in_ty)) + (swiden_low y)))) + (rv_vwsub_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_low x @ (value_type in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwsub_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Signed Widening High Subtractions +;; These are the same as the low widenings, but we first slide down the inputs. + +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (swiden_high y @ (value_type in_ty))))) + (rv_vwsub_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_high x @ (value_type in_ty)) + (swiden_high y)))) + (rv_vwsub_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_high x @ (value_type in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwsub_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening Low Subtractions + +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (uwiden_low y @ (value_type in_ty))))) + (rv_vwsubu_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_low x @ (value_type in_ty)) + (uwiden_low y)))) + (rv_vwsubu_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_low x @ (value_type in_ty)) + (splat (uextend y @ (value_type uext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwsubu_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening High Subtractions +;; These are the same as the low widenings, but we first slide down the inputs. 
+ +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (uwiden_high y @ (value_type in_ty))))) + (rv_vwsubu_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_high x @ (value_type in_ty)) + (uwiden_high y)))) + (rv_vwsubu_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_high x @ (value_type in_ty)) + (splat (uextend y @ (value_type uext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwsubu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Signed Widening Mixed High/Low Subtractions + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_low x @ (value_type in_ty)) + (swiden_high y)))) + (rv_vwsub_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_high x @ (value_type in_ty)) + (swiden_low y)))) + (rv_vwsub_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening Mixed High/Low Subtractions + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_low x @ (value_type in_ty)) + (uwiden_high y)))) + (rv_vwsubu_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_high x @ (value_type in_ty)) + (uwiden_low y)))) + (rv_vwsubu_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + + +;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_int ty) (ineg val))) + (neg ty val)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (ineg x))) + (rv_vneg_v x (unmasked) ty)) + + +;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y))) + (rv_mul x y)) + +(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (imul x y))) + (rv_mulw x y)) + +;; for I128 +(rule 2 (lower (has_type $I128 (imul x y))) + (let + ((x_regs ValueRegs x) + (x_lo XReg (value_regs_get x_regs 0)) + (x_hi XReg (value_regs_get x_regs 1)) + + ;; Get the high/low registers for `y`. 
+ (y_regs ValueRegs y) + (y_lo XReg (value_regs_get y_regs 0)) + (y_hi XReg (value_regs_get y_regs 1)) + + ;; 128bit mul formula: + ;; dst_lo = x_lo * y_lo + ;; dst_hi = mulhu(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo) + ;; + ;; We can convert the above formula into the following + ;; mulhu dst_hi, x_lo, y_lo + ;; madd dst_hi, x_lo, y_hi, dst_hi + ;; madd dst_hi, x_hi, y_lo, dst_hi + ;; madd dst_lo, x_lo, y_lo, zero + (dst_hi1 XReg (rv_mulhu x_lo y_lo)) + (dst_hi2 XReg (madd x_lo y_hi dst_hi1)) + (dst_hi XReg (madd x_hi y_lo dst_hi2)) + (dst_lo XReg (madd x_lo y_lo (zero_reg)))) + (value_regs dst_lo dst_hi))) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (imul x y))) + (rv_vmul_vv x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (imul (splat x) y))) + (rv_vmul_vx y x (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (imul x (splat y)))) + (rv_vmul_vx x y (unmasked) ty)) + +;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y))) + (lower_smlhi ty (sext x ty $I64) (sext y ty $I64))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x y))) + (rv_vmulh_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (smulhi (splat x) y))) + (rv_vmulh_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x (splat y)))) + (rv_vmulh_vx x y (unmasked) ty)) + +;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (umulhi x y))) + (lower_umlhi ty (zext x ty $I64) (zext y ty $I64))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x y))) + (rv_vmulhu_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (umulhi (splat x) y))) + (rv_vmulhu_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x (splat y)))) + (rv_vmulhu_vx x y (unmasked) ty)) + +;;;; Rules for `div` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule -1 (lower (has_type (fits_in_32 ty) (udiv x y))) + (let + ((y2 XReg (zext y ty $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_divuw (zext x ty $I64) y2))) + +(rule -1 (lower (has_type (fits_in_32 ty) (sdiv x y))) + (let + ((a XReg (sext x ty $I64)) + (b XReg (sext y ty $I64)) + (_ InstOutput (gen_div_overflow a b ty)) + (_ InstOutput (gen_div_by_zero b))) + (rv_divw a b))) + +(rule (lower (has_type $I64 (sdiv x y))) + (let + ((_ InstOutput (gen_div_overflow x y $I64)) + (_ InstOutput (gen_div_by_zero y)) ) + (rv_div x y))) + +(rule (lower (has_type $I64 (udiv x y))) + (let + ((_ InstOutput (gen_div_by_zero y))) + (rv_divu x y))) + +;;;; Rules for `rem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule -1 (lower (has_type (fits_in_16 ty) (urem x y))) + (let + ((y2 XReg (zext y ty $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remuw (zext x ty $I64) y2))) + +(rule -1 (lower (has_type (fits_in_16 ty) (srem x y))) + (let + ((y2 XReg (sext y ty $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remw (sext x ty $I64) y2))) + +(rule (lower (has_type $I32 (srem x y))) + (let + ((y2 XReg (sext y $I32 $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remw x y2))) + +(rule (lower (has_type $I32 (urem x y))) + (let + ((y2 XReg (zext y $I32 $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remuw x y2))) + +(rule (lower (has_type $I64 (srem x y))) + (let + ((_ InstOutput 
(gen_div_by_zero y))) + (rv_rem x y))) + +(rule (lower (has_type $I64 (urem x y))) + (let + ((_ InstOutput (gen_div_by_zero y))) + (rv_remu x y))) + +;;;; Rules for `and` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int ty) (band x y))) + (gen_and ty x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (band x (imm12_from_value y)))) + (rv_andi x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (band (imm12_from_value x) y))) + (rv_andi y x)) + +(rule 3 (lower (has_type (ty_scalar_float ty) (band x y))) + (lower_float_binary (AluOPRRR.And) x y ty)) + +;; Specialized lowerings for `(band x (bnot y))` which is additionally produced +;; by Cranelift's `band_not` instruction that is legalized into the simpler +;; forms early on. + +(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (band x (bnot y)))) + (if-let $true (has_zbb)) + (rv_andn x y)) + +(rule 5 (lower (has_type (fits_in_64 (ty_int ty)) (band (bnot y) x))) + (if-let $true (has_zbb)) + (rv_andn x y)) + +(rule 6 (lower (has_type $I128 (band x (bnot y)))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_andn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_andn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 7 (lower (has_type $I128 (band (bnot y) x))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_andn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_andn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (band x y))) + (rv_vand_vv x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (band x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vand_vx x y (unmasked) ty)) + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (band (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vand_vx y x (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (band x (replicated_imm5 y)))) + (rv_vand_vi x y (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (band (replicated_imm5 x) y))) + (rv_vand_vi y x (unmasked) ty)) + + +;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int ty) (bor x y))) + (gen_or ty x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (imm12_from_value y)))) + (rv_ori x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bor (imm12_from_value x) y))) + (rv_ori y x)) + +(rule 3 (lower (has_type (ty_scalar_float ty) (bor x y))) + (lower_float_binary (AluOPRRR.Or) x y ty)) + +;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced +;; by Cranelift's `bor_not` instruction that is legalized into the simpler +;; forms early on. 
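+;; With `Zbb`, `orn rd, rs1, rs2` computes `rs1 | ~rs2`, so `(bor x (bnot y))`
+;; collapses into a single instruction instead of materializing the inverted
+;; value first.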
+ +(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (bnot y)))) + (if-let $true (has_zbb)) + (rv_orn x y)) + +(rule 5 (lower (has_type (fits_in_64 (ty_int ty)) (bor (bnot y) x))) + (if-let $true (has_zbb)) + (rv_orn x y)) + +(rule 6 (lower (has_type $I128 (bor x (bnot y)))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_orn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_orn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 7 (lower (has_type $I128 (bor (bnot y) x))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_orn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_orn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (bor x y))) + (rv_vor_vv x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (bor x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vor_vx x y (unmasked) ty)) + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (bor (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vor_vx y x (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (bor x (replicated_imm5 y)))) + (rv_vor_vi x y (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (bor (replicated_imm5 x) y))) + (rv_vor_vi y x (unmasked) ty)) + + +;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y))) + (rv_xor x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x (imm12_from_value y)))) + (rv_xori x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bxor (imm12_from_value x) y))) + (rv_xori y x)) + +(rule 3 (lower (has_type $I128 (bxor x y))) + (lower_b128_binary (AluOPRRR.Xor) x y)) + +(rule 4 (lower (has_type (ty_scalar_float ty) (bxor x y))) + (lower_float_binary (AluOPRRR.Xor) x y ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (bxor x y))) + (rv_vxor_vv x y (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (bxor x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vxor_vx x y (unmasked) ty)) + +(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (bxor (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vxor_vx y x (unmasked) ty)) + +(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (bxor x (replicated_imm5 y)))) + (rv_vxor_vi x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (bxor (replicated_imm5 x) y))) + (rv_vxor_vi y x (unmasked) ty)) + + +;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar ty) (bnot x))) + (gen_bnot ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (bnot x))) + (rv_vnot_v x (unmasked) ty)) + +;;;; Rules for `bit_reverse` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 (ty_int ty)) (bitrev x))) + (lower_bit_reverse x ty)) + +(rule 1 (lower (has_type $I128 (bitrev x))) + (let ((val ValueRegs x) + (lo_rev XReg (lower_bit_reverse (value_regs_get val 0) $I64)) + (hi_rev XReg (lower_bit_reverse (value_regs_get val 1) $I64))) + (value_regs hi_rev lo_rev))) + +;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bswap x))) + (gen_bswap ty x)) + +(rule 2 (lower (has_type $I128 (bswap x))) + (value_regs + (gen_bswap $I64 
(value_regs_get x 1)) + (gen_bswap $I64 (value_regs_get x 0)))) + + +;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (ctz x))) + (lower_ctz ty x)) + +(rule 1 (lower (has_type $I128 (ctz x))) + (lower_ctz_128 x)) + +;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (clz x))) + (lower_clz ty x)) + +(rule 1 (lower (has_type $I128 (clz x))) + (lower_clz_i128 x)) + +;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (cls x))) + (lower_cls ty x)) + +(rule 1 (lower (has_type $I128 (cls x))) + (lower_cls_i128 x)) + +;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type out_ty (uextend val @ (value_type in_ty)))) + (extend val (ExtendOp.Zero) in_ty out_ty)) + +;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type out_ty (sextend val @ (value_type in_ty)))) + (extend val (ExtendOp.Signed) in_ty out_ty)) + +;; The instructions below are present in RV64I and sign-extend the result to 64 bits. + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (iadd x y))))) + (rv_addw x y)) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (isub x y))))) + (rv_subw x y)) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ishl x y))))) + (rv_sllw x (value_regs_get y 0))) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ushr x y))))) + (rv_srlw x (value_regs_get y 0))) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (sshr x y))))) + (rv_sraw x (value_regs_get y 0))) + + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (iadd x (imm12_from_value y)))))) + (rv_addiw x y)) + +(rule 3 (lower (has_type $I64 (sextend (has_type $I32 (iadd (imm12_from_value x) y))))) + (rv_addiw y x)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ishl x (imm12_from_value y)))))) + (rv_slliw x y)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ushr x (imm12_from_value y)))))) + (rv_srliw x y)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (sshr x (imm12_from_value y)))))) + (rv_sraiw x y)) + +;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (popcnt x))) + (lower_popcnt x ty)) + +(rule 1 (lower (has_type $I128 (popcnt x))) + (lower_popcnt_i128 x)) + +;; Popcount using multiply. +;; This is popcount64c() from +;; http://en.wikipedia.org/wiki/Hamming_weight +;; +;; Here's the C version for 32 bits: +;; x = x - ((x>> 1) & 0x55555555); +;; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); +;; x = ((x + (x >> 4)) & 0x0F0F0F0F); +;; return (x * 0x01010101) >> 24; // Here 24 is the type width - 8. +;; +;; TODO: LLVM generates a much better implementation for I8X16. See: https://godbolt.org/z/qr6vf9Gr3 +;; For the other types it seems to be largely the same. 
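+;; For reference, a sketch of the same algorithm widened to 64 bits (plain
+;; Rust, only for illustration; the vector rule below performs these exact
+;; steps lane-wise, with the constants truncated to the lane width via
+;; `ty_mask`):
+;;
+;;   fn popcount64(mut x: u64) -> u64 {
+;;       x -= (x >> 1) & 0x5555_5555_5555_5555;
+;;       x = (x & 0x3333_3333_3333_3333) + ((x >> 2) & 0x3333_3333_3333_3333);
+;;       x = (x + (x >> 4)) & 0x0f0f_0f0f_0f0f_0f0f;
+;;       x.wrapping_mul(0x0101_0101_0101_0101) >> 56 // 56 = 64 - 8
+;;   }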
+(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (popcnt x))) + (if-let one (u64_to_uimm5 1)) + (if-let two (u64_to_uimm5 2)) + (if-let four (u64_to_uimm5 4)) + + (let (;; x = x - ((x >> 1) & 0x55555555); + (mask_55 XReg (imm (lane_type ty) (u64_and 0x5555555555555555 (ty_mask (lane_type ty))))) + (count2_shr VReg (rv_vsrl_vi x one (unmasked) ty)) + (count2_and VReg (rv_vand_vx count2_shr mask_55 (unmasked) ty)) + (count2 VReg (rv_vsub_vv x count2_and (unmasked) ty)) + + ;; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + (mask_33 XReg (imm (lane_type ty) (u64_and 0x3333333333333333 (ty_mask (lane_type ty))))) + (count4_shr VReg (rv_vsrl_vi count2 two (unmasked) ty)) + (count4_and VReg (rv_vand_vx count4_shr mask_33 (unmasked) ty)) + (count4_lhs VReg (rv_vand_vx count2 mask_33 (unmasked) ty)) + (count4 VReg (rv_vadd_vv count4_lhs count4_and (unmasked) ty)) + + ;; x = (x + (x >> 4)) & 0x0F0F0F0F; + (mask_0f XReg (imm (lane_type ty) (u64_and 0x0f0f0f0f0f0f0f0f (ty_mask (lane_type ty))))) + (count8_shr VReg (rv_vsrl_vi count4 four (unmasked) ty)) + (count8_add VReg (rv_vadd_vv count4 count8_shr (unmasked) ty)) + (count8 VReg (rv_vand_vx count8_add mask_0f (unmasked) ty)) + + ;; (x * 0x01010101) >> (ty_bits(lane_type) - 8) + (mask_01 XReg (imm (lane_type ty) (u64_and 0x0101010101010101 (ty_mask (lane_type ty))))) + (mul VReg (rv_vmul_vx count8 mask_01 (unmasked) ty)) + (shift XReg (imm $I64 (u64_sub (ty_bits (lane_type ty)) 8))) + (res VReg (rv_vsrl_vx mul shift (unmasked) ty))) + res)) + +;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount +(rule 0 (lower (has_type (ty_int (ty_8_or_16 ty)) (ishl x y))) + (if-let mask (u64_to_imm12 (shift_mask ty))) + (rv_sllw x (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `sll` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (ishl x y))) + (rv_sllw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (ishl x y))) + (rv_sll x (value_regs_get y 0))) + +;; If the shift amount is known, we can mask it and encode it in the instruction. +(rule 2 (lower (has_type (int_fits_in_32 ty) (ishl x (maybe_uextend (imm12_from_value y))))) + (rv_slliw x (imm12_and y (shift_mask ty)))) + +;; We technically don't need to mask the shift amount here. The instruction +;; does the right thing. But it's neater when pretty printing it. +(rule 3 (lower (has_type ty @ $I64 (ishl x (maybe_uextend (imm12_from_value y))))) + (rv_slli x (imm12_and y (shift_mask ty)))) + +;; With `Zba` we have a shift that zero extends the LHS argument. +(rule 4 (lower (has_type $I64 (ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y))))) + (if-let $true (has_zba)) + (rv_slliuw x y)) + +;; I128 cases +(rule 4 (lower (has_type $I128 (ishl x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; + (low XReg (rv_sll (value_regs_get x 0) shamt)) + ;; high part.
+ (high_part1 XReg (rv_srl (value_regs_get x 0) len_sub_shamt)) + (high_part2 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part1)) + ;; + (high_part3 XReg (rv_sll (value_regs_get x 1) shamt)) + (high XReg (rv_or high_part2 high_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high)))) + +;; SIMD Cases +;; We don't need to mask anything since it is done by the instruction according to SEW. + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (ishl x y))) + (rv_vsll_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (ishl x (maybe_uextend (uimm5_from_value y))))) + (rv_vsll_vi x y (unmasked) ty)) + +;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be +;; zero extended. +(rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x y))) + (if-let mask (u64_to_imm12 (shift_mask ty))) + (rv_srlw (zext x ty $I64) (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `srl` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (ushr x y))) + (rv_srlw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (ushr x y))) + (rv_srl x (value_regs_get y 0))) + +;; When the RHS is known we can just encode it in the instruction. +(rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srliw (zext x ty $I64) (imm12_and y (shift_mask ty)))) + +(rule 3 (lower (has_type $I32 (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srliw x y)) + +(rule 3 (lower (has_type $I64 (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srli x y)) + +(rule 3 (lower (has_type $I128 (ushr x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; low part. + (low_part1 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + (low_part2 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part1)) + ;; + (low_part3 XReg (rv_srl (value_regs_get x 0) shamt)) + (low XReg (rv_or low_part2 low_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + ;; + (high XReg (rv_srl (value_regs_get x 1) shamt)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) high)))) + +;; SIMD Cases +;; We don't need to mask or extend anything since it is done by the instruction according to SEW. + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (ushr x y))) + (rv_vsrl_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (ushr x (maybe_uextend (uimm5_from_value y))))) + (rv_vsrl_vi x y (unmasked) ty)) + +;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be +;; sign extended.
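+;; For example, the i8 value 0x80 (-128) shifted right by 1 with `sshr` must
+;; produce 0xC0 (-64). The shift is done in a 64-bit register, so the upper
+;; bits have to hold copies of the sign bit; if the value were merely
+;; zero-extended, `sraw` would shift a 0 into bit 7 and yield 0x40 instead.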
+(rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x y))) + (if-let mask (u64_to_imm12 (shift_mask ty))) + (rv_sraw (sext x ty $I64) (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `sra` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (sshr x y))) + (rv_sraw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (sshr x y))) + (rv_sra x (value_regs_get y 0))) + +;; When the RHS is known we can just encode it in the instruction. +(rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_sraiw (sext x ty $I64) (imm12_and y (shift_mask ty)))) + +(rule 3 (lower (has_type $I32 (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_sraiw x y)) + +(rule 3 (lower (has_type $I64 (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_srai x y)) + +(rule 3 (lower (has_type $I128 (sshr x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; low part. + (low_part1 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + (low_part2 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part1)) + ;; + (low_part3 XReg (rv_srl (value_regs_get x 0) shamt)) + (low XReg (rv_or low_part2 low_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + ;; + (high XReg (rv_sra (value_regs_get x 1) shamt)) + ;; + (const_neg_1 XReg (load_imm12 -1)) + ;; + (high_replacement XReg (gen_select_reg (IntCC.SignedLessThan) (value_regs_get x 1) (zero_reg) const_neg_1 (zero_reg))) + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high_replacement high)))) + +;; SIMD Cases +;; We don't need to mask or extend anything since it is done by the instruction according to SEW. 
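+;; Concretely, the vector shift instructions only use the low log2(SEW) bits of
+;; the scalar shift operand (e.g. 3 bits for an i8x16 shift), so an explicit
+;; `and` of the shift amount is unnecessary here.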
+ +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (sshr x y))) + (rv_vsra_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (sshr x (maybe_uextend (uimm5_from_value y))))) + (rv_vsra_vi x y (unmasked) ty)) + + +;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (rotl x y))) + (lower_rotl ty (zext x ty $I64) (value_regs_get y 0))) + +(rule 1 (lower (has_type $I128 (rotl x y))) + (lower_i128_rotl x y)) + +;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (rotr x y))) + (lower_rotr ty (zext x ty $I64) (value_regs_get y 0))) + +(rule 1 (lower (has_type $I128 (rotr x y))) + (lower_i128_rotr x y)) + + +;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fabs x))) + (rv_fabs ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fabs x))) + (rv_vfabs_v x (unmasked) ty)) + +;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fneg x))) + (rv_fneg ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fneg x))) + (rv_vfneg_v x (unmasked) ty)) + +;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fcopysign x y))) + (rv_fsgnj ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fcopysign x y))) + (rv_vfsgnj_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fcopysign x (splat y)))) + (rv_vfsgnj_vf x y (unmasked) ty)) + +;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fma x y z))) + (rv_fmadd ty x y z)) + +;; (fma x y z) computes x * y + z +;; vfmacc computes vd[i] = +(vs1[i] * vs2[i]) + vd[i] +;; We need to reverse the order of the arguments + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fma x y z))) + (rv_vfmacc_vv z y x (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y z))) + (rv_vfmacc_vf z y x (unmasked) ty)) + +;; vfmsac computes vd[i] = +(vs1[i] * vs2[i]) - vd[i] + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fma x y (fneg z)))) + (rv_vfmsac_vv z y x (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y (fneg z)))) + (rv_vfmsac_vf z y x (unmasked) ty)) + +;; vfnmacc computes vd[i] = -(vs1[i] * vs2[i]) - vd[i] + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y (fneg z)))) + (rv_vfnmacc_vv z y x (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y (fneg z)))) + (rv_vfnmacc_vf z y x (unmasked) ty)) + +;; vfnmsac computes vd[i] = -(vs1[i] * vs2[i]) + vd[i] + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y z))) + (rv_vfnmsac_vv z y x (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y z))) + (rv_vfnmsac_vf z y x (unmasked) ty)) + + +;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (sqrt x))) + (rv_fsqrt ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sqrt x))) + (rv_vfsqrt_v x (unmasked) ty)) + +;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule -1 + ;; + (lower + (has_type (valid_atomic_transaction 
ty) (atomic_rmw flags op addr x))) + (gen_atomic (get_atomic_rmw_op ty op) addr x (atomic_amo))) + +;;; Special case for I8 and I16. +(rule 1 + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags op addr x))) + (gen_atomic_rmw_loop op ty addr x)) + +;;; Special case for I8 and I16 max/min etc., because the value needs to be +;;; zero- or sign-extended first. +(rule 2 + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $true) addr x))) + (gen_atomic_rmw_loop op ty addr (sext x ty $I64))) + + +(rule 2 + ;; + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $false) addr x))) + ;; + (gen_atomic_rmw_loop op ty addr (zext x ty $I64))) + +;;;;; Rules for `AtomicRmwOp.Sub` +(rule + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr x))) + (let + ((tmp WritableReg (temp_writable_reg ty)) + (x2 Reg (rv_neg x))) + (gen_atomic (get_atomic_rmw_op ty (AtomicRmwOp.Add)) addr x2 (atomic_amo)))) + +(decl gen_atomic_rmw_loop (AtomicRmwOp Type XReg XReg) XReg) +(rule + (gen_atomic_rmw_loop op ty addr x) + (let + ((dst WritableXReg (temp_writable_xreg)) + (t0 WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AtomicRmwLoop (gen_atomic_offset addr ty) op dst ty (gen_atomic_p addr ty) x t0)))) + (writable_reg_to_reg dst))) + +;;;;; Rules for `AtomicRmwOp.Nand` +(rule + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Nand) addr x))) + (gen_atomic_rmw_loop (AtomicRmwOp.Nand) ty addr x)) + +(decl is_atomic_rmw_max_etc (AtomicRmwOp bool) AtomicRmwOp) +(extern extractor is_atomic_rmw_max_etc is_atomic_rmw_max_etc) + +;;;;; Rules for `atomic load`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type (valid_atomic_transaction ty) (atomic_load flags p))) + (gen_atomic_load p ty)) + + +;;;;; Rules for `atomic store`;;;;;;;;;;;;;;;;; +(rule + (lower (atomic_store flags src @ (value_type (valid_atomic_transaction ty)) p)) + (gen_atomic_store p ty src)) + +(decl gen_atomic_offset (XReg Type) XReg) +(rule 1 (gen_atomic_offset p (fits_in_16 ty)) + (rv_slli (rv_andi p (imm12_const 3)) (imm12_const 3))) + +(rule (gen_atomic_offset p _) + (zero_reg)) + +(decl gen_atomic_p (XReg Type) XReg) +(rule 1 (gen_atomic_p p (fits_in_16 ty)) + (rv_andi p (imm12_const -4))) + +(rule (gen_atomic_p p _) + p) + + +;;;;; Rules for `atomic cas`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type (valid_atomic_transaction ty) (atomic_cas flags p e x))) + (let + ((t0 WritableReg (temp_writable_reg ty)) + (dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.AtomicCas (gen_atomic_offset p ty) t0 dst (zext e ty $I64) (gen_atomic_p p ty) x ty)))) + (writable_reg_to_reg dst))) + +;;;;; Rules for `ireduce`;;;;;;;;; +(rule + (lower (has_type ty (ireduce x))) + (value_regs_get x 0)) + +;;;;; Rules for `fpromote`;;;;;;;;; +(rule (lower (fpromote x)) + (rv_fcvtds x)) + +;;;;; Rules for `fdemote`;;;;;;;;;; +(rule (lower (fdemote x)) + (rv_fcvtsd x)) + + +;;;;; Rules for float arithmetic + + +;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fadd x y))) + (rv_fadd ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fadd x y))) + (rv_vfadd_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fadd x (splat y)))) + (rv_vfadd_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fadd (splat x) y))) + (rv_vfadd_vf y x 
(unmasked) ty)) + + +;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fsub x y))) + (rv_fsub ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fsub x y))) + (rv_vfsub_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fsub x (splat y)))) + (rv_vfsub_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fsub (splat x) y))) + (rv_vfrsub_vf y x (unmasked) ty)) + +;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fmul x y))) + (rv_fmul ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmul x y))) + (rv_vfmul_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fmul x (splat y)))) + (rv_vfmul_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fmul (splat x) y))) + (rv_vfmul_vf y x (unmasked) ty)) + + +;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fdiv x y))) + (rv_fdiv ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x y))) + (rv_vfdiv_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x (splat y)))) + (rv_vfdiv_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fdiv (splat x) y))) + (rv_vfrdiv_vf y x (unmasked) ty)) + +;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fmin x y))) + (gen_float_select (FloatSelectOP.Min) x y ty)) + +;; vfmin does almost the right thing, but it does not handle NaN's correctly. +;; We should return a NaN if any of the inputs is a NaN, but vfmin returns the +;; number input instead. +;; +;; TODO: We can improve this by using a masked `fmin` instruction that modifies +;; the canonical nan register. That way we could avoid the `vmerge.vv` instruction. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmin x y))) + (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) + (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) + (vec_nan VReg (rv_vmv_vx nan ty)) + (min VReg (rv_vfmin_vv x y (unmasked) ty))) + (rv_vmerge_vvm vec_nan min is_not_nan ty))) + +;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fmax x y))) + (gen_float_select (FloatSelectOP.Max) x y ty)) + +;; vfmax does almost the right thing, but it does not handle NaN's correctly. +;; We should return a NaN if any of the inputs is a NaN, but vfmax returns the +;; number input instead. +;; +;; TODO: We can improve this by using a masked `fmax` instruction that modifies +;; the canonical nan register. That way we could avoid the `vmerge.vv` instruction. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmax x y))) + (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) + (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) + (vec_nan VReg (rv_vmv_vx nan ty)) + (max VReg (rv_vfmax_vv x y (unmasked) ty))) + (rv_vmerge_vvm vec_nan max is_not_nan ty))) + +;;;;; Rules for `stack_addr`;;;;;;;;; +(rule + (lower (stack_addr ss offset)) + (gen_stack_addr ss offset)) + +;;;;; Rules for `is_null`;;;;;;;;; + +;; Null references are represented by the constant value `0`. 
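+;; So `is_null` reduces to a single `seqz` on the value, and `is_invalid`
+;; below checks for -1 by adding 1 and testing the result for zero.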
+(rule (lower (is_null v)) + (rv_seqz v)) + +;;;;; Rules for `is_invalid`;;;;;;;;; + +;; Invalid references are represented by the constant value `-1`. +(rule (lower (is_invalid v)) + (rv_seqz (rv_addi v (imm12_const 1)))) + +;;;;; Rules for `select`;;;;;;;;; +(rule + (lower (has_type ty (select c @ (value_type cty) x y))) + (gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c (ExtendOp.Zero))) x y)) + +(rule 1 + (lower (has_type (fits_in_64 ty) (select (icmp cc a b @ (value_type (fits_in_64 in_ty))) x y))) + (let ((a XReg (truthy_to_reg in_ty (normalize_cmp_value in_ty a (intcc_to_extend_op cc)))) + (b XReg (truthy_to_reg in_ty (normalize_cmp_value in_ty b (intcc_to_extend_op cc))))) + (gen_select_reg cc a b x y))) + +;;;;; Rules for `bitselect`;;;;;;;;; + +;; Do a (c & x) | (~c & y) operation. +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (bitselect c x y))) + (let ((tmp_x XReg (rv_and c x)) + (c_inverse XReg (rv_not c)) + (tmp_y XReg (rv_and c_inverse y))) + (rv_or tmp_x tmp_y))) + +;; For vectors, we also do the same operation. +;; We can technically use any type in the bitwise operations, but prefer +;; using the type of the inputs so that we avoid emitting unnecessary +;; `vsetvl` instructions. It's likely that the vector unit is already +;; configured for that type. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (bitselect c x y))) + (let ((tmp_x VReg (rv_vand_vv c x (unmasked) ty)) + (c_inverse VReg (rv_vnot_v c (unmasked) ty)) + (tmp_y VReg (rv_vand_vv c_inverse y (unmasked) ty))) + (rv_vor_vv tmp_x tmp_y (unmasked) ty))) + +;; Special case for bitselects with cmps as an input. +;; +;; This allows us to skip the mask expansion step and use the more efficient +;; vmerge.vvm instruction. +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (icmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b) x y))) + (let ((mask VReg (gen_icmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (fcmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b) x y))) + (let ((mask VReg (gen_fcmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (bitcast _ (fcmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b)) x y))) + (let ((mask VReg (gen_fcmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (bitcast _ (icmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b)) x y))) + (let ((mask VReg (gen_icmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + + +;;;;; Rules for `isplit`;;;;;;;;; +(rule + (lower (isplit x)) + (let + ((t1 XReg (value_regs_get x 0)) + (t2 XReg (value_regs_get x 1))) + (output_pair t1 t2))) + +;;;;; Rules for `iconcat`;;;;;;;;; +(rule + (lower (has_type $I128 (iconcat x y))) + (let + ((t1 XReg x) + (t2 XReg y)) + (value_regs t1 t2))) + + +;;;;; Rules for `smax`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) (smax x y))) + (gen_int_select ty (IntSelectOP.Smax) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smax x y))) + (rv_vmax_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (smax x (splat y)))) + (rv_vmax_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (smax (splat x) y))) + (rv_vmax_vx y x (unmasked) ty)) + +;;;;; Rules for `smin`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) 
(smin x y))) + (gen_int_select ty (IntSelectOP.Smin) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smin x y))) + (rv_vmin_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (smin x (splat y)))) + (rv_vmin_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (smin (splat x) y))) + (rv_vmin_vx y x (unmasked) ty)) + +;;;;; Rules for `umax`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) (umax x y))) + (gen_int_select ty (IntSelectOP.Umax) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umax x y))) + (rv_vmaxu_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (umax x (splat y)))) + (rv_vmaxu_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (umax (splat x) y))) + (rv_vmaxu_vx y x (unmasked) ty)) + +;;;;; Rules for `umin`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) (umin x y))) + (gen_int_select ty (IntSelectOP.Umin) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umin x y))) + (rv_vminu_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (umin x (splat y)))) + (rv_vminu_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (umin (splat x) y))) + (rv_vminu_vx y x (unmasked) ty)) + + +;;;;; Rules for `debugtrap`;;;;;;;;; +(rule + (lower (debugtrap)) + (side_effect (SideEffectNoResult.Inst (MInst.EBreak)))) + +;;;;; Rules for `fence`;;;;;;;;; +(rule + (lower (fence)) + (side_effect (SideEffectNoResult.Inst (MInst.Fence 15 15)))) + +;;;;; Rules for `trap`;;;;;;;;; +(rule + (lower (trap code)) + (udf code)) + +;;;;; Rules for `resumable_trap`;;;;;;;;; +(rule + (lower (resumable_trap code)) + (udf code)) + +;;;;; Rules for `uload8`;;;;;;;;; +(rule + (lower (uload8 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $false 8) flags $I64)) +;;;;; Rules for `sload8`;;;;;;;;; +(rule + (lower (sload8 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $true 8) flags $I64)) +;;;;; Rules for `uload16`;;;;;;;;; +(rule + (lower (uload16 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $false 16) flags $I64)) + +;;;;; Rules for `sload16`;;;;;;;;; +(rule + (lower (sload16 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $true 16) flags $I64)) + +;;;;; Rules for `uload32`;;;;;;;;; +(rule + (lower (uload32 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $false 32) flags $I64)) + +;;;;; Rules for `sload32`;;;;;;;;; +(rule + (lower (sload32 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $true 32) flags $I64)) + +(rule + (lower (has_type ty (load flags p @ (value_type (ty_addr64 _)) offset))) + (gen_load p offset (load_op ty) flags ty) +) +;;;; for I128 +(rule 1 + (lower (has_type $I128 (load flags p @ (value_type (ty_addr64 _)) offset))) + (gen_load_128 p offset flags)) + +(rule 2 + (lower (has_type (ty_vec_fits_in_register ty) (load flags p @ (value_type (ty_addr64 _)) offset))) + (let ((eew VecElementWidth (element_width_from_type ty))) + (vec_load eew (VecAMode.UnitStride (gen_amode p offset $I64)) flags (unmasked) ty))) + +;;;;; Rules for Load + Extend Combos ;;;;;;;;; + +;; These rules cover the special loads that load a 64bit value and do some
sort of extension. +;; We don't have any special instructions to do this, so just load the 64 bits as a vector, and +;; do a SEW/2 extension. This only reads half-width elements from the source vector register, +;; extends them, and writes back the full register. + +(decl gen_load64_extend (Type ExtendOp MemFlags XReg Offset32) VReg) + +(rule (gen_load64_extend ty (ExtendOp.Signed) flags addr offset) + (let ((eew VecElementWidth (element_width_from_type $I64)) + (load_state VState (vstate_from_type $I64)) + (loaded VReg (vec_load eew (VecAMode.UnitStride (gen_amode addr offset $I64)) flags (unmasked) load_state))) + (rv_vsext_vf2 loaded (unmasked) ty))) + +(rule (gen_load64_extend ty (ExtendOp.Zero) flags addr offset) + (let ((eew VecElementWidth (element_width_from_type $I64)) + (load_state VState (vstate_from_type $I64)) + (loaded VReg (vec_load eew (VecAMode.UnitStride (gen_amode addr offset $I64)) flags (unmasked) load_state))) + (rv_vzext_vf2 loaded (unmasked) ty))) + +;;;;; Rules for `uload8x8`;;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I16X8) (uload8x8 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags addr offset)) + +;;;;; Rules for `uload16x4`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I32X4) (uload16x4 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags addr offset)) + +;;;;; Rules for `uload32x2`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I64X2) (uload32x2 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags addr offset)) + +;;;;; Rules for `sload8x8`;;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I16X8) (sload8x8 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags addr offset)) + +;;;;; Rules for `sload16x4`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I32X4) (sload16x4 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags addr offset)) + +;;;;; Rules for `sload32x2`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I64X2) (sload32x2 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags addr offset)) + +;;;;; Rules for `istore8`;;;;;;;;; +(rule + (lower (istore8 flags x p @ (value_type (ty_addr64 _)) offset)) + (gen_store p offset (StoreOP.Sb) flags x)) +;;;;; Rules for `istore16`;;;;;;;;; +(rule + (lower (istore16 flags x p @ (value_type (ty_addr64 _)) offset)) + (gen_store p offset (StoreOP.Sh) flags x)) + +;;;;; Rules for `istore32`;;;;;;;;; +(rule + (lower (istore32 flags x p @ (value_type (ty_addr64 _)) offset)) + (gen_store p offset (StoreOP.Sw) flags x)) + +;;;;; Rules for `store`;;;;;;;;; +(rule + (lower (store flags x @ (value_type ty) p @ (value_type (ty_addr64 _)) offset)) + (gen_store p offset (store_op ty) flags x)) + +;;; special for I128 +(rule 1 + (lower (store flags x @ (value_type $I128 ) p @ (value_type (ty_addr64 _)) offset)) + (gen_store_128 p offset flags x)) + +(rule 2 + (lower (store flags x @ (value_type (ty_vec_fits_in_register ty)) p @ (value_type (ty_addr64 _)) offset)) + (let ((eew VecElementWidth (element_width_from_type ty))) + (vec_store eew (VecAMode.UnitStride (gen_amode p offset $I64)) x flags (unmasked) ty))) + +(decl gen_icmp (IntCC ValueRegs ValueRegs Type) XReg) +(rule + (gen_icmp cc x y ty) + (let + ((result WritableXReg (temp_writable_xreg)) + (_ Unit (emit 
(MInst.Icmp cc result x y ty))) + result)) + +;;;;; Rules for `icmp`;;;;;;;;; +(rule 0 (lower (icmp cc x @ (value_type (ty_int ty)) y)) + (lower_icmp cc x y ty)) + +(rule 1 (lower (icmp cc x @ (value_type (ty_vec_fits_in_register ty)) y)) + (gen_expand_mask ty (gen_icmp_mask ty cc x y))) + + +;;;;; Rules for `fcmp`;;;;;;;;; +(rule 0 (lower (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) + (cmp_value (emit_fcmp cc ty x y))) + +(rule 1 (lower (fcmp cc x @ (value_type (ty_vec_fits_in_register ty)) y)) + (gen_expand_mask ty (gen_fcmp_mask ty cc x y))) + +;;;;; Rules for `func_addr`;;;;;;;;; +(rule + (lower (func_addr (func_ref_data _ name _))) + (load_ext_name name 0)) + +;;;;; Rules for `fcvt_to_uint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_uint v @ (value_type from)))) + (gen_fcvt_int $false v $false from to)) + +;;;;; Rules for `fcvt_to_sint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_sint v @ (value_type from)))) + (gen_fcvt_int $false v $true from to)) + +;;;;; Rules for `fcvt_to_sint_sat`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_sint_sat v @ (value_type from)))) + (gen_fcvt_int $true v $true from to)) + +;;;;; Rules for `fcvt_to_uint_sat`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_uint_sat v @ (value_type from)))) + (gen_fcvt_int $true v $false from to)) + +;;;;; Rules for `fcvt_from_sint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_from_sint v @ (value_type from_ty)))) + (let ((float_op FpuOPRR (int_convert_2_float_op from_ty $true to)) + (value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Signed)))) + (fpu_rr float_op to value))) + +;;;;; Rules for `fcvt_from_uint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_from_uint v @ (value_type from_ty)))) + (let ((float_op FpuOPRR (int_convert_2_float_op from_ty $false to)) + (value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Zero)))) + (fpu_rr float_op to value))) + +;;;;; Rules for `symbol_value`;;;;;;;;; +(rule + (lower (symbol_value (symbol_value_data name _ offset))) + (load_ext_name name offset) +) +;;;;; Rules for `bitcast`;;;;;;;;; +(rule + (lower (has_type out_ty (bitcast _ v @ (value_type in_ty)))) + (gen_bitcast v in_ty out_ty)) + +;;;;; Rules for `ceil`;;;;;;;;; +(rule + (lower (has_type ty (ceil x))) + (gen_float_round (FloatRoundOP.Ceil) x ty) +) + +;;;;; Rules for `floor`;;;;;;;;; +(rule + (lower (has_type ty (floor x))) + (gen_float_round (FloatRoundOP.Floor) x ty)) +;;;;; Rules for `trunc`;;;;;;;;; +(rule + (lower (has_type ty (trunc x))) + (gen_float_round (FloatRoundOP.Trunc) x ty)) + +;;;;; Rules for `nearest`;;;;;;;;; +(rule + (lower (has_type ty (nearest x))) + (gen_float_round (FloatRoundOP.Nearest) x ty)) + + +;;;;; Rules for `select_spectre_guard`;;;;;;;;; + +;; SelectSpectreGuard is equivalent to Select, but we should not use a branch-based +;; lowering for it. Instead we use a conditional-move-based lowering. +;; +;; We don't have cmovs in RISC-V either, but we can emulate those using bitwise +;; operations, which is what we do below. +(rule (lower (has_type ty (select_spectre_guard cmp @ (value_type cmp_ty) x @ (value_type arg_ty) y))) + (let (;; Build a mask that is 0 or -1 depending on the input comparison value. + ;; `lower_bmask` handles normalizing the input. + (mask ValueRegs (lower_bmask arg_ty cmp_ty cmp)) + ;; Using the mask above we can select either `x` or `y` by + ;; performing a bitwise `and` on both sides and then merging them + ;; together. We know that only the bits of one of the sides will be selected.
+ ;; TODO: We can use `andn` here if we have `Zbb` + (lhs ValueRegs (gen_and arg_ty x mask)) + (rhs ValueRegs (gen_and arg_ty y (gen_bnot arg_ty mask)))) + (gen_or arg_ty lhs rhs))) + +;;;;; Rules for `bmask`;;;;;;;;; +(rule + (lower (has_type oty (bmask x @ (value_type ity)))) + (lower_bmask oty ity x)) + +;; N.B.: the Ret itself is generated by the ABI. +(rule (lower (return args)) + (lower_return args)) + +;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;;; + +(rule (lower (get_frame_pointer)) + (gen_mov_from_preg (fp_reg))) + +(rule (lower (get_stack_pointer)) + (gen_mov_from_preg (sp_reg))) + +(rule (lower (get_return_address)) + (load_ra)) + +;;; Rules for `iabs` ;;;;;;;;;;;;; + +;; I64 and lower +;; Generate the following code: +;; sext.{b,h,w} a0, a0 +;; neg a1, a0 +;; max a0, a0, a1 +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iabs x))) + (let ((extended XReg (sext x ty $I64)) + (negated XReg (rv_neg extended))) + (max $I64 extended negated))) + +;; For vectors we generate the same code, but with vector instructions +;; we can skip the sign extension, since the vector unit will only process +;; Element Sized chunks. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (iabs x))) + (let ((negated VReg (rv_vneg_v x (unmasked) ty))) + (rv_vmax_vv x negated (unmasked) ty))) + +;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (call (func_ref_data sig_ref extname dist) inputs)) + (gen_call sig_ref extname dist inputs)) + +(rule (lower (call_indirect sig_ref val inputs)) + (gen_call_indirect sig_ref val inputs)) + +;;;; Rules for `return_call` and `return_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (return_call (func_ref_data sig_ref extname dist) args)) + (gen_return_call sig_ref extname dist args)) + +(rule (lower (return_call_indirect sig_ref callee args)) + (gen_return_call_indirect sig_ref callee args)) + + +;;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (extractlane x @ (value_type ty) (u8_from_uimm8 idx))) + (gen_extractlane ty x idx)) + +;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We can insert a lane by using a masked splat from an X register. +;; Build a mask that is only enabled in the lane we want to insert. +;; Then use a masked splat (vmerge) to insert the value. +(rule 0 (lower (insertlane vec @ (value_type (ty_vec_fits_in_register ty)) + val @ (value_type (ty_int _)) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vmerge_vxm vec val mask ty))) + +;; Similar to above, but using the float variants of the instructions. +(rule 1 (lower (insertlane vec @ (value_type (ty_vec_fits_in_register ty)) + val @ (value_type (ty_scalar_float _)) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vfmerge_vfm vec val mask ty))) + +;; If we are inserting from an Imm5 const we can use the immediate +;; variant of vmerge. 
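+;; In every insertlane variant the mask has exactly one bit set (1 << lane),
+;; so the merge replaces only the selected lane; inserting into lane 2 of an
+;; i32x4 vector, for example, uses the mask 0b0100.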
+(rule 2 (lower (insertlane vec @ (value_type (ty_vec_fits_in_register ty)) + (iconst (u64_from_imm64 (imm5_from_u64 imm))) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vmerge_vim vec imm mask ty))) + +;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type ty (splat n @ (value_type (ty_scalar_float _))))) + (rv_vfmv_vf n ty)) + +(rule 1 (lower (has_type ty (splat n @ (value_type (ty_int_ref_scalar_64 _))))) + (rv_vmv_vx n ty)) + +(rule 2 (lower (has_type ty (splat (iconst (u64_from_imm64 (imm5_from_u64 imm)))))) + (rv_vmv_vi imm ty)) + +;; TODO: We can splat out more patterns by using for example a vmv.v.i i8x16 for +;; a i64x2 const with a compatible bit pattern. The AArch64 Backend does something +;; similar in its splat rules. +;; TODO: Look through bitcasts when splatting out registers. We can use +;; `vmv.v.x` in a `(splat.f32x4 (bitcast.f32 val))`. And vice versa for integers. + +;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x y))) + (rv_vsaddu_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x (splat y)))) + (rv_vsaddu_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat (splat x) y))) + (rv_vsaddu_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x (replicated_imm5 y)))) + (rv_vsaddu_vi x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat (replicated_imm5 x) y))) + (rv_vsaddu_vi y x (unmasked) ty)) + +;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x y))) + (rv_vsadd_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x (splat y)))) + (rv_vsadd_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat (splat x) y))) + (rv_vsadd_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x (replicated_imm5 y)))) + (rv_vsadd_vi x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat (replicated_imm5 x) y))) + (rv_vsadd_vi y x (unmasked) ty)) + +;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (usub_sat x y))) + (rv_vssubu_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (usub_sat x (splat y)))) + (rv_vssubu_vx x y (unmasked) ty)) + +;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (ssub_sat x y))) + (rv_vssub_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (ssub_sat x (splat y)))) + (rv_vssub_vx x y (unmasked) ty)) + +;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Here we do a Vector Reduce operation. Get the unsigned minimum value of any +;; lane in the vector. The fixed input to the reduce operation is a 1. +;; This way, if any lane is 0, the result will be 0. Otherwise, the result will +;; be a 1. +;; The reduce operation leaves the result in the lowest lane, we then move it +;; into the destination X register. 
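+;; As a rough sketch (register names are illustrative only), for an i32x4
+;; input this lowers to something like:
+;;   vmv.v.i     v_one, 1
+;;   vredminu.vs v_min, v_src, v_one
+;;   vmv.x.s     x_dst, v_min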
+(rule (lower (vall_true x @ (value_type (ty_vec_fits_in_register ty)))) + (if-let one (imm5_from_i8 1)) + ;; We don't need to broadcast the immediate into all lanes, only into lane 0. + ;; I did it this way since it uses one less instruction than with a vmv.s.x. + (let ((fixed VReg (rv_vmv_vi one ty)) + (min VReg (rv_vredminu_vs x fixed (unmasked) ty))) + (rv_vmv_xs min ty))) + + +;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Here we do a Vector Reduce operation. Get the unsigned maximum value of the +;; input vector register. Move the max to an X register, and do a `snez` on it +;; to ensure its either 1 or 0. +(rule (lower (vany_true x @ (value_type (ty_vec_fits_in_register ty)))) + (let ((max VReg (rv_vredmaxu_vs x x (unmasked) ty)) + (x_max XReg (rv_vmv_xs max ty))) + (rv_snez x_max))) + + +;;;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; To check if the MSB of a lane is set, we do a `vmslt` with zero, this sets +;; the mask bit to 1 if the value is negative (MSB 1) and 0 if not. We can then +;; just move that mask to an X Register. +;; +;; We must ensure that the move to the X register has a SEW with enough bits +;; to hold the full mask. Additionally, in some cases (e.g. i64x2) we are going +;; to read some tail bits. These are undefined, so we need to further mask them +;; off. +(rule (lower (vhigh_bits x @ (value_type (ty_vec_fits_in_register ty)))) + (let ((mask VReg (rv_vmslt_vx x (zero_reg) (unmasked) ty)) + ;; Here we only need I64X1, but emit an AVL of 2 since it + ;; saves one vector state change in the case of I64X2. + ;; + ;; TODO: For types that have more lanes than element bits, we can + ;; use the original type as a VState and avoid a state change. + (x_mask XReg (rv_vmv_xs mask (vstate_from_type $I64X2)))) + (gen_andi x_mask (ty_lane_mask ty)))) + +;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (swizzle x y))) + (rv_vrgather_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (swizzle x (splat y)))) + (rv_vrgather_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (swizzle x (replicated_uimm5 y)))) + (rv_vrgather_vi x y (unmasked) ty)) + +;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Use a vrgather to load all 0-15 lanes from x. And then modify the mask to load all +;; 16-31 lanes from y. Finally, use a vor to combine the two vectors. +;; +;; vrgather will insert a 0 for lanes that are out of bounds, so we can let it load +;; negative and out of bounds indexes. +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I8X16) (shuffle x y (vconst_from_immediate mask)))) + (if-let neg16 (imm5_from_i8 -16)) + (let ((x_mask VReg (gen_constant ty mask)) + (x_lanes VReg (rv_vrgather_vv x x_mask (unmasked) ty)) + (y_mask VReg (rv_vadd_vi x_mask neg16 (unmasked) ty)) + (y_lanes VReg (rv_vrgather_vv y y_mask (unmasked) ty))) + (rv_vor_vv x_lanes y_lanes (unmasked) ty))) + +;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Slide down half the vector, and do a signed extension. 
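+;; For example, widening the high half of an i8x16 to i16x8 slides the upper
+;; eight lanes down into the low half and then sign-extends each lane with
+;; `vsext.vf2`.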
+(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_high x @ (value_type in_ty)))) + (rv_vsext_vf2 (gen_slidedown_half in_ty x) (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_high (swiden_high x @ (value_type in_ty))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vsext_vf4 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_high (swiden_high (swiden_high x @ (value_type in_ty)))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vsext_vf8 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Slide down half the vector, and do a zero extension. +(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_high x @ (value_type in_ty)))) + (rv_vzext_vf2 (gen_slidedown_half in_ty x) (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_high (uwiden_high x @ (value_type in_ty))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vzext_vf4 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_high (uwiden_high (uwiden_high x @ (value_type in_ty)))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vzext_vf8 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +;;;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_low x))) + (rv_vsext_vf2 x (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_low (swiden_low x)))) + (rv_vsext_vf4 x (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_low (swiden_low (swiden_low x))))) + (rv_vsext_vf8 x (unmasked) out_ty)) + +;;;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_low x))) + (rv_vzext_vf2 x (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_low (uwiden_low x)))) + (rv_vzext_vf4 x (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_low (uwiden_low (uwiden_low x))))) + (rv_vzext_vf8 x (unmasked) out_ty)) + +;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We don't have a dedicated instruction for this, rearrange the register elements +;; and use a vadd. +;; +;; We do this by building two masks, one for the even elements and one for the odd +;; elements. Using vcompress we can extract the elements and group them together. +;; +;; This is likely not the optimal way of doing this. LLVM does this using a bunch +;; of vrgathers (See: https://godbolt.org/z/jq8Wj8WG4), that doesen't seem to be +;; too much better than this. +;; +;; However V8 does something better. They use 2 vcompresses using LMUL2, that means +;; that they can do the whole thing in 3 instructions (2 vcompress + vadd). We don't +;; support LMUL > 1, so we can't do that. 
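+;; As an illustration, for i16x8 inputs x = [x0 .. x7] and y = [y0 .. y7]:
+;; the 0x55.. mask (`odd_mask`) keeps lanes 0, 2, 4, 6 of each input, the
+;; 0xAA.. mask (`even_mask`) keeps lanes 1, 3, 5, 7, the two halves are glued
+;; together with a slideup, and the final vadd yields
+;; [x0+x1, x2+x3, x4+x5, x6+x7, y0+y1, y2+y3, y4+y5, y6+y7].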
+(rule (lower (has_type (ty_vec_fits_in_register ty) (iadd_pairwise x y))) + (if-let half_size (u64_to_uimm5 (u64_udiv (ty_lane_count ty) 2))) + (let ((odd_mask VReg (gen_vec_mask 0x5555555555555555)) + (lhs_lo VReg (rv_vcompress_vm x odd_mask ty)) + (lhs_hi VReg (rv_vcompress_vm y odd_mask ty)) + (lhs VReg (rv_vslideup_vvi lhs_lo lhs_hi half_size (unmasked) ty)) + + (even_mask VReg (gen_vec_mask 0xAAAAAAAAAAAAAAAA)) + (rhs_lo VReg (rv_vcompress_vm x even_mask ty)) + (rhs_hi VReg (rv_vcompress_vm y even_mask ty)) + (rhs VReg (rv_vslideup_vvi rhs_lo rhs_hi half_size (unmasked) ty))) + (rv_vadd_vv lhs rhs (unmasked) ty))) + +;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `avg_round` computes the unsigned average with rounding: a := (x + y + 1) // 2 +;; +;; See Section "2–5 Average of Two Integers" of the Hacker's Delight book +;; +;; The floor average of two integers without overflow can be computed as: +;; t = (x & y) + ((x ^ y) >> 1) +;; +;; The right shift should be a logical shift if the integers are unsigned. +;; +;; We are however interested in the ceiling average (x + y + 1). For that +;; we use a special rounding mode in the right shift instruction. +;; +;; For the right shift instruction we use `vssrl` which is a Scaling Shift +;; Right Logical instruction using the `vxrm` fixed-point rouding mode. The +;; default rounding mode is `rnu` (round-to-nearest-up (add +0.5 LSB)). +;; Which is coincidentally the rounding mode we want for `avg_round`. +(rule (lower (has_type (ty_vec_fits_in_register ty) (avg_round x y))) + (if-let one (u64_to_uimm5 1)) + (let ((lhs VReg (rv_vand_vv x y (unmasked) ty)) + (xor VReg (rv_vxor_vv x y (unmasked) ty)) + (rhs VReg (rv_vssrl_vi xor one (unmasked) ty))) + (rv_vadd_vv lhs rhs (unmasked) ty))) + +;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (scalar_to_vector x))) + (if (ty_vector_not_float ty)) + (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) + (mask VReg (gen_vec_mask 1))) + (rv_vmerge_vxm zero x mask ty))) + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (scalar_to_vector x))) + (if (ty_vector_float ty)) + (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) + (elem VReg (rv_vfmv_sf x ty)) + (mask VReg (gen_vec_mask 1))) + (rv_vmerge_vvm zero elem mask ty))) + +;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (sqmul_round_sat x y))) + (rv_vsmul_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sqmul_round_sat x (splat y)))) + (rv_vsmul_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (sqmul_round_sat (splat x) y))) + (rv_vsmul_vx y x (unmasked) ty)) + +;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_vec_fits_in_register out_ty) (snarrow x @ (value_type in_ty) y))) + (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_clip VReg (rv_vnclip_wi x zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclip_wi y zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) + +;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_vec_fits_in_register out_ty) (uunarrow x @ (value_type in_ty) y))) + (if-let 
lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_clip VReg (rv_vnclipu_wi x zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclipu_wi y zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) + +;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We don't have a instruction that saturates a signed source into an unsigned destination. +;; To correct for this we just remove negative values using `vmax` and then use the normal +;; unsigned to unsigned narrowing instruction. + +(rule (lower (has_type (ty_vec_fits_in_register out_ty) (unarrow x @ (value_type in_ty) y))) + (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_pos VReg (rv_vmax_vx x (zero_reg) (unmasked) in_ty)) + (y_pos VReg (rv_vmax_vx y (zero_reg) (unmasked) in_ty)) + (x_clip VReg (rv_vnclipu_wi x_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclipu_wi y_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) diff --git a/cranelift/codegen/src/isa/zkasm/lower.rs b/cranelift/codegen/src/isa/zkasm/lower.rs new file mode 100644 index 000000000000..384fba864596 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/lower.rs @@ -0,0 +1,33 @@ +//! Lowering rules for Riscv64. +use crate::ir::Inst as IRInst; +use crate::isa::zkasm::inst::*; +use crate::isa::zkasm::Riscv64Backend; +use crate::machinst::lower::*; +use crate::machinst::*; +pub mod isle; + +//============================================================================= +// Lowering-backend trait implementation. + +impl LowerBackend for Riscv64Backend { + type MInst = Inst; + + fn lower(&self, ctx: &mut Lower, ir_inst: IRInst) -> Option { + isle::lower(ctx, self, ir_inst) + } + + fn lower_branch( + &self, + ctx: &mut Lower, + ir_inst: IRInst, + targets: &[MachLabel], + ) -> Option<()> { + isle::lower_branch(ctx, self, ir_inst, targets) + } + + fn maybe_pinned_reg(&self) -> Option { + // pinned register is a register that you want put anything in it. + // right now zkasm not support this feature. + None + } +} diff --git a/cranelift/codegen/src/isa/zkasm/lower/isle.rs b/cranelift/codegen/src/isa/zkasm/lower/isle.rs new file mode 100644 index 000000000000..28734ac30df4 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/lower/isle.rs @@ -0,0 +1,620 @@ +//! ISLE integration glue code for zkasm lowering. + +// Pull in the ISLE generated code. +#[allow(unused)] +pub mod generated_code; +use generated_code::{Context, ExtendOp, MInst}; + +// Types that the generated ISLE code uses via `use super::*`. 
+use self::generated_code::{VecAluOpRR, VecLmul}; +use super::{writable_zero_reg, zero_reg}; +use crate::isa::zkasm::abi::Riscv64ABICallSite; +use crate::isa::zkasm::lower::args::{ + FReg, VReg, WritableFReg, WritableVReg, WritableXReg, XReg, +}; +use crate::isa::zkasm::Riscv64Backend; +use crate::machinst::Reg; +use crate::machinst::{isle::*, MachInst, SmallInstVec}; +use crate::machinst::{VCodeConstant, VCodeConstantData}; +use crate::{ + ir::{ + immediates::*, types::*, AtomicRmwOp, BlockCall, ExternalName, Inst, InstructionData, + MemFlags, StackSlot, TrapCode, Value, ValueList, + }, + isa::zkasm::inst::*, + machinst::{ArgPair, InstOutput, Lower}, +}; +use crate::{isa, isle_common_prelude_methods, isle_lower_prelude_methods}; +use regalloc2::PReg; +use std::boxed::Box; +use std::convert::TryFrom; +use std::vec::Vec; + +type BoxCallInfo = Box; +type BoxCallIndInfo = Box; +type BoxReturnCallInfo = Box; +type BoxExternalName = Box; +type VecMachLabel = Vec; +type VecArgPair = Vec; +use crate::machinst::valueregs; + +pub(crate) struct RV64IsleContext<'a, 'b, I, B> +where + I: VCodeInst, + B: LowerBackend, +{ + pub lower_ctx: &'a mut Lower<'b, I>, + pub backend: &'a B, + /// Precalucated value for the minimum vector register size. Will be 0 if + /// vectors are not supported. + min_vec_reg_size: u64, +} + +impl<'a, 'b> RV64IsleContext<'a, 'b, MInst, Riscv64Backend> { + isle_prelude_method_helpers!(Riscv64ABICallSite); + + fn new(lower_ctx: &'a mut Lower<'b, MInst>, backend: &'a Riscv64Backend) -> Self { + Self { + lower_ctx, + backend, + min_vec_reg_size: backend.isa_flags.min_vec_reg_size(), + } + } + + #[inline] + fn emit_list(&mut self, list: &SmallInstVec) { + for i in list { + self.lower_ctx.emit(i.clone()); + } + } +} + +impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> { + isle_lower_prelude_methods!(); + isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICallSite); + + fn gen_return_call( + &mut self, + callee_sig: SigRef, + callee: ExternalName, + distance: RelocDistance, + args: ValueSlice, + ) -> InstOutput { + let caller_conv = isa::CallConv::Tail; + debug_assert_eq!( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + caller_conv, + "Can only do `return_call`s from within a `tail` calling convention function" + ); + + let call_site = Riscv64ABICallSite::from_func( + self.lower_ctx.sigs(), + callee_sig, + &callee, + distance, + caller_conv, + self.backend.flags().clone(), + ); + call_site.emit_return_call(self.lower_ctx, args); + + InstOutput::new() + } + + fn gen_return_call_indirect( + &mut self, + callee_sig: SigRef, + callee: Value, + args: ValueSlice, + ) -> InstOutput { + let caller_conv = isa::CallConv::Tail; + debug_assert_eq!( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + caller_conv, + "Can only do `return_call`s from within a `tail` calling convention function" + ); + + let callee = self.put_in_reg(callee); + + let call_site = Riscv64ABICallSite::from_ptr( + self.lower_ctx.sigs(), + callee_sig, + callee, + Opcode::ReturnCallIndirect, + caller_conv, + self.backend.flags().clone(), + ); + call_site.emit_return_call(self.lower_ctx, args); + + InstOutput::new() + } + + fn vreg_new(&mut self, r: Reg) -> VReg { + VReg::new(r).unwrap() + } + fn writable_vreg_new(&mut self, r: WritableReg) -> WritableVReg { + r.map(|wr| VReg::new(wr).unwrap()) + } + fn writable_vreg_to_vreg(&mut self, arg0: WritableVReg) -> VReg { + arg0.to_reg() + } + fn writable_vreg_to_writable_reg(&mut self, arg0: WritableVReg) -> WritableReg { + 
arg0.map(|vr| vr.to_reg()) + } + fn vreg_to_reg(&mut self, arg0: VReg) -> Reg { + *arg0 + } + fn xreg_new(&mut self, r: Reg) -> XReg { + XReg::new(r).unwrap() + } + fn writable_xreg_new(&mut self, r: WritableReg) -> WritableXReg { + r.map(|wr| XReg::new(wr).unwrap()) + } + fn writable_xreg_to_xreg(&mut self, arg0: WritableXReg) -> XReg { + arg0.to_reg() + } + fn writable_xreg_to_writable_reg(&mut self, arg0: WritableXReg) -> WritableReg { + arg0.map(|xr| xr.to_reg()) + } + fn xreg_to_reg(&mut self, arg0: XReg) -> Reg { + *arg0 + } + fn freg_new(&mut self, r: Reg) -> FReg { + FReg::new(r).unwrap() + } + fn writable_freg_new(&mut self, r: WritableReg) -> WritableFReg { + r.map(|wr| FReg::new(wr).unwrap()) + } + fn writable_freg_to_freg(&mut self, arg0: WritableFReg) -> FReg { + arg0.to_reg() + } + fn writable_freg_to_writable_reg(&mut self, arg0: WritableFReg) -> WritableReg { + arg0.map(|fr| fr.to_reg()) + } + fn freg_to_reg(&mut self, arg0: FReg) -> Reg { + *arg0 + } + + fn vec_writable_to_regs(&mut self, val: &VecWritableReg) -> ValueRegs { + match val.len() { + 1 => ValueRegs::one(val[0].to_reg()), + 2 => ValueRegs::two(val[0].to_reg(), val[1].to_reg()), + _ => unreachable!(), + } + } + fn intcc_to_extend_op(&mut self, cc: &IntCC) -> ExtendOp { + use IntCC::*; + match *cc { + Equal + | NotEqual + | UnsignedLessThan + | UnsignedGreaterThanOrEqual + | UnsignedGreaterThan + | UnsignedLessThanOrEqual => ExtendOp::Zero, + + SignedLessThan + | SignedGreaterThanOrEqual + | SignedGreaterThan + | SignedLessThanOrEqual => ExtendOp::Signed, + } + } + fn lower_cond_br( + &mut self, + cc: &IntCC, + a: ValueRegs, + targets: &VecMachLabel, + ty: Type, + ) -> Unit { + MInst::lower_br_icmp( + *cc, + a, + self.int_zero_reg(ty), + BranchTarget::Label(targets[0]), + BranchTarget::Label(targets[1]), + ty, + ) + .iter() + .for_each(|i| self.emit(i)); + } + fn lower_br_icmp( + &mut self, + cc: &IntCC, + a: ValueRegs, + b: ValueRegs, + targets: &VecMachLabel, + ty: Type, + ) -> Unit { + let test = generated_code::constructor_lower_icmp(self, cc, a, b, ty); + self.emit(&MInst::CondBr { + taken: BranchTarget::Label(targets[0]), + not_taken: BranchTarget::Label(targets[1]), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: test, + rs2: zero_reg(), + }, + }); + } + fn load_ra(&mut self) -> Reg { + if self.backend.flags.preserve_frame_pointers() { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::Load { + rd: tmp, + op: LoadOP::Ld, + flags: MemFlags::trusted(), + from: AMode::FPOffset(8, I64), + }); + tmp.to_reg() + } else { + link_reg() + } + } + fn int_zero_reg(&mut self, ty: Type) -> ValueRegs { + assert!(ty.is_int(), "{:?}", ty); + if ty.bits() == 128 { + ValueRegs::two(self.zero_reg(), self.zero_reg()) + } else { + ValueRegs::one(self.zero_reg()) + } + } + + fn vec_label_get(&mut self, val: &VecMachLabel, x: u8) -> MachLabel { + val[x as usize] + } + + fn label_to_br_target(&mut self, label: MachLabel) -> BranchTarget { + BranchTarget::Label(label) + } + + fn vec_writable_clone(&mut self, v: &VecWritableReg) -> VecWritableReg { + v.clone() + } + + fn imm12_and(&mut self, imm: Imm12, x: u64) -> Imm12 { + Imm12::from_bits(imm.as_i16() & (x as i16)) + } + + fn alloc_vec_writable(&mut self, ty: Type) -> VecWritableReg { + if ty.is_int() || ty == R32 || ty == R64 { + if ty.bits() <= 64 { + vec![self.temp_writable_reg(I64)] + } else { + vec![self.temp_writable_reg(I64), self.temp_writable_reg(I64)] + } + } else if ty.is_float() || ty.is_vector() { + vec![self.temp_writable_reg(ty)] + } else { + 
unimplemented!("ty:{:?}", ty) + } + } + + fn imm(&mut self, ty: Type, val: u64) -> Reg { + let tmp = self.temp_writable_reg(ty); + let alloc_tmp = &mut |ty| self.temp_writable_reg(ty); + let insts = match ty { + F32 => MInst::load_fp_constant32(tmp, val as u32, alloc_tmp), + F64 => MInst::load_fp_constant64(tmp, val, alloc_tmp), + _ => MInst::load_constant_u64(tmp, val, alloc_tmp), + }; + self.emit_list(&insts); + tmp.to_reg() + } + #[inline] + fn emit(&mut self, arg0: &MInst) -> Unit { + self.lower_ctx.emit(arg0.clone()); + } + #[inline] + fn imm12_from_u64(&mut self, arg0: u64) -> Option { + Imm12::maybe_from_u64(arg0) + } + #[inline] + fn imm5_from_u64(&mut self, arg0: u64) -> Option { + Imm5::maybe_from_i8(i8::try_from(arg0 as i64).ok()?) + } + #[inline] + fn imm5_from_i8(&mut self, arg0: i8) -> Option { + Imm5::maybe_from_i8(arg0) + } + #[inline] + fn uimm5_bitcast_to_imm5(&mut self, arg0: UImm5) -> Imm5 { + Imm5::from_bits(arg0.bits() as u8) + } + #[inline] + fn uimm5_from_u8(&mut self, arg0: u8) -> Option { + UImm5::maybe_from_u8(arg0) + } + #[inline] + fn uimm5_from_u64(&mut self, arg0: u64) -> Option { + arg0.try_into().ok().and_then(UImm5::maybe_from_u8) + } + #[inline] + fn writable_zero_reg(&mut self) -> WritableReg { + writable_zero_reg() + } + #[inline] + fn neg_imm12(&mut self, arg0: Imm12) -> Imm12 { + -arg0 + } + #[inline] + fn zero_reg(&mut self) -> Reg { + zero_reg() + } + #[inline] + fn imm_from_bits(&mut self, val: u64) -> Imm12 { + Imm12::maybe_from_u64(val).unwrap() + } + #[inline] + fn imm_from_neg_bits(&mut self, val: i64) -> Imm12 { + Imm12::maybe_from_u64(val as u64).unwrap() + } + + fn gen_default_frm(&mut self) -> OptionFloatRoundingMode { + None + } + fn gen_select_reg(&mut self, cc: &IntCC, a: XReg, b: XReg, rs1: Reg, rs2: Reg) -> Reg { + let rd = self.temp_writable_reg(MInst::canonical_type_for_rc(rs1.class())); + self.emit(&MInst::SelectReg { + rd, + rs1, + rs2, + condition: IntegerCompare { + kind: *cc, + rs1: a.to_reg(), + rs2: b.to_reg(), + }, + }); + rd.to_reg() + } + fn load_u64_constant(&mut self, val: u64) -> Reg { + let rd = self.temp_writable_reg(I64); + MInst::load_constant_u64(rd, val, &mut |ty| self.temp_writable_reg(ty)) + .iter() + .for_each(|i| self.emit(i)); + rd.to_reg() + } + fn u8_as_i32(&mut self, x: u8) -> i32 { + x as i32 + } + + fn imm12_const(&mut self, val: i32) -> Imm12 { + if let Some(res) = Imm12::maybe_from_u64(val as u64) { + res + } else { + panic!("Unable to make an Imm12 value from {}", val) + } + } + fn imm12_const_add(&mut self, val: i32, add: i32) -> Imm12 { + Imm12::maybe_from_u64((val + add) as u64).unwrap() + } + + // + fn gen_shamt(&mut self, ty: Type, shamt: XReg) -> ValueRegs { + let ty_bits = if ty.bits() > 64 { 64 } else { ty.bits() }; + let shamt = { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: tmp, + rs: shamt.to_reg(), + imm12: Imm12::from_bits((ty_bits - 1) as i16), + }); + tmp.to_reg() + }; + let len_sub_shamt = { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::load_imm12(tmp, Imm12::from_bits(ty_bits as i16))); + let len_sub_shamt = self.temp_writable_reg(I64); + self.emit(&MInst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: len_sub_shamt, + rs1: tmp.to_reg(), + rs2: shamt, + }); + len_sub_shamt.to_reg() + }; + ValueRegs::two(shamt, len_sub_shamt) + } + + fn has_v(&mut self) -> bool { + self.backend.isa_flags.has_v() + } + + fn has_zbkb(&mut self) -> bool { + self.backend.isa_flags.has_zbkb() + } + + fn has_zba(&mut self) -> bool { + 
self.backend.isa_flags.has_zba() + } + + fn has_zbb(&mut self) -> bool { + self.backend.isa_flags.has_zbb() + } + + fn has_zbc(&mut self) -> bool { + self.backend.isa_flags.has_zbc() + } + + fn has_zbs(&mut self) -> bool { + self.backend.isa_flags.has_zbs() + } + + fn offset32_imm(&mut self, offset: i32) -> Offset32 { + Offset32::new(offset) + } + fn default_memflags(&mut self) -> MemFlags { + MemFlags::new() + } + + fn pack_float_rounding_mode(&mut self, f: &FRM) -> OptionFloatRoundingMode { + Some(*f) + } + + fn int_convert_2_float_op(&mut self, from: Type, is_signed: bool, to: Type) -> FpuOPRR { + FpuOPRR::int_convert_2_float_op(from, is_signed, to) + } + + fn gen_amode(&mut self, base: Reg, offset: Offset32, ty: Type) -> AMode { + AMode::RegOffset(base, i64::from(offset), ty) + } + + fn gen_const_amode(&mut self, c: VCodeConstant) -> AMode { + AMode::Const(c) + } + + fn valid_atomic_transaction(&mut self, ty: Type) -> Option { + if ty.is_int() && ty.bits() <= 64 { + Some(ty) + } else { + None + } + } + fn is_atomic_rmw_max_etc(&mut self, op: &AtomicRmwOp) -> Option<(AtomicRmwOp, bool)> { + let op = *op; + match op { + crate::ir::AtomicRmwOp::Umin => Some((op, false)), + crate::ir::AtomicRmwOp::Umax => Some((op, false)), + crate::ir::AtomicRmwOp::Smin => Some((op, true)), + crate::ir::AtomicRmwOp::Smax => Some((op, true)), + _ => None, + } + } + fn load_op(&mut self, ty: Type) -> LoadOP { + LoadOP::from_type(ty) + } + fn store_op(&mut self, ty: Type) -> StoreOP { + StoreOP::from_type(ty) + } + fn load_ext_name(&mut self, name: ExternalName, offset: i64) -> Reg { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::LoadExtName { + rd: tmp, + name: Box::new(name), + offset, + }); + tmp.to_reg() + } + + fn offset32_add(&mut self, a: Offset32, adden: i64) -> Offset32 { + a.try_add_i64(adden).expect("offset exceed range.") + } + + fn gen_stack_addr(&mut self, slot: StackSlot, offset: Offset32) -> Reg { + let result = self.temp_writable_reg(I64); + let i = self + .lower_ctx + .abi() + .sized_stackslot_addr(slot, i64::from(offset) as u32, result); + self.emit(&i); + result.to_reg() + } + fn atomic_amo(&mut self) -> AMO { + AMO::SeqCst + } + + fn lower_br_table(&mut self, index: Reg, targets: &VecMachLabel) -> Unit { + let tmp1 = self.temp_writable_reg(I64); + let tmp2 = self.temp_writable_reg(I64); + let targets: Vec = targets + .into_iter() + .copied() + .map(BranchTarget::Label) + .collect(); + self.emit(&MInst::BrTable { + index, + tmp1, + tmp2, + targets, + }); + } + + fn fp_reg(&mut self) -> PReg { + px_reg(8) + } + + fn sp_reg(&mut self) -> PReg { + px_reg(2) + } + + fn shift_int_to_most_significant(&mut self, v: XReg, ty: Type) -> XReg { + assert!(ty.is_int() && ty.bits() <= 64); + if ty == I64 { + return v; + } + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: v.to_reg(), + imm12: Imm12::from_bits((64 - ty.bits()) as i16), + }); + + self.xreg_new(tmp.to_reg()) + } + + #[inline] + fn int_compare(&mut self, kind: &IntCC, rs1: XReg, rs2: XReg) -> IntegerCompare { + IntegerCompare { + kind: *kind, + rs1: rs1.to_reg(), + rs2: rs2.to_reg(), + } + } + + #[inline] + fn vstate_from_type(&mut self, ty: Type) -> VState { + VState::from_type(ty) + } + + #[inline] + fn vstate_mf2(&mut self, vs: VState) -> VState { + VState { + vtype: VType { + lmul: VecLmul::LmulF2, + ..vs.vtype + }, + ..vs + } + } + + fn min_vec_reg_size(&mut self) -> u64 { + self.min_vec_reg_size + } + + #[inline] + fn ty_vec_fits_in_register(&mut self, 
ty: Type) -> Option { + if ty.is_vector() && (ty.bits() as u64) <= self.min_vec_reg_size() { + Some(ty) + } else { + None + } + } + + fn vec_alu_rr_dst_type(&mut self, op: &VecAluOpRR) -> Type { + MInst::canonical_type_for_rc(op.dst_regclass()) + } +} + +/// The main entry point for lowering with ISLE. +pub(crate) fn lower( + lower_ctx: &mut Lower, + backend: &Riscv64Backend, + inst: Inst, +) -> Option { + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. + let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); + generated_code::constructor_lower(&mut isle_ctx, inst) +} + +/// The main entry point for branch lowering with ISLE. +pub(crate) fn lower_branch( + lower_ctx: &mut Lower, + backend: &Riscv64Backend, + branch: Inst, + targets: &[MachLabel], +) -> Option<()> { + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. + let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); + generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec()) +} diff --git a/cranelift/codegen/src/isa/zkasm/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/zkasm/lower/isle/generated_code.rs new file mode 100644 index 000000000000..955a0a2b1171 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/lower/isle/generated_code.rs @@ -0,0 +1,9 @@ +// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of +// the generated ISLE source below because we include!() it. We must include!() it because its path +// depends on an environment variable; and also because of this, we can't do the `#[path = "..."] +// mod generated_code;` trick either. +#![allow(dead_code, unreachable_code, unreachable_patterns)] +#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)] +#![allow(irrefutable_let_patterns)] + +include!(concat!(env!("ISLE_DIR"), "/isle_zkasm.rs")); diff --git a/cranelift/codegen/src/isa/zkasm/mod.rs b/cranelift/codegen/src/isa/zkasm/mod.rs new file mode 100644 index 000000000000..415888d51cdf --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/mod.rs @@ -0,0 +1,228 @@ +//! risc-v 64-bit Instruction Set Architecture. + +use crate::dominator_tree::DominatorTree; +use crate::ir; +use crate::ir::{Function, Type}; +use crate::isa::zkasm::settings as riscv_settings; +use crate::isa::{Builder as IsaBuilder, FunctionAlignment, TargetIsa}; +use crate::machinst::{ + compile, CompiledCode, CompiledCodeStencil, MachInst, MachTextSectionBuilder, Reg, SigSet, + TextSectionBuilder, VCode, +}; +use crate::result::CodegenResult; +use crate::settings as shared_settings; +use alloc::{boxed::Box, vec::Vec}; +use core::fmt; +use cranelift_control::ControlPlane; +use regalloc2::MachineEnv; +use target_lexicon::{Architecture, Triple}; +mod abi; +pub(crate) mod inst; +mod lower; +mod settings; +#[cfg(feature = "unwind")] +use crate::isa::unwind::systemv; + +use inst::crate_reg_eviroment; + +use self::inst::EmitInfo; + +/// An zkasm backend. +pub struct Riscv64Backend { + triple: Triple, + flags: shared_settings::Flags, + isa_flags: riscv_settings::Flags, + mach_env: MachineEnv, +} + +impl Riscv64Backend { + /// Create a new zkasm backend with the given (shared) flags. 
+ pub fn new_with_flags( + triple: Triple, + flags: shared_settings::Flags, + isa_flags: riscv_settings::Flags, + ) -> Riscv64Backend { + let mach_env = crate_reg_eviroment(&flags); + Riscv64Backend { + triple, + flags, + isa_flags, + mach_env, + } + } + + /// This performs lowering to VCode, register-allocates the code, computes block layout and + /// finalizes branches. The result is ready for binary emission. + fn compile_vcode( + &self, + func: &Function, + domtree: &DominatorTree, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult<(VCode, regalloc2::Output)> { + let emit_info = EmitInfo::new(self.flags.clone(), self.isa_flags.clone()); + let sigs = SigSet::new::(func, &self.flags)?; + let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?; + compile::compile::(func, domtree, self, abi, emit_info, sigs, ctrl_plane) + } +} + +impl TargetIsa for Riscv64Backend { + fn compile_function( + &self, + func: &Function, + domtree: &DominatorTree, + want_disasm: bool, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult { + let (vcode, regalloc_result) = self.compile_vcode(func, domtree, ctrl_plane)?; + + let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); + let emit_result = vcode.emit(®alloc_result, want_disasm, &self.flags, ctrl_plane); + let frame_size = emit_result.frame_size; + let value_labels_ranges = emit_result.value_labels_ranges; + let buffer = emit_result.buffer; + let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; + let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; + + if let Some(disasm) = emit_result.disasm.as_ref() { + log::debug!("disassembly:\n{}", disasm); + } + + Ok(CompiledCodeStencil { + buffer, + frame_size, + vcode: emit_result.disasm, + value_labels_ranges, + sized_stackslot_offsets, + dynamic_stackslot_offsets, + bb_starts: emit_result.bb_offsets, + bb_edges: emit_result.bb_edges, + }) + } + + fn name(&self) -> &'static str { + "zkasm" + } + fn dynamic_vector_bytes(&self, _dynamic_ty: ir::Type) -> u32 { + 16 + } + + fn triple(&self) -> &Triple { + &self.triple + } + + fn flags(&self) -> &shared_settings::Flags { + &self.flags + } + + fn machine_env(&self) -> &MachineEnv { + &self.mach_env + } + + fn isa_flags(&self) -> Vec { + self.isa_flags.iter().collect() + } + + #[cfg(feature = "unwind")] + fn emit_unwind_info( + &self, + result: &CompiledCode, + kind: crate::machinst::UnwindInfoKind, + ) -> CodegenResult> { + use crate::isa::unwind::UnwindInfo; + use crate::machinst::UnwindInfoKind; + Ok(match kind { + UnwindInfoKind::SystemV => { + let mapper = self::inst::unwind::systemv::RegisterMapper; + Some(UnwindInfo::SystemV( + crate::isa::unwind::systemv::create_unwind_info_from_insts( + &result.buffer.unwind_info[..], + result.buffer.data().len(), + &mapper, + )?, + )) + } + UnwindInfoKind::Windows => None, + _ => None, + }) + } + + #[cfg(feature = "unwind")] + fn create_systemv_cie(&self) -> Option { + Some(inst::unwind::systemv::create_cie()) + } + + fn text_section_builder(&self, num_funcs: usize) -> Box { + Box::new(MachTextSectionBuilder::::new(num_funcs)) + } + + #[cfg(feature = "unwind")] + fn map_regalloc_reg_to_dwarf(&self, reg: Reg) -> Result { + inst::unwind::systemv::map_reg(reg).map(|reg| reg.0) + } + + fn function_alignment(&self) -> FunctionAlignment { + inst::Inst::function_alignment() + } + + #[cfg(feature = "disas")] + fn to_capstone(&self) -> Result { + use capstone::prelude::*; + let mut cs = Capstone::new() + .riscv() + .mode(arch::riscv::ArchMode::RiscV64) + .build()?; + // 
Similar to AArch64, RISC-V uses inline constants rather than a separate + // constant pool. We want to skip dissasembly over inline constants instead + // of stopping on invalid bytes. + cs.set_skipdata(true)?; + Ok(cs) + } + + fn has_native_fma(&self) -> bool { + true + } + + fn has_x86_blendv_lowering(&self, _: Type) -> bool { + false + } + + fn has_x86_pshufb_lowering(&self) -> bool { + false + } + + fn has_x86_pmulhrsw_lowering(&self) -> bool { + false + } + + fn has_x86_pmaddubsw_lowering(&self) -> bool { + false + } +} + +impl fmt::Display for Riscv64Backend { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("MachBackend") + .field("name", &self.name()) + .field("triple", &self.triple()) + .field("flags", &format!("{}", self.flags())) + .finish() + } +} + +/// Create a new `isa::Builder`. +pub fn isa_builder(triple: Triple) -> IsaBuilder { + match triple.architecture { + Architecture::Riscv64(..) => {} + _ => unreachable!(), + } + IsaBuilder { + triple, + setup: riscv_settings::builder(), + constructor: |triple, shared_flags, builder| { + let isa_flags = riscv_settings::Flags::new(&shared_flags, builder); + let backend = Riscv64Backend::new_with_flags(triple, shared_flags, isa_flags); + Ok(backend.wrapped()) + }, + } +} diff --git a/cranelift/codegen/src/isa/zkasm/settings.rs b/cranelift/codegen/src/isa/zkasm/settings.rs new file mode 100644 index 000000000000..a91e91e61938 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/settings.rs @@ -0,0 +1,8 @@ +//! zkasm Settings. + +use crate::settings::{self, detail, Builder, Value}; +use core::fmt; + +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +include!(concat!(env!("OUT_DIR"), "/settings-zkasm.rs")); From 366fcc3c58644450596090d7e8220f5fbea43f68 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Thu, 24 Aug 2023 12:41:44 +0300 Subject: [PATCH 02/68] rename --- cranelift/codegen/meta/src/isa/zkasm.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/meta/src/isa/zkasm.rs b/cranelift/codegen/meta/src/isa/zkasm.rs index f080aaef3737..22ab97407540 100644 --- a/cranelift/codegen/meta/src/isa/zkasm.rs +++ b/cranelift/codegen/meta/src/isa/zkasm.rs @@ -26,7 +26,7 @@ macro_rules! define_zvl_ext { } pub(crate) fn define() -> TargetIsa { - let mut setting = SettingGroupBuilder::new("riscv64"); + let mut setting = SettingGroupBuilder::new("zkasm"); let _has_m = setting.add_bool("has_m", "has extension M?", "", false); let _has_a = setting.add_bool("has_a", "has extension A?", "", false); @@ -97,5 +97,5 @@ pub(crate) fn define() -> TargetIsa { let (_, zvl32768b) = define_zvl_ext!(setting, 32768, zvl16384b); let (_, _zvl65536b) = define_zvl_ext!(setting, 65536, zvl32768b); - TargetIsa::new("riscv64", setting.build()) + TargetIsa::new("zkasm", setting.build()) } From 4ca385a126e2b0ec3ae9495f7bd3f26c069fb124 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Thu, 24 Aug 2023 12:44:16 +0300 Subject: [PATCH 03/68] Fix the wasm command --- cranelift/codegen/src/isa/zkasm/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/zkasm/mod.rs b/cranelift/codegen/src/isa/zkasm/mod.rs index 415888d51cdf..7e19f7578d2e 100644 --- a/cranelift/codegen/src/isa/zkasm/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/mod.rs @@ -213,7 +213,7 @@ impl fmt::Display for Riscv64Backend { /// Create a new `isa::Builder`. 
pub fn isa_builder(triple: Triple) -> IsaBuilder { match triple.architecture { - Architecture::Riscv64(..) => {} + Architecture::Sparc => {} _ => unreachable!(), } IsaBuilder { From 95d9bf38d5aff239c72dcf9c204557e2525aa561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Gaspard?= Date: Thu, 24 Aug 2023 12:19:52 +0200 Subject: [PATCH 04/68] todo-out riscv64 codegen --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 36 +++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 6e2b3f875465..ffc8ffc54f71 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -61,6 +61,8 @@ impl LoadConstant { rd: Writable, alloc_tmp: &mut F, ) -> SmallInstVec { + todo!() + /* let mut insts = SmallInstVec::new(); // get current pc. let pc = alloc_tmp(I64); @@ -82,10 +84,13 @@ impl LoadConstant { }); insts.push(Inst::RawData { data }); insts + */ } // load and perform an extra add. pub(crate) fn load_constant_and_add(self, rd: Writable, rs: Reg) -> SmallInstVec { + todo!() + /* let mut insts = self.load_constant(rd, &mut |_| rd); insts.push(Inst::AluRRR { alu_op: AluOPRRR::Add, @@ -94,6 +99,7 @@ impl LoadConstant { rs2: rs, }); insts + */ } } @@ -179,20 +185,22 @@ impl MachInstEmitState for EmitState { impl Inst { /// construct a "imm - rs". pub(crate) fn construct_imm_sub_rs(rd: Writable, imm: u64, rs: Reg) -> SmallInstVec { - let mut insts = Inst::load_constant_u64(rd, imm, &mut |_| rd); + todo!() + /* let mut insts = Inst::load_constant_u64(rd, imm, &mut |_| rd); insts.push(Inst::AluRRR { alu_op: AluOPRRR::Sub, rd, rs1: rd.to_reg(), rs2: rs, }); - insts + insts */ } /// Load int mask. /// If ty is int then 0xff in rd. 
pub(crate) fn load_int_mask(rd: Writable, ty: Type) -> SmallInstVec { - let mut insts = SmallInstVec::new(); + todo!() + /* let mut insts = SmallInstVec::new(); assert!(ty.is_int() && ty.bits() <= 64); match ty { I64 => { @@ -213,7 +221,7 @@ impl Inst { } _ => unreachable!("ty:{:?}", ty), } - insts + insts */ } /// inverse all bit pub(crate) fn construct_bit_not(rd: Writable, rs: Reg) -> Inst { @@ -261,7 +269,8 @@ impl Inst { taken: BranchTarget, not_taken: BranchTarget, ) -> SmallInstVec { - let mut insts = SmallInstVec::new(); + todo!() + /* let mut insts = SmallInstVec::new(); let class_op = if ty == F32 { FpuOPRR::FclassS } else { @@ -288,7 +297,7 @@ impl Inst { rs2: zero_reg(), }, }); - insts + insts */ } pub(crate) fn emit_fneg(rd: Writable, rs: Reg, ty: Type) -> Inst { Inst::FpuRRR { @@ -312,7 +321,8 @@ impl Inst { not_taken: BranchTarget, ty: Type, ) -> SmallInstVec { - let mut insts = SmallInstVec::new(); + todo!() + /* let mut insts = SmallInstVec::new(); if ty.bits() <= 64 { let rs1 = a.only_reg().unwrap(); let rs2 = b.only_reg().unwrap(); @@ -399,7 +409,7 @@ impl Inst { }); } } - insts + insts */ } /// Returns Some(VState) if this insturction is expecting a specific vector state @@ -497,7 +507,8 @@ impl MachInstEmit for Inst { emit_info: &Self::Info, state: &mut EmitState, ) { - let mut allocs = AllocationConsumer::new(allocs); + todo!() + /* let mut allocs = AllocationConsumer::new(allocs); // Check if we need to update the vector state before emitting this instruction if let Some(expected) = self.expected_vstate() { @@ -3060,7 +3071,7 @@ impl MachInstEmit for Inst { self, end_off - start_off, Inst::worst_case_size() - ); + ); */ } fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String { @@ -3087,7 +3098,8 @@ fn emit_return_call_common_sequence( old_stack_arg_size: u32, uses: &CallArgList, ) { - for u in uses { + todo!() + /* for u in uses { let _ = allocs.next(u.vreg); } @@ -3222,5 +3234,5 @@ fn emit_return_call_common_sequence( "return_call[_ind] adjusts virtual sp offset by {} -> {}", new_stack_arg_size, state.virtual_sp_offset - ); + ); */ } From c721b1e67e3a5681c3bf6e4daba000db754c368e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Gaspard?= Date: Thu, 24 Aug 2023 12:33:28 +0200 Subject: [PATCH 05/68] split todo for emit further --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 249 +++++++++---------- 1 file changed, 124 insertions(+), 125 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index ffc8ffc54f71..294a639c83e6 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -507,8 +507,7 @@ impl MachInstEmit for Inst { emit_info: &Self::Info, state: &mut EmitState, ) { - todo!() - /* let mut allocs = AllocationConsumer::new(allocs); + let mut allocs = AllocationConsumer::new(allocs); // Check if we need to update the vector state before emitting this instruction if let Some(expected) = self.expected_vstate() { @@ -534,13 +533,13 @@ impl MachInstEmit for Inst { } // Addi x0, x0, 0 &Inst::Nop4 => { - let x = Inst::AluRRImm12 { + todo!() /* let x = Inst::AluRRImm12 { alu_op: AluOPRRI::Addi, rd: Writable::from_reg(zero_reg()), rs: zero_reg(), imm12: Imm12::zero(), }; - x.emit(&[], sink, emit_info, state) + x.emit(&[], sink, emit_info, state) */ } &Inst::RawData { ref data } => { // Right now we only put a u32 or u64 in this instruction. 
@@ -549,26 +548,26 @@ impl MachInstEmit for Inst { // use to load some data and rely on some positon in the code stream. // and we may exceed `Inst::worst_case_size`. // for more information see https://github.com/bytecodealliance/wasmtime/pull/5612. - sink.put_data(&data[..]); + todo!() // sink.put_data(&data[..]); } &Inst::Lui { rd, ref imm } => { - let rd = allocs.next_writable(rd); + todo!() /* let rd = allocs.next_writable(rd); let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12); - sink.put4(x); + sink.put4(x); */ } &Inst::LoadConst32 { rd, imm } => { - let rd = allocs.next_writable(rd); + todo!() /* let rd = allocs.next_writable(rd); LoadConstant::U32(imm) .load_constant(rd, &mut |_| rd) .into_iter() - .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); */ } &Inst::LoadConst64 { rd, imm } => { - let rd = allocs.next_writable(rd); + todo!() /* let rd = allocs.next_writable(rd); LoadConstant::U64(imm) .load_constant(rd, &mut |_| rd) .into_iter() - .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); */ } &Inst::FpuRR { frm, @@ -576,7 +575,7 @@ impl MachInstEmit for Inst { rd, rs, } => { - let rs = allocs.next(rs); + todo!() /* let rs = allocs.next(rs); let rd = allocs.next_writable(rd); let x = alu_op.op_code() | reg_to_gpr_num(rd.to_reg()) << 7 @@ -588,7 +587,7 @@ impl MachInstEmit for Inst { if !srcloc.is_default() && alu_op.is_convert_to_int() { sink.add_trap(TrapCode::BadConversionToInteger); } - sink.put4(x); + sink.put4(x); */ } &Inst::FpuRRRR { alu_op, @@ -598,7 +597,7 @@ impl MachInstEmit for Inst { rs3, frm, } => { - let rs1 = allocs.next(rs1); + todo!() /* let rs1 = allocs.next(rs1); let rs2 = allocs.next(rs2); let rs3 = allocs.next(rs3); let rd = allocs.next_writable(rd); @@ -610,7 +609,7 @@ impl MachInstEmit for Inst { | alu_op.funct2() << 25 | reg_to_gpr_num(rs3) << 27; - sink.put4(x); + sink.put4(x); */ } &Inst::FpuRRR { alu_op, @@ -619,7 +618,7 @@ impl MachInstEmit for Inst { rs1, rs2, } => { - let rs1 = allocs.next(rs1); + todo!() /* let rs1 = allocs.next(rs1); let rs2 = allocs.next(rs2); let rd = allocs.next_writable(rd); @@ -629,13 +628,13 @@ impl MachInstEmit for Inst { | reg_to_gpr_num(rs1) << 15 | reg_to_gpr_num(rs2) << 20 | alu_op.funct7() << 25; - sink.put4(x); + sink.put4(x); */ } &Inst::Unwind { ref inst } => { - sink.add_unwind(inst.clone()); + todo!() // sink.add_unwind(inst.clone()); } &Inst::DummyUse { reg } => { - allocs.next(reg); + todo!() // allocs.next(reg); } &Inst::AluRRR { alu_op, @@ -643,7 +642,7 @@ impl MachInstEmit for Inst { rs1, rs2, } => { - let rs1 = allocs.next(rs1); + todo!() /* let rs1 = allocs.next(rs1); let rs2 = allocs.next(rs2); let rd = allocs.next_writable(rd); let (rs1, rs2) = if alu_op.reverse_rs() { @@ -659,7 +658,7 @@ impl MachInstEmit for Inst { rs1, rs2, alu_op.funct7(), - )); + )); */ } &Inst::AluRRImm12 { alu_op, @@ -667,14 +666,14 @@ impl MachInstEmit for Inst { rs, imm12, } => { - let rs = allocs.next(rs); + todo!() /* let rs = allocs.next(rs); let rd = allocs.next_writable(rd); let x = alu_op.op_code() | reg_to_gpr_num(rd.to_reg()) << 7 | alu_op.funct3() << 12 | reg_to_gpr_num(rs) << 15 | alu_op.imm12(imm12) << 20; - sink.put4(x); + sink.put4(x); */ } &Inst::Load { rd, @@ -682,7 +681,7 @@ impl MachInstEmit for Inst { from, flags, } => { - let from = from.clone().with_allocs(&mut allocs); + todo!() /* let from = from.clone().with_allocs(&mut allocs); let rd = 
allocs.next_writable(rd); let base = from.get_base_register(); @@ -706,10 +705,10 @@ impl MachInstEmit for Inst { sink.add_trap(TrapCode::HeapOutOfBounds); } - sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); + sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); */ } &Inst::Store { op, src, flags, to } => { - let to = to.clone().with_allocs(&mut allocs); + todo!() /* let to = to.clone().with_allocs(&mut allocs); let src = allocs.next(src); let base = to.get_base_register(); @@ -733,7 +732,7 @@ impl MachInstEmit for Inst { sink.add_trap(TrapCode::HeapOutOfBounds); } - sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12)); + sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12)); */ } &Inst::Args { .. } => { // Nothing: this is a pseudoinstruction that serves @@ -742,7 +741,7 @@ impl MachInstEmit for Inst { &Inst::Ret { stack_bytes_to_pop, .. } => { - if stack_bytes_to_pop != 0 { + todo!() /* if stack_bytes_to_pop != 0 { Inst::AdjustSp { amount: i64::from(stack_bytes_to_pop), } @@ -750,7 +749,7 @@ impl MachInstEmit for Inst { } //jalr x0, x1, 0 let x: u32 = (0b1100111) | (1 << 15); - sink.put4(x); + sink.put4(x); */ } &Inst::Extend { @@ -760,7 +759,7 @@ impl MachInstEmit for Inst { from_bits, to_bits: _to_bits, } => { - let rn = allocs.next(rn); + todo!() /* let rn = allocs.next(rn); let rd = allocs.next_writable(rd); let mut insts = SmallInstVec::new(); let shift_bits = (64 - from_bits) as i16; @@ -793,10 +792,10 @@ impl MachInstEmit for Inst { } insts .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); + .for_each(|i| i.emit(&[], sink, emit_info, state)); */ } &Inst::AdjustSp { amount } => { - if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { + todo!() /* if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { Inst::AluRRImm12 { alu_op: AluOPRRI::Addi, rd: writable_stack_reg(), @@ -816,11 +815,11 @@ impl MachInstEmit for Inst { insts .into_iter() .for_each(|i| i.emit(&[], sink, emit_info, state)); - } + } */ } &Inst::Call { ref info } => { // call - match info.dest { + todo!() /* match info.dest { ExternalName::User { .. } => { if info.opcode.is_call() { sink.add_call_site(info.opcode); @@ -870,10 +869,10 @@ impl MachInstEmit for Inst { trace!( "call adjusts virtual sp offset by {callee_pop_size} -> {}", state.virtual_sp_offset - ); + ); */ } &Inst::CallInd { ref info } => { - let rn = allocs.next(info.rn); + todo!() /* let rn = allocs.next(info.rn); if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); } @@ -893,14 +892,14 @@ impl MachInstEmit for Inst { trace!( "call adjusts virtual sp offset by {callee_pop_size} -> {}", state.virtual_sp_offset - ); + ); */ } &Inst::ReturnCall { ref callee, ref info, } => { - emit_return_call_common_sequence( + todo!() /* emit_return_call_common_sequence( &mut allocs, sink, emit_info, @@ -919,11 +918,11 @@ impl MachInstEmit for Inst { // `emit_return_call_common_sequence` emits an island if // necessary, so we can safely disable the worst-case-size check // in this case. - start_off = sink.cur_offset(); + start_off = sink.cur_offset(); */ } &Inst::ReturnCallInd { callee, ref info } => { - let callee = allocs.next(callee); + todo!() /* let callee = allocs.next(callee); emit_return_call_common_sequence( &mut allocs, @@ -945,11 +944,11 @@ impl MachInstEmit for Inst { // `emit_return_call_common_sequence` emits an island if // necessary, so we can safely disable the worst-case-size check // in this case. 
- start_off = sink.cur_offset(); + start_off = sink.cur_offset(); */ } &Inst::Jal { dest } => { - let code: u32 = 0b1101111; + todo!() /* let code: u32 = 0b1101111; match dest { BranchTarget::Label(lable) => { sink.use_label_at_offset(start_off, lable, LabelUse::Jal20); @@ -976,14 +975,14 @@ impl MachInstEmit for Inst { // CondBr often generate Jal {dest : 0}, means otherwise no jump. } } - } + } */ } &Inst::CondBr { taken, not_taken, mut kind, } => { - kind.rs1 = allocs.next(kind.rs1); + todo!() /* kind.rs1 = allocs.next(kind.rs1); kind.rs2 = allocs.next(kind.rs2); match taken { BranchTarget::Label(label) => { @@ -1015,11 +1014,11 @@ impl MachInstEmit for Inst { } } } - Inst::Jal { dest: not_taken }.emit(&[], sink, emit_info, state); + Inst::Jal { dest: not_taken }.emit(&[], sink, emit_info, state); */ } &Inst::Mov { rd, rm, ty } => { - debug_assert_eq!(rd.to_reg().class(), rm.class()); + todo!() /* debug_assert_eq!(rd.to_reg().class(), rm.class()); if rd.to_reg() == rm { return; } @@ -1056,11 +1055,11 @@ impl MachInstEmit for Inst { vstate: VState::from_type(ty), }, } - .emit(&[], sink, emit_info, state); + .emit(&[], sink, emit_info, state); */ } &Inst::MovFromPReg { rd, rm } => { - debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); + todo!() /* debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); let rd = allocs.next_writable(rd); let x = Inst::AluRRImm12 { alu_op: AluOPRRI::Ori, @@ -1068,7 +1067,7 @@ impl MachInstEmit for Inst { rs: Reg::from(rm), imm12: Imm12::zero(), }; - x.emit(&[], sink, emit_info, state); + x.emit(&[], sink, emit_info, state); */ } &Inst::BrTable { @@ -1077,7 +1076,7 @@ impl MachInstEmit for Inst { tmp2, ref targets, } => { - let index = allocs.next(index); + todo!() /* let index = allocs.next(index); let tmp1 = allocs.next_writable(tmp1); let tmp2 = allocs.next_writable(tmp2); let ext_index = writable_spilltmp_reg(); @@ -1218,16 +1217,16 @@ impl MachInstEmit for Inst { // We've just emitted an island that is safe up to *here*. // Mark it as such so that we don't needlessly emit additional islands. 
- start_off = sink.cur_offset(); + start_off = sink.cur_offset(); */ } &Inst::VirtualSPOffsetAdj { amount } => { - crate::trace!( + todo!() /* crate::trace!( "virtual sp offset adjusted by {} -> {}", amount, state.virtual_sp_offset + amount ); - state.virtual_sp_offset += amount; + state.virtual_sp_offset += amount; */ } &Inst::Atomic { op, @@ -1236,7 +1235,7 @@ impl MachInstEmit for Inst { src, amo, } => { - let addr = allocs.next(addr); + todo!() /* let addr = allocs.next(addr); let src = allocs.next(src); let rd = allocs.next_writable(rd); let srcloc = state.cur_srcloc(); @@ -1250,27 +1249,27 @@ impl MachInstEmit for Inst { | reg_to_gpr_num(src) << 20 | op.funct7(amo) << 25; - sink.put4(x); + sink.put4(x); */ } &Inst::Fence { pred, succ } => { - let x = 0b0001111 + todo!() /* let x = 0b0001111 | 0b00000 << 7 | 0b000 << 12 | 0b00000 << 15 | (succ as u32) << 20 | (pred as u32) << 24; - sink.put4(x); + sink.put4(x); */ } - &Inst::FenceI => sink.put4(0x0000100f), + &Inst::FenceI => todo!(), // sink.put4(0x0000100f), &Inst::Auipc { rd, imm } => { - let rd = allocs.next_writable(rd); + todo!() /* let rd = allocs.next_writable(rd); let x = enc_auipc(rd, imm); - sink.put4(x); + sink.put4(x); */ } &Inst::LoadAddr { rd, mem } => { - let mem = mem.with_allocs(&mut allocs); + todo!() /* let mem = mem.with_allocs(&mut allocs); let rd = allocs.next_writable(rd); let base = mem.get_base_register(); @@ -1325,7 +1324,7 @@ impl MachInstEmit for Inst { (amode, _, _) => { unimplemented!("LoadAddr: {:?}", amode); } - } + } */ } &Inst::Select { @@ -1335,7 +1334,7 @@ impl MachInstEmit for Inst { ref y, ty: _ty, } => { - let condition = allocs.next(condition); + todo!() /* let condition = allocs.next(condition); let x = alloc_value_regs(x, &mut allocs); let y = alloc_value_regs(y, &mut allocs); let dst: Vec<_> = dst @@ -1372,18 +1371,18 @@ impl MachInstEmit for Inst { insts .into_iter() .for_each(|i| i.emit(&[], sink, emit_info, state)); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::Jalr { rd, base, offset } => { - let rd = allocs.next_writable(rd); + todo!() /* let rd = allocs.next_writable(rd); let x = enc_jalr(rd, base, offset); - sink.put4(x); + sink.put4(x); */ } &Inst::ECall => { - sink.put4(0x00000073); + todo!() // sink.put4(0x00000073); } &Inst::EBreak => { - sink.put4(0x00100073); + todo!() // sink.put4(0x00100073); } &Inst::Icmp { cc, @@ -1392,7 +1391,7 @@ impl MachInstEmit for Inst { ref b, ty, } => { - let a = alloc_value_regs(a, &mut allocs); + todo!() /* let a = alloc_value_regs(a, &mut allocs); let b = alloc_value_regs(b, &mut allocs); let rd = allocs.next_writable(rd); let label_true = sink.get_label(); @@ -1415,7 +1414,7 @@ impl MachInstEmit for Inst { } .emit(&[], sink, emit_info, state); sink.bind_label(label_false, &mut state.ctrl_plane); - Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state); + Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state); */ } &Inst::AtomicCas { offset, @@ -1426,7 +1425,7 @@ impl MachInstEmit for Inst { v, ty, } => { - let offset = allocs.next(offset); + todo!() /* let offset = allocs.next(offset); let e = allocs.next(e); let addr = allocs.next(addr); let v = allocs.next(v); @@ -1515,7 +1514,7 @@ impl MachInstEmit for Inst { }, } .emit(&[], sink, emit_info, state); - sink.bind_label(fail_label, &mut state.ctrl_plane); + sink.bind_label(fail_label, &mut state.ctrl_plane); */ } &Inst::AtomicRmwLoop { offset, @@ -1526,7 +1525,7 @@ impl MachInstEmit for 
Inst { x, t0, } => { - let offset = allocs.next(offset); + todo!() /* let offset = allocs.next(offset); let p = allocs.next(p); let x = allocs.next(x); let t0 = allocs.next_writable(t0); @@ -1726,7 +1725,7 @@ impl MachInstEmit for Inst { rs2: zero_reg(), }, } - .emit(&[], sink, emit_info, state); + .emit(&[], sink, emit_info, state); */ } &Inst::IntSelect { @@ -1736,7 +1735,7 @@ impl MachInstEmit for Inst { ref y, ty, } => { - let x = alloc_value_regs(x, &mut allocs); + todo!() /* let x = alloc_value_regs(x, &mut allocs); let y = alloc_value_regs(y, &mut allocs); let dst: Vec<_> = dst.iter().map(|r| allocs.next_writable(*r)).collect(); let label_true = sink.get_label(); @@ -1781,7 +1780,7 @@ impl MachInstEmit for Inst { // here is false use y sink.bind_label(label_false, &mut state.ctrl_plane); gen_move(&dst, &y, sink, state); - sink.bind_label(label_done, &mut state.ctrl_plane); + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::SelectReg { @@ -1790,7 +1789,7 @@ impl MachInstEmit for Inst { rs1, rs2, } => { - let mut condition = condition.clone(); + todo!() /* let mut condition = condition.clone(); condition.rs1 = allocs.next(condition.rs1); condition.rs2 = allocs.next(condition.rs2); let rs1 = allocs.next(rs1); @@ -1813,7 +1812,7 @@ impl MachInstEmit for Inst { // here condition is true , use rs1 sink.bind_label(label_true, &mut state.ctrl_plane); Inst::gen_move(rd, rs1, ty).emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::FcvtToInt { is_sat, @@ -1824,7 +1823,7 @@ impl MachInstEmit for Inst { out_type, tmp, } => { - let rs = allocs.next(rs); + todo!() /* let rs = allocs.next(rs); let tmp = allocs.next_writable(tmp); let rd = allocs.next_writable(rd); let label_nan = sink.get_label(); @@ -1984,7 +1983,7 @@ impl MachInstEmit for Inst { .emit(&[], sink, emit_info, state); } // bind jump_over - sink.bind_label(label_jump_over, &mut state.ctrl_plane); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::LoadExtName { @@ -1992,7 +1991,7 @@ impl MachInstEmit for Inst { ref name, offset, } => { - let rd = allocs.next_writable(rd); + todo!() /* let rd = allocs.next_writable(rd); // get the current pc. 
Inst::Auipc { rd: rd, @@ -2019,7 +2018,7 @@ impl MachInstEmit for Inst { .emit(&[], sink, emit_info, state); sink.add_reloc(Reloc::Abs8, name.as_ref(), offset); - sink.put8(0); + sink.put8(0); */ } &Inst::TrapIfC { rs1, @@ -2027,7 +2026,7 @@ impl MachInstEmit for Inst { cc, trap_code, } => { - let rs1 = allocs.next(rs1); + todo!() /* let rs1 = allocs.next(rs1); let rs2 = allocs.next(rs2); let label_trap = sink.get_label(); let label_jump_over = sink.get_label(); @@ -2040,10 +2039,10 @@ impl MachInstEmit for Inst { // trap sink.bind_label(label_trap, &mut state.ctrl_plane); Inst::Udf { trap_code }.emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::TrapIf { test, trap_code } => { - let test = allocs.next(test); + todo!() /* let test = allocs.next(test); let label_trap = sink.get_label(); let label_jump_over = sink.get_label(); Inst::CondBr { @@ -2062,17 +2061,17 @@ impl MachInstEmit for Inst { trap_code: trap_code, } .emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::Udf { trap_code } => { - sink.add_trap(trap_code); + todo!() /* sink.add_trap(trap_code); if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); } - sink.put_data(Inst::TRAP_OPCODE); + sink.put_data(Inst::TRAP_OPCODE); */ } &Inst::AtomicLoad { rd, ty, p } => { - let p = allocs.next(p); + todo!() /* let p = allocs.next(p); let rd = allocs.next_writable(rd); // emit the fence. Inst::Fence { @@ -2092,10 +2091,10 @@ impl MachInstEmit for Inst { pred: Inst::FENCE_REQ_R, succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, } - .emit(&[], sink, emit_info, state); + .emit(&[], sink, emit_info, state); */ } &Inst::AtomicStore { src, ty, p } => { - let src = allocs.next(src); + todo!() /* let src = allocs.next(src); let p = allocs.next(p); Inst::Fence { pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, @@ -2108,7 +2107,7 @@ impl MachInstEmit for Inst { flags: MemFlags::new(), src, } - .emit(&[], sink, emit_info, state); + .emit(&[], sink, emit_info, state); */ } &Inst::FloatRound { op, @@ -2118,7 +2117,7 @@ impl MachInstEmit for Inst { rs, ty, } => { - // this code is port from glibc ceil floor ... implementation. + todo!() /* // this code is port from glibc ceil floor ... implementation. let rs = allocs.next(rs); let int_tmp = allocs.next_writable(int_tmp); let f_tmp = allocs.next_writable(f_tmp); @@ -2250,7 +2249,7 @@ impl MachInstEmit for Inst { // here select origin x. 
sink.bind_label(label_x, &mut state.ctrl_plane); Inst::gen_move(rd, rs, ty).emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::FloatSelect { @@ -2261,7 +2260,7 @@ impl MachInstEmit for Inst { rs2, ty, } => { - let rs1 = allocs.next(rs1); + todo!() /* let rs1 = allocs.next(rs1); let rs2 = allocs.next(rs2); let tmp = allocs.next_writable(tmp); let rd = allocs.next_writable(rd); @@ -2378,7 +2377,7 @@ impl MachInstEmit for Inst { rs: tmp.to_reg(), } .emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::Popcnt { sum, @@ -2387,7 +2386,7 @@ impl MachInstEmit for Inst { rs, ty, } => { - let rs = allocs.next(rs); + todo!() /* let rs = allocs.next(rs); let tmp = allocs.next_writable(tmp); let step = allocs.next_writable(step); let sum = allocs.next_writable(sum); @@ -2472,10 +2471,10 @@ impl MachInstEmit for Inst { } .emit(&[], sink, emit_info, state); } - sink.bind_label(label_done, &mut state.ctrl_plane); + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::Rev8 { rs, rd, tmp, step } => { - let rs = allocs.next(rs); + todo!() /* let rs = allocs.next(rs); let tmp = allocs.next_writable(tmp); let step = allocs.next_writable(step); let rd = allocs.next_writable(rd); @@ -2542,7 +2541,7 @@ impl MachInstEmit for Inst { } } .emit(&[], sink, emit_info, state); - sink.bind_label(label_done, &mut state.ctrl_plane); + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::Cltz { sum, @@ -2552,7 +2551,7 @@ impl MachInstEmit for Inst { leading, ty, } => { - let rs = allocs.next(rs); + todo!() /* let rs = allocs.next(rs); let tmp = allocs.next_writable(tmp); let step = allocs.next_writable(step); let sum = allocs.next_writable(sum); @@ -2641,7 +2640,7 @@ impl MachInstEmit for Inst { } .emit(&[], sink, emit_info, state); } - sink.bind_label(label_done, &mut state.ctrl_plane); + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::Brev8 { rs, @@ -2651,7 +2650,7 @@ impl MachInstEmit for Inst { tmp2, rd, } => { - let rs = allocs.next(rs); + todo!() /* let rs = allocs.next(rs); let step = allocs.next_writable(step); let tmp = allocs.next_writable(tmp); let tmp2 = allocs.next_writable(tmp2); @@ -2794,14 +2793,14 @@ impl MachInstEmit for Inst { } .emit(&[], sink, emit_info, state); } - sink.bind_label(label_done, &mut state.ctrl_plane); + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::StackProbeLoop { guard_size, probe_count, tmp: guard_size_tmp, } => { - let step = writable_spilltmp_reg(); + todo!() /* let step = writable_spilltmp_reg(); Inst::load_constant_u64( step, (guard_size as u64) * (probe_count as u64), @@ -2853,7 +2852,7 @@ impl MachInstEmit for Inst { dest: BranchTarget::Label(loop_start), } .emit(&[], sink, emit_info, state); - sink.bind_label(label_done, &mut state.ctrl_plane); + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::VecAluRRRImm5 { op, @@ -2864,14 +2863,14 @@ impl MachInstEmit for Inst { ref mask, .. 
} => { - let vs2 = allocs.next(vs2); + todo!() /* let vs2 = allocs.next(vs2); let vd_src = allocs.next(vd_src); let vd = allocs.next_writable(vd); let mask = mask.with_allocs(&mut allocs); debug_assert_eq!(vd.to_reg(), vd_src); - sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, mask)); + sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, mask)); */ } &Inst::VecAluRRRR { op, @@ -2882,7 +2881,7 @@ impl MachInstEmit for Inst { ref mask, .. } => { - let vs1 = allocs.next(vs1); + todo!() /* let vs1 = allocs.next(vs1); let vs2 = allocs.next(vs2); let vd_src = allocs.next(vd_src); let vd = allocs.next_writable(vd); @@ -2890,7 +2889,7 @@ impl MachInstEmit for Inst { debug_assert_eq!(vd.to_reg(), vd_src); - sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, mask)); + sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, mask)); */ } &Inst::VecAluRRR { op, @@ -2900,12 +2899,12 @@ impl MachInstEmit for Inst { ref mask, .. } => { - let vs1 = allocs.next(vs1); + todo!() /* let vs1 = allocs.next(vs1); let vs2 = allocs.next(vs2); let vd = allocs.next_writable(vd); let mask = mask.with_allocs(&mut allocs); - sink.put4(encode_valu(op, vd, vs1, vs2, mask)); + sink.put4(encode_valu(op, vd, vs1, vs2, mask)); */ } &Inst::VecAluRRImm5 { op, @@ -2915,11 +2914,11 @@ impl MachInstEmit for Inst { ref mask, .. } => { - let vs2 = allocs.next(vs2); + todo!() /* let vs2 = allocs.next(vs2); let vd = allocs.next_writable(vd); let mask = mask.with_allocs(&mut allocs); - sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, mask)); + sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, mask)); */ } &Inst::VecAluRR { op, @@ -2928,11 +2927,11 @@ impl MachInstEmit for Inst { ref mask, .. } => { - let vs = allocs.next(vs); + todo!() /* let vs = allocs.next(vs); let vd = allocs.next_writable(vd); let mask = mask.with_allocs(&mut allocs); - sink.put4(encode_valu_rr(op, vd, vs, mask)); + sink.put4(encode_valu_rr(op, vd, vs, mask)); */ } &Inst::VecAluRImm5 { op, @@ -2941,13 +2940,13 @@ impl MachInstEmit for Inst { ref mask, .. } => { - let vd = allocs.next_writable(vd); + todo!() /* let vd = allocs.next_writable(vd); let mask = mask.with_allocs(&mut allocs); - sink.put4(encode_valu_r_imm(op, vd, imm, mask)); + sink.put4(encode_valu_r_imm(op, vd, imm, mask)); */ } &Inst::VecSetState { rd, ref vstate } => { - let rd = allocs.next_writable(rd); + todo!() /* let rd = allocs.next_writable(rd); sink.put4(encode_vcfg_imm( 0x57, @@ -2957,7 +2956,7 @@ impl MachInstEmit for Inst { )); // Update the current vector emit state. - state.vstate = EmitVState::Known(vstate.clone()); + state.vstate = EmitVState::Known(vstate.clone()); */ } &Inst::VecLoad { @@ -2968,7 +2967,7 @@ impl MachInstEmit for Inst { flags, .. } => { - let from = from.clone().with_allocs(&mut allocs); + todo!() /* let from = from.clone().with_allocs(&mut allocs); let to = allocs.next_writable(to); let mask = mask.with_allocs(&mut allocs); @@ -3009,7 +3008,7 @@ impl MachInstEmit for Inst { mask, from.mop(), from.nf(), - )); + )); */ } &Inst::VecStore { @@ -3020,7 +3019,7 @@ impl MachInstEmit for Inst { flags, .. 
} => { - let to = to.clone().with_allocs(&mut allocs); + todo!() /* let to = to.clone().with_allocs(&mut allocs); let from = allocs.next(from); let mask = mask.with_allocs(&mut allocs); @@ -3061,7 +3060,7 @@ impl MachInstEmit for Inst { mask, to.mop(), to.nf(), - )); + )); */ } }; let end_off = sink.cur_offset(); @@ -3071,7 +3070,7 @@ impl MachInstEmit for Inst { self, end_off - start_off, Inst::worst_case_size() - ); */ + ); } fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String { From 07338e985b6240f6e732f4b0a644e405d567d64a Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 24 Aug 2023 12:48:04 +0200 Subject: [PATCH 06/68] Use fixed registers for AluRRR --- cranelift/codegen/src/isa/zkasm/inst/mod.rs | 4 ++-- cranelift/codegen/src/isa/zkasm/inst/regs.rs | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index 597d9d1cf22c..e9709c7ed2dc 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -391,8 +391,8 @@ fn zkasm_get_operands VReg>(inst: &Inst, collector: &mut OperandC &Inst::LoadConst32 { rd, .. } => collector.reg_def(rd), &Inst::LoadConst64 { rd, .. } => collector.reg_def(rd), &Inst::AluRRR { rd, rs1, rs2, .. } => { - collector.reg_use(rs1); - collector.reg_use(rs2); + collector.reg_fixed_use(rs1, a0()); + collector.reg_fixed_use(rs2, b0()); collector.reg_def(rd); } &Inst::FpuRRR { rd, rs1, rs2, .. } => { diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs index 429625e22ede..2e348805f2b9 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/regs.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -15,7 +15,17 @@ use regalloc2::{MachineEnv, PReg, RegClass}; // first argument of function call #[inline] pub fn a0() -> Reg { - x_reg(10) + x_reg(1) +} + +#[inline] +pub fn b0() -> Reg { + x_reg(2) +} + +#[inline] +pub fn c0() -> Reg { + x_reg(3) } // second argument of function call From ced97756dd2b8422ced7ee2b2ce6a01e1e5c5ccf Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 24 Aug 2023 12:59:50 +0200 Subject: [PATCH 07/68] Add prints to emit --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 21 +++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 294a639c83e6..11b8d211b703 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -631,7 +631,9 @@ impl MachInstEmit for Inst { sink.put4(x); */ } &Inst::Unwind { ref inst } => { - todo!() // sink.add_unwind(inst.clone()); + println!("Unwind"); + // todo!() + // sink.add_unwind(inst.clone()); } &Inst::DummyUse { reg } => { todo!() // allocs.next(reg); @@ -666,7 +668,10 @@ impl MachInstEmit for Inst { rs, imm12, } => { - todo!() /* let rs = allocs.next(rs); + println!("{rs:?} + {imm12} => {rd:?}: {alu_op:?}"); + + // todo!() + /* let rs = allocs.next(rs); let rd = allocs.next_writable(rd); let x = alu_op.op_code() | reg_to_gpr_num(rd.to_reg()) << 7 @@ -708,7 +713,9 @@ impl MachInstEmit for Inst { sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); */ } &Inst::Store { op, src, flags, to } => { - todo!() /* let to = to.clone().with_allocs(&mut allocs); + println!("{src:?} : MSTORE({to:?})"); + // todo!() + /* let to = to.clone().with_allocs(&mut allocs); let src = 
allocs.next(src); let base = to.get_base_register(); @@ -795,7 +802,9 @@ impl MachInstEmit for Inst { .for_each(|i| i.emit(&[], sink, emit_info, state)); */ } &Inst::AdjustSp { amount } => { - todo!() /* if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { + println!("SP - {} => SP", amount); + // todo!() + /* if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { Inst::AluRRImm12 { alu_op: AluOPRRI::Addi, rd: writable_stack_reg(), @@ -1018,7 +1027,9 @@ impl MachInstEmit for Inst { } &Inst::Mov { rd, rm, ty } => { - todo!() /* debug_assert_eq!(rd.to_reg().class(), rm.class()); + println!("{rm:?} => {rd:?}"); + // todo!(); + /* debug_assert_eq!(rd.to_reg().class(), rm.class()); if rd.to_reg() == rm { return; } From 932c52766f5939e77df7b6f333d9c78508528bcb Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Thu, 24 Aug 2023 16:56:07 +0300 Subject: [PATCH 08/68] add some call prints --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 11b8d211b703..3ab3c00597d3 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -668,9 +668,11 @@ impl MachInstEmit for Inst { rs, imm12, } => { - println!("{rs:?} + {imm12} => {rd:?}: {alu_op:?}"); + let rs = allocs.next(rs); + let rd = allocs.next_writable(rd); + println!("{rs:?} {imm12} => {rd:?}: {alu_op:?}"); - // todo!() + // todo!() /* let rs = allocs.next(rs); let rd = allocs.next_writable(rd); let x = alu_op.op_code() @@ -802,8 +804,8 @@ impl MachInstEmit for Inst { .for_each(|i| i.emit(&[], sink, emit_info, state)); */ } &Inst::AdjustSp { amount } => { - println!("SP - {} => SP", amount); - // todo!() + println!("SP {} => SP", amount); + // todo!() /* if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { Inst::AluRRImm12 { alu_op: AluOPRRI::Addi, @@ -2002,7 +2004,9 @@ impl MachInstEmit for Inst { ref name, offset, } => { - todo!() /* let rd = allocs.next_writable(rd); + let rd = allocs.next_writable(rd); + println!("{rd:?} => CALL {name:?}"); + /* let rd = allocs.next_writable(rd); // get the current pc. Inst::Auipc { rd: rd, From 21f2a56dd0f0aa4cc2998c9ca57669ecee1fe10c Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 24 Aug 2023 16:02:49 +0200 Subject: [PATCH 09/68] Allow unused vars in some places --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 3ab3c00597d3..0a1f8cdd1572 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -36,6 +36,7 @@ pub(crate) enum LoadConstant { U64(u64), } +#[allow(unused)] impl LoadConstant { fn to_le_bytes(self) -> Vec { match self { @@ -182,6 +183,7 @@ impl MachInstEmitState for EmitState { } } +#[allow(unused)] impl Inst { /// construct a "imm - rs". 
pub(crate) fn construct_imm_sub_rs(rd: Writable, imm: u64, rs: Reg) -> SmallInstVec { @@ -496,6 +498,7 @@ impl Inst { } } +#[allow(unused)] impl MachInstEmit for Inst { type State = EmitState; type Info = EmitInfo; @@ -3103,6 +3106,7 @@ fn alloc_value_regs(orgin: &ValueRegs, alloc: &mut AllocationConsumer) -> V } } +#[allow(unused)] fn emit_return_call_common_sequence( allocs: &mut AllocationConsumer<'_>, sink: &mut MachBuffer, From 387da7c2f90411061232cc3b336eb19c868425e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Gaspard?= Date: Thu, 24 Aug 2023 16:06:56 +0200 Subject: [PATCH 10/68] add printlns for other things we need to implement --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 0a1f8cdd1572..8e55d74a337e 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -691,7 +691,9 @@ impl MachInstEmit for Inst { from, flags, } => { - todo!() /* let from = from.clone().with_allocs(&mut allocs); + println!("$ => {rd:?} ; LOAD op={op:?} from={from:?} flags={flags:?}"); + // todo!() + /* let from = from.clone().with_allocs(&mut allocs); let rd = allocs.next_writable(rd); let base = from.get_base_register(); @@ -753,7 +755,9 @@ impl MachInstEmit for Inst { &Inst::Ret { stack_bytes_to_pop, .. } => { - todo!() /* if stack_bytes_to_pop != 0 { + println!("ret"); + // todo!() + /* if stack_bytes_to_pop != 0 { Inst::AdjustSp { amount: i64::from(stack_bytes_to_pop), } @@ -886,7 +890,9 @@ impl MachInstEmit for Inst { ); */ } &Inst::CallInd { ref info } => { - todo!() /* let rn = allocs.next(info.rn); + println!("call {info:?}"); + // todo!() + /* let rn = allocs.next(info.rn); if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); } @@ -962,7 +968,9 @@ impl MachInstEmit for Inst { } &Inst::Jal { dest } => { - todo!() /* let code: u32 = 0b1101111; + println!("jal {dest:?}"); + // todo!() + /* let code: u32 = 0b1101111; match dest { BranchTarget::Label(lable) => { sink.use_label_at_offset(start_off, lable, LabelUse::Jal20); From 68c42d10b46dc64754da0d0f2b27e8cb5905dbab Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Thu, 24 Aug 2023 17:11:03 +0300 Subject: [PATCH 11/68] Add an AddImm32 MInst variant We can later generalize it to any similar instruction too. --- cranelift/codegen/src/isa/zkasm/inst.isle | 22 ++ cranelift/codegen/src/isa/zkasm/inst/emit.rs | 65 +++++ cranelift/codegen/src/isa/zkasm/inst/imms.rs | 30 +++ cranelift/codegen/src/isa/zkasm/inst/mod.rs | 135 ++++++++++ cranelift/codegen/src/isa/zkasm/lower.isle | 238 ++---------------- cranelift/codegen/src/isa/zkasm/lower/isle.rs | 5 + 6 files changed, 271 insertions(+), 224 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst.isle b/cranelift/codegen/src/isa/zkasm/inst.isle index d2f3933f3bd6..984d98e33d79 100644 --- a/cranelift/codegen/src/isa/zkasm/inst.isle +++ b/cranelift/codegen/src/isa/zkasm/inst.isle @@ -393,6 +393,13 @@ (flags MemFlags) (mask VecOpMasking) (vstate VState)) + + ;; An addition with 2 32-bit immediates. 
+ (AddImm32 + (rd WritableReg) + (src1 Imm32) + (src2 Imm32)) + )) @@ -738,6 +745,7 @@ (type OptionImm12 (primitive OptionImm12)) (type OptionUimm5 (primitive OptionUimm5)) (type Imm12 (primitive Imm12)) +(type Imm32 (primitive Imm32)) (type UImm5 (primitive UImm5)) (type Imm5 (primitive Imm5)) (type Imm20 (primitive Imm20)) @@ -992,6 +1000,12 @@ ;; RV32I Base Integer Instruction Set +(decl zk_add (Imm32 Imm32) XReg) +(rule (zk_add imm1 imm2) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AddImm32 dst imm1 imm2)))) + dst)) + ;; Helper for emitting the `add` instruction. ;; rd ← rs1 + rs2 (decl rv_add (XReg XReg) XReg) @@ -1561,9 +1575,17 @@ (imm12_from_value n) (def_inst (iconst (u64_from_imm64 (imm12_from_u64 n))))) +(decl imm32_from_value (Imm32) Value) +(extractor + (imm32_from_value n) + (def_inst (iconst (u64_from_imm64 (imm32_from_u64 n))))) + (decl imm12_from_u64 (Imm12) u64) (extern extractor imm12_from_u64 imm12_from_u64) +(decl imm32_from_u64 (Imm32) u64) +(extern extractor imm32_from_u64 imm32_from_u64) + (decl pure partial u64_to_imm12 (u64) Imm12) (rule (u64_to_imm12 (imm12_from_u64 n)) n) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 8e55d74a337e..49b23f371657 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -426,6 +426,7 @@ impl Inst { | Inst::LoadConst32 { .. } | Inst::LoadConst64 { .. } | Inst::AluRRR { .. } + | Inst::AddImm32 { .. } | Inst::FpuRRR { .. } | Inst::AluRRImm12 { .. } | Inst::Load { .. } @@ -641,6 +642,9 @@ impl MachInstEmit for Inst { &Inst::DummyUse { reg } => { todo!() // allocs.next(reg); } + &Inst::AddImm32 { rd, src1, src2 } => { + println!("{src1} + {src2} => {:?}", rd.to_reg()); + }, &Inst::AluRRR { alu_op, rd, @@ -3088,6 +3092,67 @@ impl MachInstEmit for Inst { to.nf(), )); */ } + Inst::Lui { rd, imm } => todo!(), + Inst::LoadConst32 { rd, imm } => todo!(), + Inst::LoadConst64 { rd, imm } => todo!(), + Inst::Auipc { rd, imm } => todo!(), + Inst::FpuRR { alu_op, frm, rd, rs } => todo!(), + Inst::AluRRR { alu_op, rd, rs1, rs2 } => todo!(), + Inst::FpuRRR { alu_op, frm, rd, rs1, rs2 } => todo!(), + Inst::FpuRRRR { alu_op, frm, rd, rs1, rs2, rs3 } => todo!(), + Inst::AluRRImm12 { alu_op, rd, rs, imm12 } => todo!(), + Inst::Load { rd, op, flags, from } => todo!(), + Inst::Store { to, op, flags, src } => todo!(), + Inst::Args { args } => todo!(), + Inst::Ret { rets, stack_bytes_to_pop } => todo!(), + Inst::Extend { rd, rn, signed, from_bits, to_bits } => todo!(), + Inst::AdjustSp { amount } => todo!(), + Inst::Call { info } => todo!(), + Inst::CallInd { info } => todo!(), + Inst::ReturnCall { callee, info } => todo!(), + Inst::ReturnCallInd { callee, info } => todo!(), + Inst::TrapIf { test, trap_code } => todo!(), + Inst::TrapIfC { rs1, rs2, cc, trap_code } => todo!(), + Inst::Jal { dest } => todo!(), + Inst::CondBr { taken, not_taken, kind } => todo!(), + Inst::LoadExtName { rd, name, offset } => todo!(), + Inst::LoadAddr { rd, mem } => todo!(), + Inst::VirtualSPOffsetAdj { amount } => todo!(), + Inst::Mov { rd, rm, ty } => todo!(), + Inst::MovFromPReg { rd, rm } => todo!(), + Inst::Fence { pred, succ } => todo!(), + Inst::Udf { trap_code } => todo!(), + Inst::Jalr { rd, base, offset } => todo!(), + Inst::Atomic { op, rd, addr, src, amo } => todo!(), + Inst::AtomicStore { src, ty, p } => todo!(), + Inst::AtomicLoad { rd, ty, p } => todo!(), + Inst::AtomicRmwLoop { offset, op, dst, ty, p, x, t0 } => 
todo!(), + Inst::Select { dst, ty, condition, x, y } => todo!(), + Inst::BrTable { index, tmp1, tmp2, targets } => todo!(), + Inst::AtomicCas { offset, t0, dst, e, addr, v, ty } => todo!(), + Inst::IntSelect { op, dst, x, y, ty } => todo!(), + Inst::Icmp { cc, rd, a, b, ty } => todo!(), + Inst::SelectReg { rd, rs1, rs2, condition } => todo!(), + Inst::FcvtToInt { is_sat, rd, tmp, rs, is_signed, in_type, out_type } => todo!(), + Inst::RawData { data } => todo!(), + Inst::Unwind { inst } => todo!(), + Inst::DummyUse { reg } => todo!(), + Inst::FloatRound { op, rd, int_tmp, f_tmp, rs, ty } => todo!(), + Inst::FloatSelect { op, rd, tmp, rs1, rs2, ty } => todo!(), + Inst::Popcnt { sum, step, tmp, rs, ty } => todo!(), + Inst::Cltz { leading, sum, step, tmp, rs, ty } => todo!(), + Inst::Rev8 { rs, step, tmp, rd } => todo!(), + Inst::Brev8 { rs, ty, step, tmp, tmp2, rd } => todo!(), + Inst::StackProbeLoop { guard_size, probe_count, tmp } => todo!(), + Inst::VecAluRRRR { op, vd, vd_src, vs2, vs1, mask, vstate } => todo!(), + Inst::VecAluRRRImm5 { op, vd, vd_src, vs2, imm, mask, vstate } => todo!(), + Inst::VecAluRRR { op, vd, vs2, vs1, mask, vstate } => todo!(), + Inst::VecAluRRImm5 { op, vd, vs2, imm, mask, vstate } => todo!(), + Inst::VecAluRR { op, vd, vs, mask, vstate } => todo!(), + Inst::VecAluRImm5 { op, vd, imm, mask, vstate } => todo!(), + Inst::VecSetState { rd, vstate } => todo!(), + Inst::VecLoad { eew, to, from, flags, mask, vstate } => todo!(), + Inst::VecStore { eew, to, from, flags, mask, vstate } => todo!(), }; let end_off = sink.cur_offset(); assert!( diff --git a/cranelift/codegen/src/isa/zkasm/inst/imms.rs b/cranelift/codegen/src/isa/zkasm/inst/imms.rs index 2f9b544b15ed..f04477e1dcd4 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/imms.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/imms.rs @@ -10,6 +10,11 @@ pub struct Imm12 { pub bits: i16, } +#[derive(Copy, Clone, Debug, Default)] +pub struct Imm32 { + pub bits: i32, +} + impl Imm12 { pub(crate) const FALSE: Self = Self { bits: 0 }; pub(crate) const TRUE: Self = Self { bits: 1 }; @@ -48,6 +53,25 @@ impl Imm12 { } } +impl Imm32 { + pub fn maybe_from_u64(val: u64) -> Option { + let sign_bit = 1 << 31; + if val == 0 { + Some(Imm32 { bits: 0 }) + } else if (val & sign_bit) != 0 && (val >> 31) == 0xffff_ffff { + Some(Imm32 { + bits: (val & 0xffff_ffff) as i32, + }) + } else if (val & sign_bit) == 0 && (val >> 32) == 0 { + Some(Imm32 { + bits: (val & 0xffff_ffff) as i32, + }) + } else { + None + } + } +} + impl Into for Imm12 { fn into(self) -> i64 { self.bits as i64 @@ -60,6 +84,12 @@ impl Display for Imm12 { } } +impl Display for Imm32 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{:+}", self.bits) + } +} + impl std::ops::Neg for Imm12 { type Output = Self; fn neg(self) -> Self::Output { diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index e9709c7ed2dc..cec7b583dbd5 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -3,6 +3,7 @@ // Some variants are not constructed, but we still want them as options in the future. #![allow(dead_code)] #![allow(non_camel_case_types)] +#![allow(warnings)] use super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpMasking}; use crate::binemit::{Addend, CodeOffset, Reloc}; @@ -698,6 +699,11 @@ fn zkasm_get_operands VReg>(inst: &Inst, collector: &mut OperandC collector.reg_reuse_def(vd, 2); // `vd` == `vd_src`. 
vec_mask_late_operands(mask, collector); } + + Inst::AddImm32 { rd, src1, src2 } => { + collector.reg_def(*rd); + }, + &Inst::VecAluRRRImm5 { op, vd, @@ -829,6 +835,67 @@ fn zkasm_get_operands VReg>(inst: &Inst, collector: &mut OperandC collector.reg_use(from); vec_mask_operands(mask, collector); } + Inst::Lui { rd, imm } => todo!(), + Inst::LoadConst32 { rd, imm } => todo!(), + Inst::LoadConst64 { rd, imm } => todo!(), + Inst::Auipc { rd, imm } => todo!(), + Inst::FpuRR { alu_op, frm, rd, rs } => todo!(), + Inst::AluRRR { alu_op, rd, rs1, rs2 } => todo!(), + Inst::FpuRRR { alu_op, frm, rd, rs1, rs2 } => todo!(), + Inst::FpuRRRR { alu_op, frm, rd, rs1, rs2, rs3 } => todo!(), + Inst::AluRRImm12 { alu_op, rd, rs, imm12 } => todo!(), + Inst::Load { rd, op, flags, from } => todo!(), + Inst::Store { to, op, flags, src } => todo!(), + Inst::Args { args } => todo!(), + Inst::Ret { rets, stack_bytes_to_pop } => todo!(), + Inst::Extend { rd, rn, signed, from_bits, to_bits } => todo!(), + Inst::AdjustSp { amount } => todo!(), + Inst::Call { info } => todo!(), + Inst::CallInd { info } => todo!(), + Inst::ReturnCall { callee, info } => todo!(), + Inst::ReturnCallInd { callee, info } => todo!(), + Inst::TrapIf { test, trap_code } => todo!(), + Inst::TrapIfC { rs1, rs2, cc, trap_code } => todo!(), + Inst::Jal { dest } => todo!(), + Inst::CondBr { taken, not_taken, kind } => todo!(), + Inst::LoadExtName { rd, name, offset } => todo!(), + Inst::LoadAddr { rd, mem } => todo!(), + Inst::VirtualSPOffsetAdj { amount } => todo!(), + Inst::Mov { rd, rm, ty } => todo!(), + Inst::MovFromPReg { rd, rm } => todo!(), + Inst::Fence { pred, succ } => todo!(), + Inst::Udf { trap_code } => todo!(), + Inst::Jalr { rd, base, offset } => todo!(), + Inst::Atomic { op, rd, addr, src, amo } => todo!(), + Inst::AtomicStore { src, ty, p } => todo!(), + Inst::AtomicLoad { rd, ty, p } => todo!(), + Inst::AtomicRmwLoop { offset, op, dst, ty, p, x, t0 } => todo!(), + Inst::Select { dst, ty, condition, x, y } => todo!(), + Inst::BrTable { index, tmp1, tmp2, targets } => todo!(), + Inst::AtomicCas { offset, t0, dst, e, addr, v, ty } => todo!(), + Inst::IntSelect { op, dst, x, y, ty } => todo!(), + Inst::Icmp { cc, rd, a, b, ty } => todo!(), + Inst::SelectReg { rd, rs1, rs2, condition } => todo!(), + Inst::FcvtToInt { is_sat, rd, tmp, rs, is_signed, in_type, out_type } => todo!(), + Inst::RawData { data } => todo!(), + Inst::Unwind { inst } => todo!(), + Inst::DummyUse { reg } => todo!(), + Inst::FloatRound { op, rd, int_tmp, f_tmp, rs, ty } => todo!(), + Inst::FloatSelect { op, rd, tmp, rs1, rs2, ty } => todo!(), + Inst::Popcnt { sum, step, tmp, rs, ty } => todo!(), + Inst::Cltz { leading, sum, step, tmp, rs, ty } => todo!(), + Inst::Rev8 { rs, step, tmp, rd } => todo!(), + Inst::Brev8 { rs, ty, step, tmp, tmp2, rd } => todo!(), + Inst::StackProbeLoop { guard_size, probe_count, tmp } => todo!(), + Inst::VecAluRRRR { op, vd, vd_src, vs2, vs1, mask, vstate } => todo!(), + Inst::VecAluRRRImm5 { op, vd, vd_src, vs2, imm, mask, vstate } => todo!(), + Inst::VecAluRRR { op, vd, vs2, vs1, mask, vstate } => todo!(), + Inst::VecAluRRImm5 { op, vd, vs2, imm, mask, vstate } => todo!(), + Inst::VecAluRR { op, vd, vs, mask, vstate } => todo!(), + Inst::VecAluRImm5 { op, vd, imm, mask, vstate } => todo!(), + Inst::VecSetState { rd, vstate } => todo!(), + Inst::VecLoad { eew, to, from, flags, mask, vstate } => todo!(), + Inst::VecStore { eew, to, from, flags, mask, vstate } => todo!(), } } @@ -1418,6 +1485,12 @@ impl Inst { } } } + + Inst::AddImm32 
{ rd, src1, src2 } => { + let rd = format_reg(rd.to_reg(), allocs); + format!("{src1} + {src2} => {rd};") + }, + &Inst::FpuRR { frm, alu_op, @@ -1908,6 +1981,68 @@ impl Inst { format!("vs{eew}.v {vs3},{dst}{mask} {vstate}") } + Inst::Lui { rd, imm } => todo!(), + Inst::LoadConst32 { rd, imm } => todo!(), + Inst::LoadConst64 { rd, imm } => todo!(), + Inst::Auipc { rd, imm } => todo!(), + Inst::FpuRR { alu_op, frm, rd, rs } => todo!(), + Inst::AluRRR { alu_op, rd, rs1, rs2 } => todo!(), + Inst::FpuRRR { alu_op, frm, rd, rs1, rs2 } => todo!(), + Inst::FpuRRRR { alu_op, frm, rd, rs1, rs2, rs3 } => todo!(), + Inst::AluRRImm12 { alu_op, rd, rs, imm12 } => todo!(), + Inst::Load { rd, op, flags, from } => todo!(), + Inst::Store { to, op, flags, src } => todo!(), + Inst::Args { args } => todo!(), + Inst::Ret { rets, stack_bytes_to_pop } => todo!(), + Inst::Extend { rd, rn, signed, from_bits, to_bits } => todo!(), + Inst::AdjustSp { amount } => todo!(), + Inst::Call { info } => todo!(), + Inst::CallInd { info } => todo!(), + Inst::ReturnCall { callee, info } => todo!(), + Inst::ReturnCallInd { callee, info } => todo!(), + Inst::TrapIf { test, trap_code } => todo!(), + Inst::TrapIfC { rs1, rs2, cc, trap_code } => todo!(), + Inst::Jal { dest } => todo!(), + Inst::CondBr { taken, not_taken, kind } => todo!(), + Inst::LoadExtName { rd, name, offset } => todo!(), + Inst::LoadAddr { rd, mem } => todo!(), + Inst::VirtualSPOffsetAdj { amount } => todo!(), + Inst::Mov { rd, rm, ty } => todo!(), + Inst::MovFromPReg { rd, rm } => todo!(), + Inst::Fence { pred, succ } => todo!(), + Inst::FenceI => todo!(), + Inst::ECall => todo!(), + Inst::EBreak => todo!(), + Inst::Udf { trap_code } => todo!(), + Inst::Jalr { rd, base, offset } => todo!(), + Inst::Atomic { op, rd, addr, src, amo } => todo!(), + Inst::AtomicStore { src, ty, p } => todo!(), + Inst::AtomicLoad { rd, ty, p } => todo!(), + Inst::AtomicRmwLoop { offset, op, dst, ty, p, x, t0 } => todo!(), + Inst::Select { dst, ty, condition, x, y } => todo!(), + Inst::BrTable { index, tmp1, tmp2, targets } => todo!(), + Inst::AtomicCas { offset, t0, dst, e, addr, v, ty } => todo!(), + Inst::IntSelect { op, dst, x, y, ty } => todo!(), + Inst::Icmp { cc, rd, a, b, ty } => todo!(), + Inst::SelectReg { rd, rs1, rs2, condition } => todo!(), + Inst::FcvtToInt { is_sat, rd, tmp, rs, is_signed, in_type, out_type } => todo!(), + Inst::RawData { data } => todo!(), + Inst::Unwind { inst } => todo!(), + Inst::DummyUse { reg } => todo!(), + Inst::FloatRound { op, rd, int_tmp, f_tmp, rs, ty } => todo!(), + Inst::FloatSelect { op, rd, tmp, rs1, rs2, ty } => todo!(), + Inst::Popcnt { sum, step, tmp, rs, ty } => todo!(), + Inst::Cltz { leading, sum, step, tmp, rs, ty } => todo!(), + Inst::Rev8 { rs, step, tmp, rd } => todo!(), + Inst::Brev8 { rs, ty, step, tmp, tmp2, rd } => todo!(), + Inst::StackProbeLoop { guard_size, probe_count, tmp } => todo!(), + Inst::VecAluRRRR { op, vd, vd_src, vs2, vs1, mask, vstate } => todo!(), + Inst::VecAluRRRImm5 { op, vd, vd_src, vs2, imm, mask, vstate } => todo!(), + Inst::VecAluRRR { op, vd, vs2, vs1, mask, vstate } => todo!(), + Inst::VecAluRRImm5 { op, vd, vs2, imm, mask, vstate } => todo!(), + Inst::VecAluRR { op, vd, vs, mask, vstate } => todo!(), + Inst::VecAluRImm5 { op, vd, imm, mask, vstate } => todo!(), + Inst::VecSetState { rd, vstate } => todo!(), } } } diff --git a/cranelift/codegen/src/isa/zkasm/lower.isle b/cranelift/codegen/src/isa/zkasm/lower.isle index 9e466c624fd2..acbd9e90c96f 100644 --- 
a/cranelift/codegen/src/isa/zkasm/lower.isle +++ b/cranelift/codegen/src/isa/zkasm/lower.isle @@ -36,218 +36,8 @@ (rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x y))) (rv_add x y)) -;; Special cases for when one operand is an immediate that fits in 12 bits. -(rule 1 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x (imm12_from_value y)))) - (alu_rr_imm12 (select_addi ty) x y)) - -(rule 2 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd (imm12_from_value x) y))) - (alu_rr_imm12 (select_addi ty) y x)) - -;; Special case when one of the operands is uextended -;; Needs `Zba` -(rule 3 (lower (has_type $I64 (iadd x (uextend y @ (value_type $I32))))) - (if-let $true (has_zba)) - (rv_adduw y x)) - -(rule 4 (lower (has_type $I64 (iadd (uextend x @ (value_type $I32)) y))) - (if-let $true (has_zba)) - (rv_adduw x y)) - -;; Add with const shift. We have a few of these instructions with `Zba`. -(decl pure partial match_shnadd (Imm64) AluOPRRR) -(rule (match_shnadd (u64_from_imm64 1)) (AluOPRRR.Sh1add)) -(rule (match_shnadd (u64_from_imm64 2)) (AluOPRRR.Sh2add)) -(rule (match_shnadd (u64_from_imm64 3)) (AluOPRRR.Sh3add)) - -(rule 3 (lower (has_type $I64 (iadd x (ishl y (maybe_uextend (iconst n)))))) - (if-let $true (has_zba)) - (if-let shnadd (match_shnadd n)) - (alu_rrr shnadd y x)) - -(rule 4 (lower (has_type $I64 (iadd (ishl x (maybe_uextend (iconst n))) y))) - (if-let $true (has_zba)) - (if-let shnadd (match_shnadd n)) - (alu_rrr shnadd x y)) - - -;; Add with uextended const shift. We have a few of these instructions with `Zba`. -;; -;; !!! Important !!! -;; These rules only work for (ishl (uextend _) _) and not for (uextend (ishl _ _))! -;; Getting this wrong means a potential misscalculation of the shift amount. -;; Additionaly we can only ensure that this is correct if the uextend is 32 to 64 bits. -(decl pure partial match_shnadd_uw (Imm64) AluOPRRR) -(rule (match_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw)) -(rule (match_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw)) -(rule (match_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw)) - -(rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y @ (value_type $I32)) (maybe_uextend (iconst n)))))) - (if-let $true (has_zba)) - (if-let shnadd_uw (match_shnadd_uw n)) - (alu_rrr shnadd_uw y x)) - -(rule 6 (lower (has_type $I64 (iadd (ishl (uextend x @ (value_type $I32)) (maybe_uextend (iconst n))) y))) - (if-let $true (has_zba)) - (if-let shnadd_uw (match_shnadd_uw n)) - (alu_rrr shnadd_uw x y)) - -;; I128 cases -(rule 7 (lower (has_type $I128 (iadd x y))) - (let ((low XReg (rv_add (value_regs_get x 0) (value_regs_get y 0))) - ;; compute carry. - (carry XReg (rv_sltu low (value_regs_get y 0))) - ;; - (high_tmp XReg (rv_add (value_regs_get x 1) (value_regs_get y 1))) - ;; add carry. 
- (high XReg (rv_add high_tmp carry))) - (value_regs low high))) - -;; SIMD Vectors -(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (iadd x y))) - (rv_vadd_vv x y (unmasked) ty)) - -(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (splat y)))) - (rv_vadd_vx x y (unmasked) ty)) - -(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (splat (sextend y @ (value_type sext_ty)))))) - (if-let half_ty (ty_half_width ty)) - (if-let $true (ty_equal (lane_type half_ty) sext_ty)) - (rv_vwadd_wx x y (unmasked) (vstate_mf2 half_ty))) - -(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (splat (uextend y @ (value_type uext_ty)))))) - (if-let half_ty (ty_half_width ty)) - (if-let $true (ty_equal (lane_type half_ty) uext_ty)) - (rv_vwaddu_wx x y (unmasked) (vstate_mf2 half_ty))) - -(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (replicated_imm5 y)))) - (rv_vadd_vi x y (unmasked) ty)) - - -(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd (splat x) y))) - (rv_vadd_vx y x (unmasked) ty)) - -(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (splat (sextend x @ (value_type sext_ty))) y))) - (if-let half_ty (ty_half_width ty)) - (if-let $true (ty_equal (lane_type half_ty) sext_ty)) - (rv_vwadd_wx y x (unmasked) (vstate_mf2 half_ty))) - -(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (splat (uextend x @ (value_type uext_ty))) y))) - (if-let half_ty (ty_half_width ty)) - (if-let $true (ty_equal (lane_type half_ty) uext_ty)) - (rv_vwaddu_wx y x (unmasked) (vstate_mf2 half_ty))) - -(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (replicated_imm5 x) y))) - (rv_vadd_vi y x (unmasked) ty)) - -;; Signed Widening Low Additions - -(rule 9 (lower (has_type (ty_vec_fits_in_register _) (iadd x (swiden_low y @ (value_type in_ty))))) - (rv_vwadd_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 12 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_low x @ (value_type in_ty)) y))) - (rv_vwadd_wv y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_low x @ (value_type in_ty)) - (swiden_low y)))) - (rv_vwadd_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_low x @ (value_type in_ty)) - (splat (sextend y @ (value_type sext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) sext_ty)) - (rv_vwadd_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 14 (lower (has_type (ty_vec_fits_in_register _) (iadd (splat (sextend x @ (value_type sext_ty))) - (swiden_low y @ (value_type in_ty))))) - (if-let $true (ty_equal (lane_type in_ty) sext_ty)) - (rv_vwadd_vx y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Signed Widening High Additions -;; These are the same as the low additions, but we first slide down the inputs. 
- -(rule 9 (lower (has_type (ty_vec_fits_in_register _) (iadd x (swiden_high y @ (value_type in_ty))))) - (rv_vwadd_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 12 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_high x @ (value_type in_ty)) y))) - (rv_vwadd_wv y (gen_slidedown_half in_ty x) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_high x @ (value_type in_ty)) - (swiden_high y)))) - (rv_vwadd_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_high x @ (value_type in_ty)) - (splat (sextend y @ (value_type sext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) sext_ty)) - (rv_vwadd_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 14 (lower (has_type (ty_vec_fits_in_register _) (iadd (splat (sextend x @ (value_type sext_ty))) - (swiden_high y @ (value_type in_ty))))) - (if-let $true (ty_equal (lane_type in_ty) sext_ty)) - (rv_vwadd_vx (gen_slidedown_half in_ty y) x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Unsigned Widening Low Additions - -(rule 9 (lower (has_type (ty_vec_fits_in_register _) (iadd x (uwiden_low y @ (value_type in_ty))))) - (rv_vwaddu_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 12 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_low x @ (value_type in_ty)) y))) - (rv_vwaddu_wv y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_low x @ (value_type in_ty)) - (uwiden_low y)))) - (rv_vwaddu_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_low x @ (value_type in_ty)) - (splat (uextend y @ (value_type uext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) uext_ty)) - (rv_vwaddu_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 14 (lower (has_type (ty_vec_fits_in_register _) (iadd (splat (uextend x @ (value_type uext_ty))) - (uwiden_low y @ (value_type in_ty))))) - (if-let $true (ty_equal (lane_type in_ty) uext_ty)) - (rv_vwaddu_vx y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Unsigned Widening High Additions -;; These are the same as the low additions, but we first slide down the inputs. 
- -(rule 9 (lower (has_type (ty_vec_fits_in_register _) (iadd x (uwiden_high y @ (value_type in_ty))))) - (rv_vwaddu_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 12 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_high x @ (value_type in_ty)) y))) - (rv_vwaddu_wv y (gen_slidedown_half in_ty x) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_high x @ (value_type in_ty)) - (uwiden_high y)))) - (rv_vwaddu_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_high x @ (value_type in_ty)) - (splat (uextend y @ (value_type uext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) uext_ty)) - (rv_vwaddu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 14 (lower (has_type (ty_vec_fits_in_register _) (iadd (splat (uextend y @ (value_type uext_ty))) - (uwiden_high x @ (value_type in_ty))))) - (if-let $true (ty_equal (lane_type in_ty) uext_ty)) - (rv_vwaddu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Signed Widening Mixed High/Low Additions - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_low x @ (value_type in_ty)) - (swiden_high y)))) - (rv_vwadd_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (swiden_high x @ (value_type in_ty)) - (swiden_low y)))) - (rv_vwadd_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Unsigned Widening Mixed High/Low Additions - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_low x @ (value_type in_ty)) - (uwiden_high y)))) - (rv_vwaddu_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_vec_fits_in_register _) (iadd (uwiden_high x @ (value_type in_ty)) - (uwiden_low y)))) - (rv_vwaddu_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) +(rule 1 (lower (iadd (imm32_from_value x) (imm32_from_value y))) + (zk_add x y)) ;; Fused Multiply Accumulate Rules `vmacc` ;; @@ -255,42 +45,42 @@ ;; register instead of the addition one. The actual pattern matched seems to be ;; exactly the same. 
-(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul y z)))) +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul y z)))) (rv_vmacc_vv x y z (unmasked) ty)) -(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul y (splat z))))) +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul y (splat z))))) (rv_vmacc_vx x y z (unmasked) ty)) -(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul (splat y) z)))) +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul (splat y) z)))) (rv_vmacc_vx x z y (unmasked) ty)) -(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul x y) z))) +(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul x y) z))) (rv_vmacc_vv z x y (unmasked) ty)) -(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul x (splat y)) z))) +(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul x (splat y)) z))) (rv_vmacc_vx z x y (unmasked) ty)) -(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul (splat x) y) z))) +(rule 15 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul (splat x) y) z))) (rv_vmacc_vx z y x (unmasked) ty)) ;; Fused Multiply Subtract Rules `vnmsac` -(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul y z))))) +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul y z))))) (rv_vnmsac_vv x y z (unmasked) ty)) -(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul y (splat z)))))) +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul y (splat z)))))) (rv_vnmsac_vx x y z (unmasked) ty)) -(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul (splat y) z))))) +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul (splat y) z))))) (rv_vnmsac_vx x z y (unmasked) ty)) -(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul x y)) z))) +(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul x y)) z))) (rv_vnmsac_vv z x y (unmasked) ty)) -(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul x (splat y))) z))) +(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul x (splat y))) z))) (rv_vnmsac_vx z x y (unmasked) ty)) -(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul (splat x) y)) z))) +(rule 15 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul (splat x) y)) z))) (rv_vnmsac_vx z y x (unmasked) ty)) ;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/zkasm/lower/isle.rs b/cranelift/codegen/src/isa/zkasm/lower/isle.rs index 28734ac30df4..a4b2b42defdc 100644 --- a/cranelift/codegen/src/isa/zkasm/lower/isle.rs +++ b/cranelift/codegen/src/isa/zkasm/lower/isle.rs @@ -306,6 +306,11 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> fn imm12_from_u64(&mut self, arg0: u64) -> Option { Imm12::maybe_from_u64(arg0) } + + #[inline] + fn imm32_from_u64(&mut self, arg0: u64) -> Option { + Imm32::maybe_from_u64(arg0) + } #[inline] fn imm5_from_u64(&mut self, arg0: u64) -> Option { Imm5::maybe_from_i8(i8::try_from(arg0 as i64).ok()?) 
From 2b02dcc53b8fda021c01a1c1694d24ece57aad51 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Thu, 24 Aug 2023 17:20:53 +0300 Subject: [PATCH 12/68] remove rust-analyzer spew --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 61 --------- cranelift/codegen/src/isa/zkasm/inst/mod.rs | 123 ------------------- 2 files changed, 184 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 49b23f371657..baba1aaaa9c0 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -3092,67 +3092,6 @@ impl MachInstEmit for Inst { to.nf(), )); */ } - Inst::Lui { rd, imm } => todo!(), - Inst::LoadConst32 { rd, imm } => todo!(), - Inst::LoadConst64 { rd, imm } => todo!(), - Inst::Auipc { rd, imm } => todo!(), - Inst::FpuRR { alu_op, frm, rd, rs } => todo!(), - Inst::AluRRR { alu_op, rd, rs1, rs2 } => todo!(), - Inst::FpuRRR { alu_op, frm, rd, rs1, rs2 } => todo!(), - Inst::FpuRRRR { alu_op, frm, rd, rs1, rs2, rs3 } => todo!(), - Inst::AluRRImm12 { alu_op, rd, rs, imm12 } => todo!(), - Inst::Load { rd, op, flags, from } => todo!(), - Inst::Store { to, op, flags, src } => todo!(), - Inst::Args { args } => todo!(), - Inst::Ret { rets, stack_bytes_to_pop } => todo!(), - Inst::Extend { rd, rn, signed, from_bits, to_bits } => todo!(), - Inst::AdjustSp { amount } => todo!(), - Inst::Call { info } => todo!(), - Inst::CallInd { info } => todo!(), - Inst::ReturnCall { callee, info } => todo!(), - Inst::ReturnCallInd { callee, info } => todo!(), - Inst::TrapIf { test, trap_code } => todo!(), - Inst::TrapIfC { rs1, rs2, cc, trap_code } => todo!(), - Inst::Jal { dest } => todo!(), - Inst::CondBr { taken, not_taken, kind } => todo!(), - Inst::LoadExtName { rd, name, offset } => todo!(), - Inst::LoadAddr { rd, mem } => todo!(), - Inst::VirtualSPOffsetAdj { amount } => todo!(), - Inst::Mov { rd, rm, ty } => todo!(), - Inst::MovFromPReg { rd, rm } => todo!(), - Inst::Fence { pred, succ } => todo!(), - Inst::Udf { trap_code } => todo!(), - Inst::Jalr { rd, base, offset } => todo!(), - Inst::Atomic { op, rd, addr, src, amo } => todo!(), - Inst::AtomicStore { src, ty, p } => todo!(), - Inst::AtomicLoad { rd, ty, p } => todo!(), - Inst::AtomicRmwLoop { offset, op, dst, ty, p, x, t0 } => todo!(), - Inst::Select { dst, ty, condition, x, y } => todo!(), - Inst::BrTable { index, tmp1, tmp2, targets } => todo!(), - Inst::AtomicCas { offset, t0, dst, e, addr, v, ty } => todo!(), - Inst::IntSelect { op, dst, x, y, ty } => todo!(), - Inst::Icmp { cc, rd, a, b, ty } => todo!(), - Inst::SelectReg { rd, rs1, rs2, condition } => todo!(), - Inst::FcvtToInt { is_sat, rd, tmp, rs, is_signed, in_type, out_type } => todo!(), - Inst::RawData { data } => todo!(), - Inst::Unwind { inst } => todo!(), - Inst::DummyUse { reg } => todo!(), - Inst::FloatRound { op, rd, int_tmp, f_tmp, rs, ty } => todo!(), - Inst::FloatSelect { op, rd, tmp, rs1, rs2, ty } => todo!(), - Inst::Popcnt { sum, step, tmp, rs, ty } => todo!(), - Inst::Cltz { leading, sum, step, tmp, rs, ty } => todo!(), - Inst::Rev8 { rs, step, tmp, rd } => todo!(), - Inst::Brev8 { rs, ty, step, tmp, tmp2, rd } => todo!(), - Inst::StackProbeLoop { guard_size, probe_count, tmp } => todo!(), - Inst::VecAluRRRR { op, vd, vd_src, vs2, vs1, mask, vstate } => todo!(), - Inst::VecAluRRRImm5 { op, vd, vd_src, vs2, imm, mask, vstate } => todo!(), - Inst::VecAluRRR { op, vd, vs2, vs1, mask, vstate } => todo!(), - Inst::VecAluRRImm5 { op, vd, vs2, imm, mask, vstate } => 
todo!(), - Inst::VecAluRR { op, vd, vs, mask, vstate } => todo!(), - Inst::VecAluRImm5 { op, vd, imm, mask, vstate } => todo!(), - Inst::VecSetState { rd, vstate } => todo!(), - Inst::VecLoad { eew, to, from, flags, mask, vstate } => todo!(), - Inst::VecStore { eew, to, from, flags, mask, vstate } => todo!(), }; let end_off = sink.cur_offset(); assert!( diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index cec7b583dbd5..0c5d2310b8fc 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -835,67 +835,6 @@ fn zkasm_get_operands VReg>(inst: &Inst, collector: &mut OperandC collector.reg_use(from); vec_mask_operands(mask, collector); } - Inst::Lui { rd, imm } => todo!(), - Inst::LoadConst32 { rd, imm } => todo!(), - Inst::LoadConst64 { rd, imm } => todo!(), - Inst::Auipc { rd, imm } => todo!(), - Inst::FpuRR { alu_op, frm, rd, rs } => todo!(), - Inst::AluRRR { alu_op, rd, rs1, rs2 } => todo!(), - Inst::FpuRRR { alu_op, frm, rd, rs1, rs2 } => todo!(), - Inst::FpuRRRR { alu_op, frm, rd, rs1, rs2, rs3 } => todo!(), - Inst::AluRRImm12 { alu_op, rd, rs, imm12 } => todo!(), - Inst::Load { rd, op, flags, from } => todo!(), - Inst::Store { to, op, flags, src } => todo!(), - Inst::Args { args } => todo!(), - Inst::Ret { rets, stack_bytes_to_pop } => todo!(), - Inst::Extend { rd, rn, signed, from_bits, to_bits } => todo!(), - Inst::AdjustSp { amount } => todo!(), - Inst::Call { info } => todo!(), - Inst::CallInd { info } => todo!(), - Inst::ReturnCall { callee, info } => todo!(), - Inst::ReturnCallInd { callee, info } => todo!(), - Inst::TrapIf { test, trap_code } => todo!(), - Inst::TrapIfC { rs1, rs2, cc, trap_code } => todo!(), - Inst::Jal { dest } => todo!(), - Inst::CondBr { taken, not_taken, kind } => todo!(), - Inst::LoadExtName { rd, name, offset } => todo!(), - Inst::LoadAddr { rd, mem } => todo!(), - Inst::VirtualSPOffsetAdj { amount } => todo!(), - Inst::Mov { rd, rm, ty } => todo!(), - Inst::MovFromPReg { rd, rm } => todo!(), - Inst::Fence { pred, succ } => todo!(), - Inst::Udf { trap_code } => todo!(), - Inst::Jalr { rd, base, offset } => todo!(), - Inst::Atomic { op, rd, addr, src, amo } => todo!(), - Inst::AtomicStore { src, ty, p } => todo!(), - Inst::AtomicLoad { rd, ty, p } => todo!(), - Inst::AtomicRmwLoop { offset, op, dst, ty, p, x, t0 } => todo!(), - Inst::Select { dst, ty, condition, x, y } => todo!(), - Inst::BrTable { index, tmp1, tmp2, targets } => todo!(), - Inst::AtomicCas { offset, t0, dst, e, addr, v, ty } => todo!(), - Inst::IntSelect { op, dst, x, y, ty } => todo!(), - Inst::Icmp { cc, rd, a, b, ty } => todo!(), - Inst::SelectReg { rd, rs1, rs2, condition } => todo!(), - Inst::FcvtToInt { is_sat, rd, tmp, rs, is_signed, in_type, out_type } => todo!(), - Inst::RawData { data } => todo!(), - Inst::Unwind { inst } => todo!(), - Inst::DummyUse { reg } => todo!(), - Inst::FloatRound { op, rd, int_tmp, f_tmp, rs, ty } => todo!(), - Inst::FloatSelect { op, rd, tmp, rs1, rs2, ty } => todo!(), - Inst::Popcnt { sum, step, tmp, rs, ty } => todo!(), - Inst::Cltz { leading, sum, step, tmp, rs, ty } => todo!(), - Inst::Rev8 { rs, step, tmp, rd } => todo!(), - Inst::Brev8 { rs, ty, step, tmp, tmp2, rd } => todo!(), - Inst::StackProbeLoop { guard_size, probe_count, tmp } => todo!(), - Inst::VecAluRRRR { op, vd, vd_src, vs2, vs1, mask, vstate } => todo!(), - Inst::VecAluRRRImm5 { op, vd, vd_src, vs2, imm, mask, vstate } => todo!(), - Inst::VecAluRRR { op, vd, vs2, vs1, mask, vstate 
} => todo!(), - Inst::VecAluRRImm5 { op, vd, vs2, imm, mask, vstate } => todo!(), - Inst::VecAluRR { op, vd, vs, mask, vstate } => todo!(), - Inst::VecAluRImm5 { op, vd, imm, mask, vstate } => todo!(), - Inst::VecSetState { rd, vstate } => todo!(), - Inst::VecLoad { eew, to, from, flags, mask, vstate } => todo!(), - Inst::VecStore { eew, to, from, flags, mask, vstate } => todo!(), } } @@ -1981,68 +1920,6 @@ impl Inst { format!("vs{eew}.v {vs3},{dst}{mask} {vstate}") } - Inst::Lui { rd, imm } => todo!(), - Inst::LoadConst32 { rd, imm } => todo!(), - Inst::LoadConst64 { rd, imm } => todo!(), - Inst::Auipc { rd, imm } => todo!(), - Inst::FpuRR { alu_op, frm, rd, rs } => todo!(), - Inst::AluRRR { alu_op, rd, rs1, rs2 } => todo!(), - Inst::FpuRRR { alu_op, frm, rd, rs1, rs2 } => todo!(), - Inst::FpuRRRR { alu_op, frm, rd, rs1, rs2, rs3 } => todo!(), - Inst::AluRRImm12 { alu_op, rd, rs, imm12 } => todo!(), - Inst::Load { rd, op, flags, from } => todo!(), - Inst::Store { to, op, flags, src } => todo!(), - Inst::Args { args } => todo!(), - Inst::Ret { rets, stack_bytes_to_pop } => todo!(), - Inst::Extend { rd, rn, signed, from_bits, to_bits } => todo!(), - Inst::AdjustSp { amount } => todo!(), - Inst::Call { info } => todo!(), - Inst::CallInd { info } => todo!(), - Inst::ReturnCall { callee, info } => todo!(), - Inst::ReturnCallInd { callee, info } => todo!(), - Inst::TrapIf { test, trap_code } => todo!(), - Inst::TrapIfC { rs1, rs2, cc, trap_code } => todo!(), - Inst::Jal { dest } => todo!(), - Inst::CondBr { taken, not_taken, kind } => todo!(), - Inst::LoadExtName { rd, name, offset } => todo!(), - Inst::LoadAddr { rd, mem } => todo!(), - Inst::VirtualSPOffsetAdj { amount } => todo!(), - Inst::Mov { rd, rm, ty } => todo!(), - Inst::MovFromPReg { rd, rm } => todo!(), - Inst::Fence { pred, succ } => todo!(), - Inst::FenceI => todo!(), - Inst::ECall => todo!(), - Inst::EBreak => todo!(), - Inst::Udf { trap_code } => todo!(), - Inst::Jalr { rd, base, offset } => todo!(), - Inst::Atomic { op, rd, addr, src, amo } => todo!(), - Inst::AtomicStore { src, ty, p } => todo!(), - Inst::AtomicLoad { rd, ty, p } => todo!(), - Inst::AtomicRmwLoop { offset, op, dst, ty, p, x, t0 } => todo!(), - Inst::Select { dst, ty, condition, x, y } => todo!(), - Inst::BrTable { index, tmp1, tmp2, targets } => todo!(), - Inst::AtomicCas { offset, t0, dst, e, addr, v, ty } => todo!(), - Inst::IntSelect { op, dst, x, y, ty } => todo!(), - Inst::Icmp { cc, rd, a, b, ty } => todo!(), - Inst::SelectReg { rd, rs1, rs2, condition } => todo!(), - Inst::FcvtToInt { is_sat, rd, tmp, rs, is_signed, in_type, out_type } => todo!(), - Inst::RawData { data } => todo!(), - Inst::Unwind { inst } => todo!(), - Inst::DummyUse { reg } => todo!(), - Inst::FloatRound { op, rd, int_tmp, f_tmp, rs, ty } => todo!(), - Inst::FloatSelect { op, rd, tmp, rs1, rs2, ty } => todo!(), - Inst::Popcnt { sum, step, tmp, rs, ty } => todo!(), - Inst::Cltz { leading, sum, step, tmp, rs, ty } => todo!(), - Inst::Rev8 { rs, step, tmp, rd } => todo!(), - Inst::Brev8 { rs, ty, step, tmp, tmp2, rd } => todo!(), - Inst::StackProbeLoop { guard_size, probe_count, tmp } => todo!(), - Inst::VecAluRRRR { op, vd, vd_src, vs2, vs1, mask, vstate } => todo!(), - Inst::VecAluRRRImm5 { op, vd, vd_src, vs2, imm, mask, vstate } => todo!(), - Inst::VecAluRRR { op, vd, vs2, vs1, mask, vstate } => todo!(), - Inst::VecAluRRImm5 { op, vd, vs2, imm, mask, vstate } => todo!(), - Inst::VecAluRR { op, vd, vs, mask, vstate } => todo!(), - Inst::VecAluRImm5 { op, vd, imm, mask, vstate } => 
todo!(), - Inst::VecSetState { rd, vstate } => todo!(), } } } From cc40f6827d0107302c50bd1134bec7b6e8c1bff9 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 24 Aug 2023 16:40:13 +0200 Subject: [PATCH 13/68] Update the set of registers --- cranelift/codegen/src/isa/zkasm/inst/regs.rs | 46 ++++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs index 2e348805f2b9..417189fd202f 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/regs.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -12,7 +12,6 @@ use alloc::vec::Vec; use regalloc2::VReg; use regalloc2::{MachineEnv, PReg, RegClass}; -// first argument of function call #[inline] pub fn a0() -> Reg { x_reg(1) @@ -23,11 +22,6 @@ pub fn b0() -> Reg { x_reg(2) } -#[inline] -pub fn c0() -> Reg { - x_reg(3) -} - // second argument of function call #[inline] pub fn a1() -> Reg { @@ -147,33 +141,37 @@ pub fn writable_spilltmp_reg2() -> Writable { pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv { let preferred_regs_by_class: [Vec; 3] = { - let x_registers: Vec = (5..=7) - .chain(10..=17) - .chain(28..=29) + // Registers are A, B, C, D, E. + // TODO(akashin): Remove redundant registers. + let x_registers: Vec = (1..=15) .map(|i| PReg::new(i, RegClass::Int)) .collect(); - let f_registers: Vec = (0..=7) - .chain(10..=17) - .chain(28..=31) - .map(|i| PReg::new(i, RegClass::Float)) - .collect(); + let f_registers: Vec = Vec::new(); + // (0..=7) + // .chain(10..=17) + // .chain(28..=31) + // .map(|i| PReg::new(i, RegClass::Float)) + // .collect(); - let v_registers: Vec = (0..=31).map(|i| PReg::new(i, RegClass::Vector)).collect(); + let v_registers: Vec = Vec::new(); + // (0..=31).map(|i| PReg::new(i, RegClass::Vector)).collect(); [x_registers, f_registers, v_registers] }; let non_preferred_regs_by_class: [Vec; 3] = { - let x_registers: Vec = (9..=9) - .chain(18..=27) - .map(|i| PReg::new(i, RegClass::Int)) - .collect(); - - let f_registers: Vec = (8..=9) - .chain(18..=27) - .map(|i| PReg::new(i, RegClass::Float)) - .collect(); + let x_registers: Vec = Vec::new(); + // (9..=9) + // .chain(18..=27) + // .map(|i| PReg::new(i, RegClass::Int)) + // .collect(); + + let f_registers: Vec = Vec::new(); + // (8..=9) + // .chain(18..=27) + // .map(|i| PReg::new(i, RegClass::Float)) + // .collect(); let v_registers = vec![]; From a5e74b0f69de2d95d43718178c9832dd1bf82c4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Gaspard?= Date: Thu, 24 Aug 2023 16:50:38 +0200 Subject: [PATCH 14/68] implement jump-to-label --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index baba1aaaa9c0..5fea5cdcac9c 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -6,6 +6,7 @@ use crate::isa::zkasm::inst::*; use crate::machinst::{AllocationConsumer, Reg, Writable}; use crate::trace; use cranelift_control::ControlPlane; +use cranelift_entity::EntityRef; use regalloc2::Allocation; pub struct EmitInfo { @@ -973,16 +974,16 @@ impl MachInstEmit for Inst { &Inst::Jal { dest } => { println!("jal {dest:?}"); - // todo!() - /* let code: u32 = 0b1101111; match dest { - BranchTarget::Label(lable) => { - sink.use_label_at_offset(start_off, lable, LabelUse::Jal20); - sink.add_uncond_branch(start_off, start_off + 4, 
lable); - sink.put4(code); + BranchTarget::Label(label) => { + // TODO: the following two lines allow eg. optimizing out jump-to-here + /* sink.use_label_at_offset(start_off, label, LabelUse::Jal20); + sink.add_uncond_branch(start_off, start_off + 4, label); */ + sink.put_data(format!(":JMP(L{})", label.index()).as_bytes()); } BranchTarget::ResolvedOffset(offset) => { - let offset = offset as i64; + todo!() + /* let offset = offset as i64; if offset != 0 { if LabelUse::Jal20.offset_in_range(offset) { let mut code = code.to_le_bytes(); @@ -999,9 +1000,9 @@ impl MachInstEmit for Inst { } } else { // CondBr often generate Jal {dest : 0}, means otherwise no jump. - } + } */ } - } */ + } } &Inst::CondBr { taken, From 938219356c23877309ef835f85b24ac3dd728cbd Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Thu, 24 Aug 2023 18:02:11 +0300 Subject: [PATCH 15/68] Use allocated physical registers where appropriate... --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 5fea5cdcac9c..cd67f66af6dd 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -644,7 +644,8 @@ impl MachInstEmit for Inst { todo!() // allocs.next(reg); } &Inst::AddImm32 { rd, src1, src2 } => { - println!("{src1} + {src2} => {:?}", rd.to_reg()); + let rd = allocs.next(rd.to_reg()); + println!("{src1} + {src2} => {:?}", rd); }, &Inst::AluRRR { alu_op, @@ -725,10 +726,11 @@ impl MachInstEmit for Inst { sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); */ } &Inst::Store { op, src, flags, to } => { + let src = allocs.next(src); + println!("{src:?} : MSTORE({to:?})"); // todo!() /* let to = to.clone().with_allocs(&mut allocs); - let src = allocs.next(src); let base = to.get_base_register(); let offset = to.get_offset_with_state(state); From aea57ad44a3a479bef93e37d9e26d1b3614fef89 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Thu, 24 Aug 2023 18:02:41 +0300 Subject: [PATCH 16/68] This target does not support unwinding --- cranelift/codegen/src/isa/zkasm/abi.rs | 27 --------------------- cranelift/codegen/src/isa/zkasm/inst/mod.rs | 2 +- 2 files changed, 1 insertion(+), 28 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index b410c4fceb6f..5b573813cc67 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -15,7 +15,6 @@ use crate::ir::types::I8; use crate::ir::LibCall; use crate::ir::Signature; use crate::isa::zkasm::settings::Flags as RiscvFlags; -use crate::isa::unwind::UnwindInst; use crate::settings; use crate::CodegenError; use crate::CodegenResult; @@ -359,13 +358,6 @@ impl ABIMachineSpec for Riscv64MachineDeps { fp_reg(), I64, )); - if flags.unwind_info() { - insts.push(Inst::Unwind { - inst: UnwindInst::PushFrameRegs { - offset_upward_to_caller_sp: 16, // FP, LR - }, - }); - } insts.push(Inst::Mov { rd: writable_fp_reg(), rm: stack_reg(), @@ -427,20 +419,9 @@ impl ABIMachineSpec for Riscv64MachineDeps { // Adjust the stack pointer downward for clobbers and the function fixed // frame (spillslots and storage slots). let stack_size = fixed_frame_storage_size + clobbered_size; - if flags.unwind_info() && setup_frame { - // The *unwind* frame (but not the actual frame) starts at the - // clobbers, just below the saved FP/LR pair. 
- insts.push(Inst::Unwind { - inst: UnwindInst::DefineNewFrame { - offset_downward_to_clobbers: clobbered_size, - offset_upward_to_caller_sp: 16, // FP, LR - }, - }); - } // Store each clobbered register in order at offsets from SP, // placing them above the fixed frame slots. if stack_size > 0 { - // since we use fp, we didn't need use UnwindInst::StackAlloc. let mut cur_offset = 8; for reg in clobbered_callee_saves { let r_reg = reg.to_reg(); @@ -449,14 +430,6 @@ impl ABIMachineSpec for Riscv64MachineDeps { RegClass::Float => F64, RegClass::Vector => unimplemented!("Vector Clobber Saves"), }; - if flags.unwind_info() { - insts.push(Inst::Unwind { - inst: UnwindInst::SaveReg { - clobber_offset: clobbered_size - cur_offset, - reg: r_reg, - }, - }); - } insts.push(Self::gen_store_stack( StackAMode::SPOffset(-(cur_offset as i64), ty), real_reg_to_reg(reg.to_reg()), diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index 0c5d2310b8fc..9aadec03e034 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -1213,7 +1213,7 @@ impl Inst { } }, &Inst::Unwind { ref inst } => { - format!("unwind {:?}", inst) + todo!() } &Inst::Brev8 { rs, From e60253679b9db38ec1caf44dd3d1050e847ccbb5 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Thu, 24 Aug 2023 18:07:13 +0300 Subject: [PATCH 17/68] MSTORE: make it more general --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index cd67f66af6dd..6b2fe35f1dcf 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -727,8 +727,14 @@ impl MachInstEmit for Inst { } &Inst::Store { op, src, flags, to } => { let src = allocs.next(src); - - println!("{src:?} : MSTORE({to:?})"); + let stack_offset = to.get_offset_with_state(state); + if let Some(base_register) = to.get_base_register() { + println!("{src:?} : MSTORE({base_register:?} + {stack_offset})"); + sink.put_data(b"{src:?} : MSTORE({base_register:?} + {stack_offset})\n"); + } else { + println!("{src:?} : MSTORE({stack_offset})"); + sink.put_data(b"{src:?} : MSTORE({stack_offset})\n"); + } // todo!() /* let to = to.clone().with_allocs(&mut allocs); From 12977bab6d3c8304332fb939a567fc68be35d088 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 24 Aug 2023 17:24:04 +0200 Subject: [PATCH 18/68] Migrate from println to sink --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 56 ++++++++++---------- cranelift/codegen/src/isa/zkasm/inst/mod.rs | 2 +- cranelift/src/wasm.rs | 3 ++ 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 6b2fe35f1dcf..1ebc09e12e3c 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -500,6 +500,10 @@ impl Inst { } } +fn put_string(s: &str, sink: &mut MachBuffer) { + sink.put_data(s.as_bytes()); +} + #[allow(unused)] impl MachInstEmit for Inst { type State = EmitState; @@ -636,16 +640,15 @@ impl MachInstEmit for Inst { sink.put4(x); */ } &Inst::Unwind { ref inst } => { - println!("Unwind"); - // todo!() - // sink.add_unwind(inst.clone()); + put_string(&format!("Unwind\n"), sink); + // sink.add_unwind(inst.clone()); } &Inst::DummyUse { reg } => { todo!() // allocs.next(reg); } &Inst::AddImm32 { rd, 
src1, src2 } => { let rd = allocs.next(rd.to_reg()); - println!("{src1} + {src2} => {:?}", rd); + put_string(&format!("{src1} + {src2} => {:?}", rd), sink); }, &Inst::AluRRR { alu_op, @@ -679,9 +682,8 @@ impl MachInstEmit for Inst { } => { let rs = allocs.next(rs); let rd = allocs.next_writable(rd); - println!("{rs:?} {imm12} => {rd:?}: {alu_op:?}"); + put_string(&format!("{rs:?} {imm12} => {rd:?}: {alu_op:?}\n"), sink); - // todo!() /* let rs = allocs.next(rs); let rd = allocs.next_writable(rd); let x = alu_op.op_code() @@ -697,8 +699,7 @@ impl MachInstEmit for Inst { from, flags, } => { - println!("$ => {rd:?} ; LOAD op={op:?} from={from:?} flags={flags:?}"); - // todo!() + put_string(&format!("$ => {rd:?} ; LOAD op={op:?} from={from:?} flags={flags:?}\n"), sink); /* let from = from.clone().with_allocs(&mut allocs); let rd = allocs.next_writable(rd); @@ -729,14 +730,12 @@ impl MachInstEmit for Inst { let src = allocs.next(src); let stack_offset = to.get_offset_with_state(state); if let Some(base_register) = to.get_base_register() { - println!("{src:?} : MSTORE({base_register:?} + {stack_offset})"); - sink.put_data(b"{src:?} : MSTORE({base_register:?} + {stack_offset})\n"); + put_string(&format!("{src:?} : MSTORE({base_register:?} + {stack_offset})\n"), sink); } else { - println!("{src:?} : MSTORE({stack_offset})"); - sink.put_data(b"{src:?} : MSTORE({stack_offset})\n"); + put_string(&format!("{src:?} : MSTORE({stack_offset})\n"), sink); } - // todo!() - /* let to = to.clone().with_allocs(&mut allocs); + /* let to = to.clone().with_allocs(&mut allocs); + let src = allocs.next(src); let base = to.get_base_register(); let offset = to.get_offset_with_state(state); @@ -768,8 +767,7 @@ impl MachInstEmit for Inst { &Inst::Ret { stack_bytes_to_pop, .. } => { - println!("ret"); - // todo!() + put_string(&format!("RETURN\n"), sink); /* if stack_bytes_to_pop != 0 { Inst::AdjustSp { amount: i64::from(stack_bytes_to_pop), @@ -824,8 +822,13 @@ impl MachInstEmit for Inst { .for_each(|i| i.emit(&[], sink, emit_info, state)); */ } &Inst::AdjustSp { amount } => { - println!("SP {} => SP", amount); - // todo!() + let amount = if amount > 0 { + format!("+ {}", amount) + } else { + format!("- {}", -amount) + }; + put_string(&format!("SP {amount} => SP\n"), sink); + /* if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { Inst::AluRRImm12 { alu_op: AluOPRRI::Addi, @@ -903,8 +906,7 @@ impl MachInstEmit for Inst { ); */ } &Inst::CallInd { ref info } => { - println!("call {info:?}"); - // todo!() + put_string(&format!("CALL {info:?}\n"), sink); /* let rn = allocs.next(info.rn); if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); @@ -981,7 +983,8 @@ impl MachInstEmit for Inst { } &Inst::Jal { dest } => { - println!("jal {dest:?}"); + put_string(&format!("JUMP {dest:?}\n"), sink); + /* let code: u32 = 0b1101111; match dest { BranchTarget::Label(label) => { // TODO: the following two lines allow eg. optimizing out jump-to-here @@ -991,7 +994,7 @@ impl MachInstEmit for Inst { } BranchTarget::ResolvedOffset(offset) => { todo!() - /* let offset = offset as i64; + let offset = offset as i64; if offset != 0 { if LabelUse::Jal20.offset_in_range(offset) { let mut code = code.to_le_bytes(); @@ -1008,9 +1011,9 @@ impl MachInstEmit for Inst { } } else { // CondBr often generate Jal {dest : 0}, means otherwise no jump. 
- } */ + } } - } + }*/ } &Inst::CondBr { taken, @@ -1053,8 +1056,7 @@ impl MachInstEmit for Inst { } &Inst::Mov { rd, rm, ty } => { - println!("{rm:?} => {rd:?}"); - // todo!(); + put_string(&format!("{rm:?} => {rd:?}\n"), sink); /* debug_assert_eq!(rd.to_reg().class(), rm.class()); if rd.to_reg() == rm { return; @@ -2029,7 +2031,7 @@ impl MachInstEmit for Inst { offset, } => { let rd = allocs.next_writable(rd); - println!("{rd:?} => CALL {name:?}"); + put_string(&format!("{rd:?} => CALL {name:?}\n"), sink); /* let rd = allocs.next_writable(rd); // get the current pc. Inst::Auipc { diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index 9aadec03e034..bf4c4c556d66 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -975,7 +975,7 @@ impl MachInst for Inst { fn worst_case_size() -> CodeOffset { // calculate by test function zkasm_worst_case_instruction_size() - 116 + 1_000_000_000 } fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index 3dd3ae2ccd2b..921c10eac52a 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -239,6 +239,7 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - let mut total_module_code_size = 0; let mut context = Context::new(); for (def_index, func) in dummy_environ.info.function_bodies.iter() { + println!("; Function {def_index:?}"); context.func = func.clone(); let mut saved_size = None; @@ -255,6 +256,8 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - .map_err(|err| anyhow::anyhow!("{}", pretty_error(&err.func, err.inner)))?; let code_info = compiled_code.code_info(); + println!("{}", std::str::from_utf8(compiled_code.code_buffer()).unwrap()); + if options.print_size { println!( "Function #{} code size: {} bytes", From 4e5aa2b72aa9c365ba7b65d9ee349418d07adda3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Gaspard?= Date: Thu, 24 Aug 2023 17:28:02 +0200 Subject: [PATCH 19/68] handle jumps and labels --- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 1 + cranelift/codegen/src/isa/zkasm/inst.isle | 4 ++++ cranelift/codegen/src/isa/zkasm/inst/emit.rs | 14 ++++++++------ cranelift/codegen/src/isa/zkasm/inst/mod.rs | 12 ++++++++++++ cranelift/codegen/src/machinst/mod.rs | 1 + cranelift/codegen/src/machinst/vcode.rs | 1 + 6 files changed, 27 insertions(+), 6 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 006a6b807d3d..92377395626a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1158,6 +1158,7 @@ impl MachInst for Inst { } fn gen_block_start( + _block_index: usize, is_indirect_branch_target: bool, is_forward_edge_cfi_enabled: bool, ) -> Option { diff --git a/cranelift/codegen/src/isa/zkasm/inst.isle b/cranelift/codegen/src/isa/zkasm/inst.isle index 984d98e33d79..e3a6fd1508b1 100644 --- a/cranelift/codegen/src/isa/zkasm/inst.isle +++ b/cranelift/codegen/src/isa/zkasm/inst.isle @@ -5,6 +5,10 @@ (Nop0) (Nop4) + ;; Label to output at the beginning of a block + (Label + (imm usize)) + ;; load immediate (Lui (rd WritableReg) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 1ebc09e12e3c..d7e089ae0f7a 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ 
-421,6 +421,7 @@ impl Inst { match self { Inst::Nop0 | Inst::Nop4 + | Inst::Label { .. } | Inst::BrTable { .. } | Inst::Auipc { .. } | Inst::Lui { .. } @@ -550,6 +551,9 @@ impl MachInstEmit for Inst { }; x.emit(&[], sink, emit_info, state) */ } + &Inst::Label { imm } => { + sink.put_data(format!("L{imm}:\n").as_bytes()) + } &Inst::RawData { ref data } => { // Right now we only put a u32 or u64 in this instruction. // It is not very long, no need to check if need `emit_island`. @@ -983,17 +987,15 @@ impl MachInstEmit for Inst { } &Inst::Jal { dest } => { - put_string(&format!("JUMP {dest:?}\n"), sink); - /* let code: u32 = 0b1101111; match dest { BranchTarget::Label(label) => { // TODO: the following two lines allow eg. optimizing out jump-to-here /* sink.use_label_at_offset(start_off, label, LabelUse::Jal20); sink.add_uncond_branch(start_off, start_off + 4, label); */ - sink.put_data(format!(":JMP(L{})", label.index()).as_bytes()); + sink.put_data(format!(":JMP(L{})\n", label.index()).as_bytes()); } BranchTarget::ResolvedOffset(offset) => { - todo!() + todo!() /* let offset = offset as i64; if offset != 0 { if LabelUse::Jal20.offset_in_range(offset) { @@ -1011,9 +1013,9 @@ impl MachInstEmit for Inst { } } else { // CondBr often generate Jal {dest : 0}, means otherwise no jump. - } + } */ } - }*/ + } } &Inst::CondBr { taken, diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index bf4c4c556d66..b2abc2d8987c 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -380,6 +380,7 @@ fn zkasm_get_operands VReg>(inst: &Inst, collector: &mut OperandC match inst { &Inst::Nop0 => {} &Inst::Nop4 => {} + &Inst::Label { .. } => {} &Inst::BrTable { index, tmp1, tmp2, .. } => { @@ -850,6 +851,14 @@ impl MachInst for Inst { Inst::DummyUse { reg } } + fn gen_block_start( + block_index: usize, + _is_indirect_branch_target: bool, + _is_forward_edge_cfi_enabled: bool, + ) -> Option { + Some(Inst::Label { imm: block_index }) + } + fn canonical_type_for_rc(rc: RegClass) -> Type { match rc { regalloc2::RegClass::Int => I64, @@ -1102,6 +1111,9 @@ impl Inst { &Inst::Nop4 => { format!("##fixed 4-size nop") } + &Inst::Label { imm } => { + format!("##label=L{imm}") + } &Inst::StackProbeLoop { guard_size, probe_count, diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index e08384c30ab2..1eda843fe50d 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -185,6 +185,7 @@ pub trait MachInst: Clone + Debug { /// block, if any. Note that the return value must not be subject to /// register allocation. 
fn gen_block_start( + _block_index: usize, _is_indirect_branch_target: bool, _is_forward_edge_cfi_enabled: bool, ) -> Option { diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 59c7328c3aa8..d5ebee420d2f 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -896,6 +896,7 @@ impl VCode { } if let Some(block_start) = I::gen_block_start( + block.index(), self.block_order.is_indirect_branch_target(block), is_forward_edge_cfi_enabled, ) { From 3a23cef76327974a3635e61bd4790300344ee1cc Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 24 Aug 2023 17:40:54 +0200 Subject: [PATCH 20/68] Add a few more instructions for counter --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 11 ++++++++--- cranelift/codegen/src/isa/zkasm/inst/mod.rs | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index d7e089ae0f7a..7da05c8743f8 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -652,7 +652,7 @@ impl MachInstEmit for Inst { } &Inst::AddImm32 { rd, src1, src2 } => { let rd = allocs.next(rd.to_reg()); - put_string(&format!("{src1} + {src2} => {:?}", rd), sink); + put_string(&format!("{src1} + {src2} => {:?}\n", rd), sink); }, &Inst::AluRRR { alu_op, @@ -660,7 +660,9 @@ impl MachInstEmit for Inst { rs1, rs2, } => { - todo!() /* let rs1 = allocs.next(rs1); + put_string(&format!("{rs1:?}, {rs2:?} => {:?} : {alu_op:?}\n", rd), sink); + + /* let rs1 = allocs.next(rs1); let rs2 = allocs.next(rs2); let rd = allocs.next_writable(rd); let (rs1, rs2) = if alu_op.reverse_rs() { @@ -1022,6 +1024,7 @@ impl MachInstEmit for Inst { not_taken, mut kind, } => { + todo!() /* kind.rs1 = allocs.next(kind.rs1); kind.rs2 = allocs.next(kind.rs2); match taken { @@ -1432,7 +1435,9 @@ impl MachInstEmit for Inst { ref b, ty, } => { - todo!() /* let a = alloc_value_regs(a, &mut allocs); + put_string(&format!("{a:?}, {b:?} => {:?} : CMP\n", rd), sink); + + /* let a = alloc_value_regs(a, &mut allocs); let b = alloc_value_regs(b, &mut allocs); let rd = allocs.next_writable(rd); let label_true = sink.get_label(); diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index b2abc2d8987c..0e877c7ac7f6 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -984,7 +984,7 @@ impl MachInst for Inst { fn worst_case_size() -> CodeOffset { // calculate by test function zkasm_worst_case_instruction_size() - 1_000_000_000 + 1_000_000 } fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { From c431024b2ba23cc607ce0e19be9e2359dc1a19c8 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Mon, 28 Aug 2023 17:36:32 +0100 Subject: [PATCH 21/68] Allocate registers during Mov --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 63 ++++++++++---------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 7da05c8743f8..ba254d009520 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -1061,45 +1061,44 @@ impl MachInstEmit for Inst { } &Inst::Mov { rd, rm, ty } => { - put_string(&format!("{rm:?} => {rd:?}\n"), sink); - /* debug_assert_eq!(rd.to_reg().class(), rm.class()); if rd.to_reg() == rm { return; } let rm = 
allocs.next(rm); let rd = allocs.next_writable(rd); + put_string(&format!("{rm:?} => {rd:?}\n"), sink); - match rm.class() { - RegClass::Int => Inst::AluRRImm12 { - alu_op: AluOPRRI::Ori, - rd: rd, - rs: rm, - imm12: Imm12::zero(), - }, - RegClass::Float => Inst::FpuRRR { - alu_op: if ty == F32 { - FpuOPRRR::FsgnjS - } else { - FpuOPRRR::FsgnjD - }, - frm: None, - rd: rd, - rs1: rm, - rs2: rm, - }, - RegClass::Vector => Inst::VecAluRRImm5 { - op: VecAluOpRRImm5::VmvrV, - vd: rd, - vs2: rm, - // Imm 0 means copy 1 register. - imm: Imm5::maybe_from_i8(0).unwrap(), - mask: VecOpMasking::Disabled, - // Vstate for this instruction is ignored. - vstate: VState::from_type(ty), - }, - } - .emit(&[], sink, emit_info, state); */ + // match rm.class() { + // RegClass::Int => Inst::AluRRImm12 { + // alu_op: AluOPRRI::Ori, + // rd: rd, + // rs: rm, + // imm12: Imm12::zero(), + // }, + // RegClass::Float => Inst::FpuRRR { + // alu_op: if ty == F32 { + // FpuOPRRR::FsgnjS + // } else { + // FpuOPRRR::FsgnjD + // }, + // frm: None, + // rd: rd, + // rs1: rm, + // rs2: rm, + // }, + // RegClass::Vector => Inst::VecAluRRImm5 { + // op: VecAluOpRRImm5::VmvrV, + // vd: rd, + // vs2: rm, + // // Imm 0 means copy 1 register. + // imm: Imm5::maybe_from_i8(0).unwrap(), + // mask: VecOpMasking::Disabled, + // // Vstate for this instruction is ignored. + // vstate: VState::from_type(ty), + // }, + // } + // .emit(&[], sink, emit_info, state); } &Inst::MovFromPReg { rd, rm } => { From bf4f7cc093ffd908081d87a356d6e0cfa57741c7 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Mon, 28 Aug 2023 19:19:54 +0100 Subject: [PATCH 22/68] Use proper register names --- cranelift/codegen/src/isa/zkasm/abi.rs | 6 +-- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 39 ++++++++++---------- cranelift/codegen/src/isa/zkasm/inst/mod.rs | 4 +- cranelift/codegen/src/isa/zkasm/inst/regs.rs | 2 +- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index 5b573813cc67..eca1eaefff03 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -86,7 +86,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { /// Return required stack alignment in bytes. fn stack_align(_call_conv: isa::CallConv) -> u32 { - 16 + 1 } fn compute_arg_locs<'a, I>( @@ -104,8 +104,8 @@ impl ABIMachineSpec for Riscv64MachineDeps { // both start and end are included. 
let (x_start, x_end, f_start, f_end) = match (call_conv, args_or_rets) { (isa::CallConv::Tail, _) => (9, 29, 0, 31), - (_, ArgsOrRets::Args) => (10, 17, 10, 17), - (_, ArgsOrRets::Rets) => (10, 11, 10, 11), + (_, ArgsOrRets::Args) => (3, 5, 3, 5), + (_, ArgsOrRets::Rets) => (3, 5, 3, 5), }; let mut next_x_reg = x_start; let mut next_f_reg = f_start; diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index ba254d009520..626f7c35fd99 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -660,11 +660,12 @@ impl MachInstEmit for Inst { rs1, rs2, } => { - put_string(&format!("{rs1:?}, {rs2:?} => {:?} : {alu_op:?}\n", rd), sink); - - /* let rs1 = allocs.next(rs1); + let rs1 = allocs.next(rs1); let rs2 = allocs.next(rs2); let rd = allocs.next_writable(rd); + put_string(&format!("{}, {} => {} : {alu_op:?}\n", reg_name(rs1), reg_name(rs2), reg_name(rd.to_reg())), sink); + + /* let (rs1, rs2) = if alu_op.reverse_rs() { (rs2, rs1) } else { @@ -688,16 +689,14 @@ impl MachInstEmit for Inst { } => { let rs = allocs.next(rs); let rd = allocs.next_writable(rd); - put_string(&format!("{rs:?} {imm12} => {rd:?}: {alu_op:?}\n"), sink); + put_string(&format!("{} {imm12} => {}: {alu_op:?}\n", reg_name(rs), reg_name(rd.to_reg())), sink); - /* let rs = allocs.next(rs); - let rd = allocs.next_writable(rd); - let x = alu_op.op_code() - | reg_to_gpr_num(rd.to_reg()) << 7 - | alu_op.funct3() << 12 - | reg_to_gpr_num(rs) << 15 - | alu_op.imm12(imm12) << 20; - sink.put4(x); */ + // let x = alu_op.op_code() + // | reg_to_gpr_num(rd.to_reg()) << 7 + // | alu_op.funct3() << 12 + // | reg_to_gpr_num(rs) << 15 + // | alu_op.imm12(imm12) << 20; + // sink.put4(x); } &Inst::Load { rd, @@ -705,13 +704,11 @@ impl MachInstEmit for Inst { from, flags, } => { - put_string(&format!("$ => {rd:?} ; LOAD op={op:?} from={from:?} flags={flags:?}\n"), sink); - /* let from = from.clone().with_allocs(&mut allocs); - let rd = allocs.next_writable(rd); - + let from = from.clone().with_allocs(&mut allocs); let base = from.get_base_register(); let offset = from.get_offset_with_state(state); let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + let rd = allocs.next_writable(rd); let (addr, imm12) = match (base, offset_imm12) { // If the offset fits into an imm12 we can directly encode it. @@ -723,7 +720,9 @@ impl MachInstEmit for Inst { (tmp.to_reg(), Imm12::zero()) } }; + put_string(&format!("$ => {} : LOAD({} {imm12})\n", reg_name(rd.to_reg()), reg_name(addr)), sink); + /* let srcloc = state.cur_srcloc(); if !srcloc.is_default() && !flags.notrap() { // Register the offset at which the actual load instruction starts. 
@@ -736,9 +735,9 @@ impl MachInstEmit for Inst { let src = allocs.next(src); let stack_offset = to.get_offset_with_state(state); if let Some(base_register) = to.get_base_register() { - put_string(&format!("{src:?} : MSTORE({base_register:?} + {stack_offset})\n"), sink); + put_string(&format!("{} : MSTORE({} + {stack_offset})\n", reg_name(src), reg_name(base_register)), sink); } else { - put_string(&format!("{src:?} : MSTORE({stack_offset})\n"), sink); + put_string(&format!("{} : MSTORE({stack_offset})\n", reg_name(src)), sink); } /* let to = to.clone().with_allocs(&mut allocs); let src = allocs.next(src); @@ -1067,7 +1066,7 @@ impl MachInstEmit for Inst { let rm = allocs.next(rm); let rd = allocs.next_writable(rd); - put_string(&format!("{rm:?} => {rd:?}\n"), sink); + put_string(&format!("{} => {}\n", reg_name(rm), reg_name(rd.to_reg())), sink); // match rm.class() { // RegClass::Int => Inst::AluRRImm12 { @@ -2037,7 +2036,7 @@ impl MachInstEmit for Inst { offset, } => { let rd = allocs.next_writable(rd); - put_string(&format!("{rd:?} => CALL {name:?}\n"), sink); + put_string(&format!("{} => CALL {name:?}\n", reg_name(rd.to_reg())), sink); /* let rd = allocs.next_writable(rd); // get the current pc. Inst::Auipc { diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index 0e877c7ac7f6..bb1c0b17b0bd 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -1005,9 +1005,9 @@ pub fn reg_name(reg: Reg) -> String { match reg.to_real_reg() { Some(real) => match real.class() { RegClass::Int => match real.hw_enc() { - 0 => "zero".into(), + 0 => "0".into(), 1 => "ra".into(), - 2 => "sp".into(), + 2 => "SP".into(), 3 => "gp".into(), 4 => "tp".into(), 5 => "t0".into(), diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs index 417189fd202f..f2451778f9a4 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/regs.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -143,7 +143,7 @@ pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv { let preferred_regs_by_class: [Vec; 3] = { // Registers are A, B, C, D, E. // TODO(akashin): Remove redundant registers. - let x_registers: Vec = (1..=15) + let x_registers: Vec = (1..=5) .map(|i| PReg::new(i, RegClass::Int)) .collect(); From ff8855d83eb60540e250b036726fb8757a7bad7c Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 08:56:13 +0100 Subject: [PATCH 23/68] Fix a few more emit statements --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 626f7c35fd99..e26d53e06ea0 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -552,7 +552,7 @@ impl MachInstEmit for Inst { x.emit(&[], sink, emit_info, state) */ } &Inst::Label { imm } => { - sink.put_data(format!("L{imm}:\n").as_bytes()) + put_string(&format!("L{imm}:\n"), sink); } &Inst::RawData { ref data } => { // Right now we only put a u32 or u64 in this instruction. 
@@ -652,7 +652,7 @@ impl MachInstEmit for Inst { } &Inst::AddImm32 { rd, src1, src2 } => { let rd = allocs.next(rd.to_reg()); - put_string(&format!("{src1} + {src2} => {:?}\n", rd), sink); + put_string(&format!("{src1} + {src2} => {}\n", reg_name(rd)), sink); }, &Inst::AluRRR { alu_op, @@ -993,7 +993,7 @@ impl MachInstEmit for Inst { // TODO: the following two lines allow eg. optimizing out jump-to-here /* sink.use_label_at_offset(start_off, label, LabelUse::Jal20); sink.add_uncond_branch(start_off, start_off + 4, label); */ - sink.put_data(format!(":JMP(L{})\n", label.index()).as_bytes()); + put_string(&format!(":JMP(L{})\n", label.index()), sink); } BranchTarget::ResolvedOffset(offset) => { todo!() /* @@ -2036,7 +2036,7 @@ impl MachInstEmit for Inst { offset, } => { let rd = allocs.next_writable(rd); - put_string(&format!("{} => CALL {name:?}\n", reg_name(rd.to_reg())), sink); + put_string(&format!("CALL {name:?} => {}\n", reg_name(rd.to_reg())), sink); /* let rd = allocs.next_writable(rd); // get the current pc. Inst::Auipc { From 17927bff85d8cc6a1d4d74f16d66aaa916d82a26 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 09:13:11 +0100 Subject: [PATCH 24/68] Rename a few more registers --- cranelift/codegen/src/isa/zkasm/abi.rs | 4 ++-- cranelift/codegen/src/isa/zkasm/inst/mod.rs | 14 +++++++++++--- cranelift/codegen/src/isa/zkasm/inst/regs.rs | 2 +- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index eca1eaefff03..e07a22784f9a 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -104,8 +104,8 @@ impl ABIMachineSpec for Riscv64MachineDeps { // both start and end are included. let (x_start, x_end, f_start, f_end) = match (call_conv, args_or_rets) { (isa::CallConv::Tail, _) => (9, 29, 0, 31), - (_, ArgsOrRets::Args) => (3, 5, 3, 5), - (_, ArgsOrRets::Rets) => (3, 5, 3, 5), + (_, ArgsOrRets::Args) => (5, 7, 5, 7), + (_, ArgsOrRets::Rets) => (5, 7, 5, 7), }; let mut next_x_reg = x_start; let mut next_f_reg = f_start; diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index bb1c0b17b0bd..84d08db8679d 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -1006,12 +1006,20 @@ pub fn reg_name(reg: Reg) -> String { Some(real) => match real.class() { RegClass::Int => match real.hw_enc() { 0 => "0".into(), - 1 => "ra".into(), + 1 => "RR".into(), 2 => "SP".into(), + // TODO(akashin): Do we have a global pointer register in ZK ASM? + // https://www.five-embeddev.com/quickref/global_pointer.html + // Supposed to be unallocatable. 3 => "gp".into(), + // TODO(akashin): Do we have a thread pointer register in ZK ASM? + // https://groups.google.com/a/groups.riscv.org/g/sw-dev/c/cov47bNy5gY?pli=1 + // Supposed to be unallocatable. 4 => "tp".into(), - 5 => "t0".into(), - 6..=7 => format!("t{}", real.hw_enc() - 5), + // Temporary registers. 
+ 5 => "C".into(), + 6 => "D".into(), + 7 => "E".into(), 8 => "fp".into(), 9 => "s1".into(), 10..=17 => format!("a{}", real.hw_enc() - 10), diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs index f2451778f9a4..9589dd99cb43 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/regs.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -143,7 +143,7 @@ pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv { let preferred_regs_by_class: [Vec; 3] = { // Registers are A, B, C, D, E. // TODO(akashin): Remove redundant registers. - let x_registers: Vec = (1..=5) + let x_registers: Vec = (1..=7) .map(|i| PReg::new(i, RegClass::Int)) .collect(); From fac63c600fc684784e3d10ec2b14549885d0b830 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 09:36:01 +0100 Subject: [PATCH 25/68] Allocate call return address --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index e26d53e06ea0..468648980ed1 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -911,8 +911,9 @@ impl MachInstEmit for Inst { ); */ } &Inst::CallInd { ref info } => { - put_string(&format!("CALL {info:?}\n"), sink); - /* let rn = allocs.next(info.rn); + let rn = allocs.next(info.rn); + put_string(&format!("CALL {}, {:?}\n", reg_name(rn), info.uses), sink); + /* if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); } @@ -2037,7 +2038,8 @@ impl MachInstEmit for Inst { } => { let rd = allocs.next_writable(rd); put_string(&format!("CALL {name:?} => {}\n", reg_name(rd.to_reg())), sink); - /* let rd = allocs.next_writable(rd); + + /* // get the current pc. 
Inst::Auipc { rd: rd, From ee1147bb3e288bcc16cee44362a223aa5e93ec38 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 09:44:05 +0100 Subject: [PATCH 26/68] Fix ADD instruction --- cranelift/codegen/src/isa/zkasm/inst/args.rs | 4 ++-- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/args.rs b/cranelift/codegen/src/isa/zkasm/inst/args.rs index 03b0255bf9f5..86f8aba0f6a0 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/args.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/args.rs @@ -775,7 +775,7 @@ impl FpuOPRRR { impl AluOPRRR { pub(crate) const fn op_name(self) -> &'static str { match self { - Self::Add => "add", + Self::Add => "ADD", Self::Sub => "sub", Self::Sll => "sll", Self::Slt => "slt", @@ -1112,7 +1112,7 @@ impl AluOPRRI { pub(crate) fn op_name(self) -> &'static str { match self { - Self::Addi => "addi", + Self::Addi => "ADD", Self::Slti => "slti", Self::SltiU => "sltiu", Self::Xori => "xori", diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 468648980ed1..5a8b639a3d6c 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -663,7 +663,7 @@ impl MachInstEmit for Inst { let rs1 = allocs.next(rs1); let rs2 = allocs.next(rs2); let rd = allocs.next_writable(rd); - put_string(&format!("{}, {} => {} : {alu_op:?}\n", reg_name(rs1), reg_name(rs2), reg_name(rd.to_reg())), sink); + put_string(&format!("{}, {} => {} : {}\n", reg_name(rs1), reg_name(rs2), reg_name(rd.to_reg()), alu_op.op_name()), sink); /* let (rs1, rs2) = if alu_op.reverse_rs() { @@ -689,7 +689,12 @@ impl MachInstEmit for Inst { } => { let rs = allocs.next(rs); let rd = allocs.next_writable(rd); - put_string(&format!("{} {imm12} => {}: {alu_op:?}\n", reg_name(rs), reg_name(rd.to_reg())), sink); + match alu_op { + AluOPRRI::Addi => { + put_string(&format!("{} + {imm12} => {}\n", reg_name(rs), reg_name(rd.to_reg())), sink); + }, + _ => unreachable!(), + }; // let x = alu_op.op_code() // | reg_to_gpr_num(rd.to_reg()) << 7 From f5c868b1a572c303bdc268bf2a41584c860197d5 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 09:53:06 +0100 Subject: [PATCH 27/68] Implement ASSERT calls --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 17 +++++++++++++---- cranelift/codegen/src/isa/zkasm/inst/mod.rs | 4 +++- cranelift/codegen/src/isa/zkasm/inst/regs.rs | 9 ++++----- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 5a8b639a3d6c..5e3b87baa59c 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -916,8 +916,17 @@ impl MachInstEmit for Inst { ); */ } &Inst::CallInd { ref info } => { - let rn = allocs.next(info.rn); - put_string(&format!("CALL {}, {:?}\n", reg_name(rn), info.uses), sink); + // let rn = allocs.next(info.rn); + // put_string(&format!("CALL {}, {:?}\n", reg_name(rn), info.uses), sink); + + Inst::Mov { + ty: types::I64, + rd: regs::writable_a0(), + rm: info.uses[0].preg, + } + .emit(&[], sink, emit_info, state); + put_string(&format!("{} : ASSERT\n", reg_name(info.uses[1].preg)), sink); + /* if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); @@ -2041,8 +2050,8 @@ impl MachInstEmit for Inst { ref name, offset, } => { - let rd = allocs.next_writable(rd); - 
put_string(&format!("CALL {name:?} => {}\n", reg_name(rd.to_reg())), sink); + // let rd = allocs.next_writable(rd); + // put_string(&format!("CALL {name:?} => {}\n", reg_name(rd.to_reg())), sink); /* // get the current pc. diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index 84d08db8679d..dde33f24f3da 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -1022,7 +1022,9 @@ pub fn reg_name(reg: Reg) -> String { 7 => "E".into(), 8 => "fp".into(), 9 => "s1".into(), - 10..=17 => format!("a{}", real.hw_enc() - 10), + 10 => "A".into(), + 11 => "B".into(), + 12..=17 => format!("a{}", real.hw_enc() - 10), 18..=27 => format!("s{}", real.hw_enc() - 16), 28..=31 => format!("t{}", real.hw_enc() - 25), _ => unreachable!(), diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs index 9589dd99cb43..6832c4b386c6 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/regs.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -14,24 +14,24 @@ use regalloc2::{MachineEnv, PReg, RegClass}; #[inline] pub fn a0() -> Reg { - x_reg(1) + x_reg(10) } #[inline] pub fn b0() -> Reg { - x_reg(2) + x_reg(11) } // second argument of function call #[inline] pub fn a1() -> Reg { - x_reg(11) + x_reg(5) } // third argument of function call #[inline] pub fn a2() -> Reg { - x_reg(12) + x_reg(6) } #[inline] @@ -142,7 +142,6 @@ pub fn writable_spilltmp_reg2() -> Writable { pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv { let preferred_regs_by_class: [Vec; 3] = { // Registers are A, B, C, D, E. - // TODO(akashin): Remove redundant registers. let x_registers: Vec = (1..=7) .map(|i| PReg::new(i, RegClass::Int)) .collect(); From 3db90c793069f7cb9eb09baf682c21267a7ea6d3 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 10:04:09 +0100 Subject: [PATCH 28/68] Format integer in LOAD --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 5e3b87baa59c..a3a4176a311e 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -725,7 +725,7 @@ impl MachInstEmit for Inst { (tmp.to_reg(), Imm12::zero()) } }; - put_string(&format!("$ => {} : LOAD({} {imm12})\n", reg_name(rd.to_reg()), reg_name(addr)), sink); + put_string(&format!("$ => {} : LOAD({} + {imm12})\n", reg_name(rd.to_reg()), reg_name(addr)), sink); /* let srcloc = state.cur_srcloc(); From a6d8a9c87082adc61ab8b5254e30ade1e48252c6 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 10:17:27 +0100 Subject: [PATCH 29/68] Rewrite function calling conventions --- cranelift/codegen/src/isa/zkasm/abi.rs | 38 ++++++++++---------- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 4 ++- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index e07a22784f9a..8186dd9b342a 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -347,38 +347,38 @@ impl ABIMachineSpec for Riscv64MachineDeps { // sd fp,0(sp) ;; store old fp. // mv fp,sp ;; set fp to sp. 
let mut insts = SmallVec::new(); - insts.push(Inst::AdjustSp { amount: -16 }); - insts.push(Self::gen_store_stack( - StackAMode::SPOffset(8, I64), - link_reg(), - I64, - )); + insts.push(Inst::AdjustSp { amount: -1 }); insts.push(Self::gen_store_stack( StackAMode::SPOffset(0, I64), - fp_reg(), + link_reg(), I64, )); - insts.push(Inst::Mov { - rd: writable_fp_reg(), - rm: stack_reg(), - ty: I64, - }); + // insts.push(Self::gen_store_stack( + // StackAMode::SPOffset(0, I64), + // fp_reg(), + // I64, + // )); + // insts.push(Inst::Mov { + // rd: writable_fp_reg(), + // rm: stack_reg(), + // ty: I64, + // }); insts } /// reverse of gen_prologue_frame_setup. fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec { let mut insts = SmallVec::new(); - insts.push(Self::gen_load_stack( - StackAMode::SPOffset(8, I64), - writable_link_reg(), - I64, - )); insts.push(Self::gen_load_stack( StackAMode::SPOffset(0, I64), - writable_fp_reg(), + writable_link_reg(), I64, )); - insts.push(Inst::AdjustSp { amount: 16 }); + // insts.push(Self::gen_load_stack( + // StackAMode::SPOffset(0, I64), + // writable_fp_reg(), + // I64, + // )); + insts.push(Inst::AdjustSp { amount: 1 }); insts } diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index a3a4176a311e..0fb564095b85 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -777,7 +777,9 @@ impl MachInstEmit for Inst { &Inst::Ret { stack_bytes_to_pop, .. } => { - put_string(&format!("RETURN\n"), sink); + // put_string(&format!("RETURN\n"), sink); + put_string(&format!(":JMP(RR)\n"), sink); + /* if stack_bytes_to_pop != 0 { Inst::AdjustSp { amount: i64::from(stack_bytes_to_pop), From d88a1ce4f176ad07af4ae1a2539130f0a7318738 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 10:19:23 +0100 Subject: [PATCH 30/68] Remove space after colon --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 0fb564095b85..a3bc12f43ea3 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -725,7 +725,7 @@ impl MachInstEmit for Inst { (tmp.to_reg(), Imm12::zero()) } }; - put_string(&format!("$ => {} : LOAD({} + {imm12})\n", reg_name(rd.to_reg()), reg_name(addr)), sink); + put_string(&format!("$ => {} :LOAD({} + {imm12})\n", reg_name(rd.to_reg()), reg_name(addr)), sink); /* let srcloc = state.cur_srcloc(); @@ -740,9 +740,9 @@ impl MachInstEmit for Inst { let src = allocs.next(src); let stack_offset = to.get_offset_with_state(state); if let Some(base_register) = to.get_base_register() { - put_string(&format!("{} : MSTORE({} + {stack_offset})\n", reg_name(src), reg_name(base_register)), sink); + put_string(&format!("{} :MSTORE({} + {stack_offset})\n", reg_name(src), reg_name(base_register)), sink); } else { - put_string(&format!("{} : MSTORE({stack_offset})\n", reg_name(src)), sink); + put_string(&format!("{} :MSTORE({stack_offset})\n", reg_name(src)), sink); } /* let to = to.clone().with_allocs(&mut allocs); let src = allocs.next(src); @@ -927,7 +927,7 @@ impl MachInstEmit for Inst { rm: info.uses[0].preg, } .emit(&[], sink, emit_info, state); - put_string(&format!("{} : ASSERT\n", reg_name(info.uses[1].preg)), sink); + put_string(&format!("{} :ASSERT\n", reg_name(info.uses[1].preg)), sink); /* if let Some(s) = 
state.take_stack_map() { From bae400dcf613e3e512c6b64099acda9cd7f16a29 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 10:20:13 +0100 Subject: [PATCH 31/68] LOAD -> MLOAD --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index a3bc12f43ea3..f85d724f4bd2 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -725,7 +725,7 @@ impl MachInstEmit for Inst { (tmp.to_reg(), Imm12::zero()) } }; - put_string(&format!("$ => {} :LOAD({} + {imm12})\n", reg_name(rd.to_reg()), reg_name(addr)), sink); + put_string(&format!("$ => {} :MLOAD({} + {imm12})\n", reg_name(rd.to_reg()), reg_name(addr)), sink); /* let srcloc = state.cur_srcloc(); From 26da8e1dc14e0cb37cb6cea023b87c3c16a69c79 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 10:45:53 +0100 Subject: [PATCH 32/68] Add preamble and postamble --- cranelift/src/wasm.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index 921c10eac52a..b57f8474af9e 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -235,11 +235,18 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - vprintln!(options.verbose, ""); } + println!("start:"); + let start_func = dummy_environ.info.start_func.expect("Must have a start function"); + println!(" zkPC + 2 => RR"); + // TODO(akashin): Figure out why we need to do -1 here. + println!(" :JMP(function_{})", start_func.index() - 1); + println!(" :JMP(finalizeExecution)"); + let num_func_imports = dummy_environ.get_num_func_imports(); let mut total_module_code_size = 0; let mut context = Context::new(); for (def_index, func) in dummy_environ.info.function_bodies.iter() { - println!("; Function {def_index:?}"); + println!("function_{}:", def_index.index()); context.func = func.clone(); let mut saved_size = None; @@ -256,7 +263,10 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - .map_err(|err| anyhow::anyhow!("{}", pretty_error(&err.func, err.inner)))?; let code_info = compiled_code.code_info(); - println!("{}", std::str::from_utf8(compiled_code.code_buffer()).unwrap()); + println!( + "{}", + std::str::from_utf8(compiled_code.code_buffer()).unwrap() + ); if options.print_size { println!( @@ -313,6 +323,12 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - context.clear(); } + let postamble = "finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) +"; + println!("{postamble}"); + if !options.check_translation && options.print_size { println!("Total module code size: {} bytes", total_module_code_size); let total_bytecode_size: usize = dummy_environ.func_bytecode_sizes.iter().sum(); From e0290bd77f8876855957bf53b02706f81cc6143b Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 14:32:34 +0100 Subject: [PATCH 33/68] Run cargo fmt --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 3667 +++++++++-------- cranelift/codegen/src/isa/zkasm/inst/mod.rs | 6 +- cranelift/codegen/src/isa/zkasm/inst/regs.rs | 32 +- .../src/isa/zkasm/inst/unwind/systemv.rs | 2 +- cranelift/codegen/src/isa/zkasm/lower/isle.rs | 4 +- cranelift/src/wasm.rs | 5 +- 6 files changed, 1873 insertions(+), 1843 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs 
b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index f85d724f4bd2..c5b48d123a6a 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -544,12 +544,12 @@ impl MachInstEmit for Inst { // Addi x0, x0, 0 &Inst::Nop4 => { todo!() /* let x = Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: Writable::from_reg(zero_reg()), - rs: zero_reg(), - imm12: Imm12::zero(), - }; - x.emit(&[], sink, emit_info, state) */ + alu_op: AluOPRRI::Addi, + rd: Writable::from_reg(zero_reg()), + rs: zero_reg(), + imm12: Imm12::zero(), + }; + x.emit(&[], sink, emit_info, state) */ } &Inst::Label { imm } => { put_string(&format!("L{imm}:\n"), sink); @@ -565,22 +565,22 @@ impl MachInstEmit for Inst { } &Inst::Lui { rd, ref imm } => { todo!() /* let rd = allocs.next_writable(rd); - let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12); - sink.put4(x); */ + let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12); + sink.put4(x); */ } &Inst::LoadConst32 { rd, imm } => { todo!() /* let rd = allocs.next_writable(rd); - LoadConstant::U32(imm) - .load_constant(rd, &mut |_| rd) - .into_iter() - .for_each(|inst| inst.emit(&[], sink, emit_info, state)); */ + LoadConstant::U32(imm) + .load_constant(rd, &mut |_| rd) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); */ } &Inst::LoadConst64 { rd, imm } => { todo!() /* let rd = allocs.next_writable(rd); - LoadConstant::U64(imm) - .load_constant(rd, &mut |_| rd) - .into_iter() - .for_each(|inst| inst.emit(&[], sink, emit_info, state)); */ + LoadConstant::U64(imm) + .load_constant(rd, &mut |_| rd) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); */ } &Inst::FpuRR { frm, @@ -589,18 +589,18 @@ impl MachInstEmit for Inst { rs, } => { todo!() /* let rs = allocs.next(rs); - let rd = allocs.next_writable(rd); - let x = alu_op.op_code() - | reg_to_gpr_num(rd.to_reg()) << 7 - | alu_op.funct3(frm) << 12 - | reg_to_gpr_num(rs) << 15 - | alu_op.rs2_funct5() << 20 - | alu_op.funct7() << 25; - let srcloc = state.cur_srcloc(); - if !srcloc.is_default() && alu_op.is_convert_to_int() { - sink.add_trap(TrapCode::BadConversionToInteger); - } - sink.put4(x); */ + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3(frm) << 12 + | reg_to_gpr_num(rs) << 15 + | alu_op.rs2_funct5() << 20 + | alu_op.funct7() << 25; + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && alu_op.is_convert_to_int() { + sink.add_trap(TrapCode::BadConversionToInteger); + } + sink.put4(x); */ } &Inst::FpuRRRR { alu_op, @@ -611,18 +611,18 @@ impl MachInstEmit for Inst { frm, } => { todo!() /* let rs1 = allocs.next(rs1); - let rs2 = allocs.next(rs2); - let rs3 = allocs.next(rs3); - let rd = allocs.next_writable(rd); - let x = alu_op.op_code() - | reg_to_gpr_num(rd.to_reg()) << 7 - | alu_op.funct3(frm) << 12 - | reg_to_gpr_num(rs1) << 15 - | reg_to_gpr_num(rs2) << 20 - | alu_op.funct2() << 25 - | reg_to_gpr_num(rs3) << 27; - - sink.put4(x); */ + let rs2 = allocs.next(rs2); + let rs3 = allocs.next(rs3); + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3(frm) << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + | alu_op.funct2() << 25 + | reg_to_gpr_num(rs3) << 27; + + sink.put4(x); */ } &Inst::FpuRRR { alu_op, @@ -632,16 +632,16 @@ impl MachInstEmit for Inst { rs2, } => { todo!() /* let rs1 = allocs.next(rs1); - let rs2 = 
allocs.next(rs2); - let rd = allocs.next_writable(rd); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); - let x: u32 = alu_op.op_code() - | reg_to_gpr_num(rd.to_reg()) << 7 - | (alu_op.funct3(frm)) << 12 - | reg_to_gpr_num(rs1) << 15 - | reg_to_gpr_num(rs2) << 20 - | alu_op.funct7() << 25; - sink.put4(x); */ + let x: u32 = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | (alu_op.funct3(frm)) << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + | alu_op.funct7() << 25; + sink.put4(x); */ } &Inst::Unwind { ref inst } => { put_string(&format!("Unwind\n"), sink); @@ -653,7 +653,7 @@ impl MachInstEmit for Inst { &Inst::AddImm32 { rd, src1, src2 } => { let rd = allocs.next(rd.to_reg()); put_string(&format!("{src1} + {src2} => {}\n", reg_name(rd)), sink); - }, + } &Inst::AluRRR { alu_op, rd, @@ -663,7 +663,16 @@ impl MachInstEmit for Inst { let rs1 = allocs.next(rs1); let rs2 = allocs.next(rs2); let rd = allocs.next_writable(rd); - put_string(&format!("{}, {} => {} : {}\n", reg_name(rs1), reg_name(rs2), reg_name(rd.to_reg()), alu_op.op_name()), sink); + put_string( + &format!( + "{}, {} => {} : {}\n", + reg_name(rs1), + reg_name(rs2), + reg_name(rd.to_reg()), + alu_op.op_name() + ), + sink, + ); /* let (rs1, rs2) = if alu_op.reverse_rs() { @@ -691,8 +700,11 @@ impl MachInstEmit for Inst { let rd = allocs.next_writable(rd); match alu_op { AluOPRRI::Addi => { - put_string(&format!("{} + {imm12} => {}\n", reg_name(rs), reg_name(rd.to_reg())), sink); - }, + put_string( + &format!("{} + {imm12} => {}\n", reg_name(rs), reg_name(rd.to_reg())), + sink, + ); + } _ => unreachable!(), }; @@ -725,9 +737,16 @@ impl MachInstEmit for Inst { (tmp.to_reg(), Imm12::zero()) } }; - put_string(&format!("$ => {} :MLOAD({} + {imm12})\n", reg_name(rd.to_reg()), reg_name(addr)), sink); + put_string( + &format!( + "$ => {} :MLOAD({} + {imm12})\n", + reg_name(rd.to_reg()), + reg_name(addr) + ), + sink, + ); - /* + /* let srcloc = state.cur_srcloc(); if !srcloc.is_default() && !flags.notrap() { // Register the offset at which the actual load instruction starts. @@ -740,9 +759,19 @@ impl MachInstEmit for Inst { let src = allocs.next(src); let stack_offset = to.get_offset_with_state(state); if let Some(base_register) = to.get_base_register() { - put_string(&format!("{} :MSTORE({} + {stack_offset})\n", reg_name(src), reg_name(base_register)), sink); + put_string( + &format!( + "{} :MSTORE({} + {stack_offset})\n", + reg_name(src), + reg_name(base_register) + ), + sink, + ); } else { - put_string(&format!("{} :MSTORE({stack_offset})\n", reg_name(src)), sink); + put_string( + &format!("{} :MSTORE({stack_offset})\n", reg_name(src)), + sink, + ); } /* let to = to.clone().with_allocs(&mut allocs); let src = allocs.next(src); @@ -799,39 +828,39 @@ impl MachInstEmit for Inst { to_bits: _to_bits, } => { todo!() /* let rn = allocs.next(rn); - let rd = allocs.next_writable(rd); - let mut insts = SmallInstVec::new(); - let shift_bits = (64 - from_bits) as i16; - let is_u8 = || from_bits == 8 && signed == false; - if is_u8() { - // special for u8. 
- insts.push(Inst::AluRRImm12 { - alu_op: AluOPRRI::Andi, - rd, - rs: rn, - imm12: Imm12::from_bits(255), - }); - } else { - insts.push(Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd, - rs: rn, - imm12: Imm12::from_bits(shift_bits), - }); - insts.push(Inst::AluRRImm12 { - alu_op: if signed { - AluOPRRI::Srai + let rd = allocs.next_writable(rd); + let mut insts = SmallInstVec::new(); + let shift_bits = (64 - from_bits) as i16; + let is_u8 = || from_bits == 8 && signed == false; + if is_u8() { + // special for u8. + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd, + rs: rn, + imm12: Imm12::from_bits(255), + }); } else { - AluOPRRI::Srli - }, - rd, - rs: rd.to_reg(), - imm12: Imm12::from_bits(shift_bits), - }); - } - insts - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); */ + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd, + rs: rn, + imm12: Imm12::from_bits(shift_bits), + }); + insts.push(Inst::AluRRImm12 { + alu_op: if signed { + AluOPRRI::Srai + } else { + AluOPRRI::Srli + }, + rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(shift_bits), + }); + } + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); */ } &Inst::AdjustSp { amount } => { let amount = if amount > 0 { @@ -841,7 +870,7 @@ impl MachInstEmit for Inst { }; put_string(&format!("SP {amount} => SP\n"), sink); - /* if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { + /* if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { Inst::AluRRImm12 { alu_op: AluOPRRI::Addi, rd: writable_stack_reg(), @@ -866,56 +895,56 @@ impl MachInstEmit for Inst { &Inst::Call { ref info } => { // call todo!() /* match info.dest { - ExternalName::User { .. } => { - if info.opcode.is_call() { - sink.add_call_site(info.opcode); - } - sink.add_reloc(Reloc::RiscvCall, &info.dest, 0); - if let Some(s) = state.take_stack_map() { - sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s); - } - Inst::construct_auipc_and_jalr( - Some(writable_link_reg()), - writable_link_reg(), - 0, - ) - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - } - ExternalName::LibCall(..) - | ExternalName::TestCase { .. } - | ExternalName::KnownSymbol(..) => { - // use indirect call. it is more simple. - // load ext name. - Inst::LoadExtName { - rd: writable_spilltmp_reg2(), - name: Box::new(info.dest.clone()), - offset: 0, - } - .emit(&[], sink, emit_info, state); - - if let Some(s) = state.take_stack_map() { - sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); - } - if info.opcode.is_call() { - sink.add_call_site(info.opcode); - } - // call - Inst::Jalr { - rd: writable_link_reg(), - base: spilltmp_reg2(), - offset: Imm12::zero(), + ExternalName::User { .. } => { + if info.opcode.is_call() { + sink.add_call_site(info.opcode); + } + sink.add_reloc(Reloc::RiscvCall, &info.dest, 0); + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s); + } + Inst::construct_auipc_and_jalr( + Some(writable_link_reg()), + writable_link_reg(), + 0, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + ExternalName::LibCall(..) + | ExternalName::TestCase { .. } + | ExternalName::KnownSymbol(..) => { + // use indirect call. it is more simple. + // load ext name. 
+ Inst::LoadExtName { + rd: writable_spilltmp_reg2(), + name: Box::new(info.dest.clone()), + offset: 0, + } + .emit(&[], sink, emit_info, state); + + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + if info.opcode.is_call() { + sink.add_call_site(info.opcode); + } + // call + Inst::Jalr { + rd: writable_link_reg(), + base: spilltmp_reg2(), + offset: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + } } - .emit(&[], sink, emit_info, state); - } - } - let callee_pop_size = i64::from(info.callee_pop_size); - state.virtual_sp_offset -= callee_pop_size; - trace!( - "call adjusts virtual sp offset by {callee_pop_size} -> {}", - state.virtual_sp_offset - ); */ + let callee_pop_size = i64::from(info.callee_pop_size); + state.virtual_sp_offset -= callee_pop_size; + trace!( + "call adjusts virtual sp offset by {callee_pop_size} -> {}", + state.virtual_sp_offset + ); */ } &Inst::CallInd { ref info } => { // let rn = allocs.next(info.rn); @@ -957,51 +986,51 @@ impl MachInstEmit for Inst { ref info, } => { todo!() /* emit_return_call_common_sequence( - &mut allocs, - sink, - emit_info, - state, - info.new_stack_arg_size, - info.old_stack_arg_size, - &info.uses, - ); + &mut allocs, + sink, + emit_info, + state, + info.new_stack_arg_size, + info.old_stack_arg_size, + &info.uses, + ); - sink.add_call_site(ir::Opcode::ReturnCall); - sink.add_reloc(Reloc::RiscvCall, &callee, 0); - Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0) - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); + sink.add_call_site(ir::Opcode::ReturnCall); + sink.add_reloc(Reloc::RiscvCall, &callee, 0); + Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); - // `emit_return_call_common_sequence` emits an island if - // necessary, so we can safely disable the worst-case-size check - // in this case. - start_off = sink.cur_offset(); */ + // `emit_return_call_common_sequence` emits an island if + // necessary, so we can safely disable the worst-case-size check + // in this case. + start_off = sink.cur_offset(); */ } &Inst::ReturnCallInd { callee, ref info } => { todo!() /* let callee = allocs.next(callee); - emit_return_call_common_sequence( - &mut allocs, - sink, - emit_info, - state, - info.new_stack_arg_size, - info.old_stack_arg_size, - &info.uses, - ); + emit_return_call_common_sequence( + &mut allocs, + sink, + emit_info, + state, + info.new_stack_arg_size, + info.old_stack_arg_size, + &info.uses, + ); - Inst::Jalr { - rd: writable_zero_reg(), - base: callee, - offset: Imm12::zero(), - } - .emit(&[], sink, emit_info, state); + Inst::Jalr { + rd: writable_zero_reg(), + base: callee, + offset: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); - // `emit_return_call_common_sequence` emits an island if - // necessary, so we can safely disable the worst-case-size check - // in this case. - start_off = sink.cur_offset(); */ + // `emit_return_call_common_sequence` emits an island if + // necessary, so we can safely disable the worst-case-size check + // in this case. 
+ start_off = sink.cur_offset(); */ } &Inst::Jal { dest } => { @@ -1014,24 +1043,24 @@ impl MachInstEmit for Inst { } BranchTarget::ResolvedOffset(offset) => { todo!() /* - let offset = offset as i64; - if offset != 0 { - if LabelUse::Jal20.offset_in_range(offset) { - let mut code = code.to_le_bytes(); - LabelUse::Jal20.patch_raw_offset(&mut code, offset); - sink.put_data(&code[..]); - } else { - Inst::construct_auipc_and_jalr( - None, - writable_spilltmp_reg(), - offset, - ) - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - } - } else { - // CondBr often generate Jal {dest : 0}, means otherwise no jump. - } */ + let offset = offset as i64; + if offset != 0 { + if LabelUse::Jal20.offset_in_range(offset) { + let mut code = code.to_le_bytes(); + LabelUse::Jal20.patch_raw_offset(&mut code, offset); + sink.put_data(&code[..]); + } else { + Inst::construct_auipc_and_jalr( + None, + writable_spilltmp_reg(), + offset, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + } else { + // CondBr often generate Jal {dest : 0}, means otherwise no jump. + } */ } } } @@ -1040,40 +1069,39 @@ impl MachInstEmit for Inst { not_taken, mut kind, } => { - todo!() /* kind.rs1 = allocs.next(kind.rs1); - kind.rs2 = allocs.next(kind.rs2); - match taken { - BranchTarget::Label(label) => { - let code = kind.emit(); - let code_inverse = kind.inverse().emit().to_le_bytes(); - sink.use_label_at_offset(start_off, label, LabelUse::B12); - sink.add_cond_branch(start_off, start_off + 4, label, &code_inverse); - sink.put4(code); - } - BranchTarget::ResolvedOffset(offset) => { - assert!(offset != 0); - if LabelUse::B12.offset_in_range(offset as i64) { - let code = kind.emit(); - let mut code = code.to_le_bytes(); - LabelUse::B12.patch_raw_offset(&mut code, offset as i64); - sink.put_data(&code[..]) - } else { - let mut code = kind.emit().to_le_bytes(); - // jump over the condbr , 4 bytes. - LabelUse::B12.patch_raw_offset(&mut code[..], 4); - sink.put_data(&code[..]); - Inst::construct_auipc_and_jalr( - None, - writable_spilltmp_reg(), - offset as i64, - ) - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); + kind.rs2 = allocs.next(kind.rs2); + match taken { + BranchTarget::Label(label) => { + let code = kind.emit(); + let code_inverse = kind.inverse().emit().to_le_bytes(); + sink.use_label_at_offset(start_off, label, LabelUse::B12); + sink.add_cond_branch(start_off, start_off + 4, label, &code_inverse); + sink.put4(code); + } + BranchTarget::ResolvedOffset(offset) => { + assert!(offset != 0); + if LabelUse::B12.offset_in_range(offset as i64) { + let code = kind.emit(); + let mut code = code.to_le_bytes(); + LabelUse::B12.patch_raw_offset(&mut code, offset as i64); + sink.put_data(&code[..]) + } else { + let mut code = kind.emit().to_le_bytes(); + // jump over the condbr , 4 bytes. 
+ LabelUse::B12.patch_raw_offset(&mut code[..], 4); + sink.put_data(&code[..]); + Inst::construct_auipc_and_jalr( + None, + writable_spilltmp_reg(), + offset as i64, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + } } - } - } - Inst::Jal { dest: not_taken }.emit(&[], sink, emit_info, state); */ + Inst::Jal { dest: not_taken }.emit(&[], sink, emit_info, state); */ } &Inst::Mov { rd, rm, ty } => { @@ -1083,7 +1111,10 @@ impl MachInstEmit for Inst { let rm = allocs.next(rm); let rd = allocs.next_writable(rd); - put_string(&format!("{} => {}\n", reg_name(rm), reg_name(rd.to_reg())), sink); + put_string( + &format!("{} => {}\n", reg_name(rm), reg_name(rd.to_reg())), + sink, + ); // match rm.class() { // RegClass::Int => Inst::AluRRImm12 { @@ -1119,14 +1150,14 @@ impl MachInstEmit for Inst { &Inst::MovFromPReg { rd, rm } => { todo!() /* debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); - let rd = allocs.next_writable(rd); - let x = Inst::AluRRImm12 { - alu_op: AluOPRRI::Ori, - rd, - rs: Reg::from(rm), - imm12: Imm12::zero(), - }; - x.emit(&[], sink, emit_info, state); */ + let rd = allocs.next_writable(rd); + let x = Inst::AluRRImm12 { + alu_op: AluOPRRI::Ori, + rd, + rs: Reg::from(rm), + imm12: Imm12::zero(), + }; + x.emit(&[], sink, emit_info, state); */ } &Inst::BrTable { @@ -1136,156 +1167,156 @@ impl MachInstEmit for Inst { ref targets, } => { todo!() /* let index = allocs.next(index); - let tmp1 = allocs.next_writable(tmp1); - let tmp2 = allocs.next_writable(tmp2); - let ext_index = writable_spilltmp_reg(); - - // The default target is passed in as the 0th element of `targets` - // separate it here for clarity. - let default_target = targets[0]; - let targets = &targets[1..]; - - // We emit a bounds check on the index, if the index is larger than the number of - // jump table entries, we jump to the default block. Otherwise we compute a jump - // offset by multiplying the index by 8 (the size of each entry) and then jump to - // that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially. - // - // Build the following sequence: - // - // extend_index: - // zext.w ext_index, index - // bounds_check: - // li tmp, n_labels - // bltu ext_index, tmp, compute_target - // jump_to_default_block: - // auipc pc, 0 - // jalr zero, pc, default_block - // compute_target: - // auipc pc, 0 - // slli tmp, ext_index, 3 - // add pc, pc, tmp - // jalr zero, pc, 0x10 - // jump_table: - // ; This repeats for each entry in the jumptable - // auipc pc, 0 - // jalr zero, pc, block_target - - // Extend the index to 64 bits. - // - // This prevents us branching on the top 32 bits of the index, which - // are undefined. - Inst::Extend { - rd: ext_index, - rn: index, - signed: false, - from_bits: 32, - to_bits: 64, - } - .emit(&[], sink, emit_info, state); + let tmp1 = allocs.next_writable(tmp1); + let tmp2 = allocs.next_writable(tmp2); + let ext_index = writable_spilltmp_reg(); + + // The default target is passed in as the 0th element of `targets` + // separate it here for clarity. + let default_target = targets[0]; + let targets = &targets[1..]; + + // We emit a bounds check on the index, if the index is larger than the number of + // jump table entries, we jump to the default block. Otherwise we compute a jump + // offset by multiplying the index by 8 (the size of each entry) and then jump to + // that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially. 
+ // + // Build the following sequence: + // + // extend_index: + // zext.w ext_index, index + // bounds_check: + // li tmp, n_labels + // bltu ext_index, tmp, compute_target + // jump_to_default_block: + // auipc pc, 0 + // jalr zero, pc, default_block + // compute_target: + // auipc pc, 0 + // slli tmp, ext_index, 3 + // add pc, pc, tmp + // jalr zero, pc, 0x10 + // jump_table: + // ; This repeats for each entry in the jumptable + // auipc pc, 0 + // jalr zero, pc, block_target + + // Extend the index to 64 bits. + // + // This prevents us branching on the top 32 bits of the index, which + // are undefined. + Inst::Extend { + rd: ext_index, + rn: index, + signed: false, + from_bits: 32, + to_bits: 64, + } + .emit(&[], sink, emit_info, state); - // Bounds check. - // - // Check if the index passed in is larger than the number of jumptable - // entries that we have. If it is, we fallthrough to a jump into the - // default block. - Inst::load_constant_u32(tmp2, targets.len() as u64, &mut |_| tmp2) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - Inst::CondBr { - taken: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 3), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::UnsignedLessThan, - rs1: ext_index.to_reg(), - rs2: tmp2.to_reg(), - }, - } - .emit(&[], sink, emit_info, state); - sink.use_label_at_offset( - sink.cur_offset(), - default_target.as_label().unwrap(), - LabelUse::PCRel32, - ); - Inst::construct_auipc_and_jalr(None, tmp2, 0) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); + // Bounds check. + // + // Check if the index passed in is larger than the number of jumptable + // entries that we have. If it is, we fallthrough to a jump into the + // default block. + Inst::load_constant_u32(tmp2, targets.len() as u64, &mut |_| tmp2) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::CondBr { + taken: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 3), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::UnsignedLessThan, + rs1: ext_index.to_reg(), + rs2: tmp2.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + sink.use_label_at_offset( + sink.cur_offset(), + default_target.as_label().unwrap(), + LabelUse::PCRel32, + ); + Inst::construct_auipc_and_jalr(None, tmp2, 0) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); - // Compute the jump table offset. - // We need to emit a PC relative offset, + // Compute the jump table offset. + // We need to emit a PC relative offset, - // Get the current PC. - Inst::Auipc { - rd: tmp1, - imm: Imm20::from_bits(0), - } - .emit(&[], sink, emit_info, state); + // Get the current PC. + Inst::Auipc { + rd: tmp1, + imm: Imm20::from_bits(0), + } + .emit(&[], sink, emit_info, state); - // Multiply the index by 8, since that is the size in - // bytes of each jump table entry - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp2, - rs: ext_index.to_reg(), - imm12: Imm12::from_bits(3), - } - .emit(&[], sink, emit_info, state); + // Multiply the index by 8, since that is the size in + // bytes of each jump table entry + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: ext_index.to_reg(), + imm12: Imm12::from_bits(3), + } + .emit(&[], sink, emit_info, state); - // Calculate the base of the jump, PC + the offset from above. - Inst::AluRRR { - alu_op: AluOPRRR::Add, - rd: tmp1, - rs1: tmp1.to_reg(), - rs2: tmp2.to_reg(), - } - .emit(&[], sink, emit_info, state); + // Calculate the base of the jump, PC + the offset from above. 
+ Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: tmp1, + rs1: tmp1.to_reg(), + rs2: tmp2.to_reg(), + } + .emit(&[], sink, emit_info, state); - // Jump to the middle of the jump table. - // We add a 16 byte offset here, since we used 4 instructions - // since the AUIPC that was used to get the PC. - Inst::Jalr { - rd: writable_zero_reg(), - base: tmp1.to_reg(), - offset: Imm12::from_bits((4 * Inst::INSTRUCTION_SIZE) as i16), - } - .emit(&[], sink, emit_info, state); + // Jump to the middle of the jump table. + // We add a 16 byte offset here, since we used 4 instructions + // since the AUIPC that was used to get the PC. + Inst::Jalr { + rd: writable_zero_reg(), + base: tmp1.to_reg(), + offset: Imm12::from_bits((4 * Inst::INSTRUCTION_SIZE) as i16), + } + .emit(&[], sink, emit_info, state); - // Emit the jump table. - // - // Each entry is a aupc + jalr to the target block. We also start with a island - // if necessary. - - // Each entry in the jump table is 2 instructions, so 8 bytes. Check if - // we need to emit a jump table here to support that jump. - let distance = (targets.len() * 2 * Inst::INSTRUCTION_SIZE as usize) as u32; - if sink.island_needed(distance) { - sink.emit_island(&mut state.ctrl_plane); - } + // Emit the jump table. + // + // Each entry is a aupc + jalr to the target block. We also start with a island + // if necessary. - // Emit the jumps back to back - for target in targets.iter() { - sink.use_label_at_offset( - sink.cur_offset(), - target.as_label().unwrap(), - LabelUse::PCRel32, - ); + // Each entry in the jump table is 2 instructions, so 8 bytes. Check if + // we need to emit a jump table here to support that jump. + let distance = (targets.len() * 2 * Inst::INSTRUCTION_SIZE as usize) as u32; + if sink.island_needed(distance) { + sink.emit_island(&mut state.ctrl_plane); + } - Inst::construct_auipc_and_jalr(None, tmp2, 0) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - } + // Emit the jumps back to back + for target in targets.iter() { + sink.use_label_at_offset( + sink.cur_offset(), + target.as_label().unwrap(), + LabelUse::PCRel32, + ); + + Inst::construct_auipc_and_jalr(None, tmp2, 0) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } - // We've just emitted an island that is safe up to *here*. - // Mark it as such so that we don't needlessly emit additional islands. - start_off = sink.cur_offset(); */ + // We've just emitted an island that is safe up to *here*. + // Mark it as such so that we don't needlessly emit additional islands. 
+ start_off = sink.cur_offset(); */ } &Inst::VirtualSPOffsetAdj { amount } => { todo!() /* crate::trace!( - "virtual sp offset adjusted by {} -> {}", - amount, - state.virtual_sp_offset + amount - ); - state.virtual_sp_offset += amount; */ + "virtual sp offset adjusted by {} -> {}", + amount, + state.virtual_sp_offset + amount + ); + state.virtual_sp_offset += amount; */ } &Inst::Atomic { op, @@ -1295,95 +1326,95 @@ impl MachInstEmit for Inst { amo, } => { todo!() /* let addr = allocs.next(addr); - let src = allocs.next(src); - let rd = allocs.next_writable(rd); - let srcloc = state.cur_srcloc(); - if !srcloc.is_default() { - sink.add_trap(TrapCode::HeapOutOfBounds); - } - let x = op.op_code() - | reg_to_gpr_num(rd.to_reg()) << 7 - | op.funct3() << 12 - | reg_to_gpr_num(addr) << 15 - | reg_to_gpr_num(src) << 20 - | op.funct7(amo) << 25; + let src = allocs.next(src); + let rd = allocs.next_writable(rd); + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() { + sink.add_trap(TrapCode::HeapOutOfBounds); + } + let x = op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | op.funct3() << 12 + | reg_to_gpr_num(addr) << 15 + | reg_to_gpr_num(src) << 20 + | op.funct7(amo) << 25; - sink.put4(x); */ + sink.put4(x); */ } &Inst::Fence { pred, succ } => { todo!() /* let x = 0b0001111 - | 0b00000 << 7 - | 0b000 << 12 - | 0b00000 << 15 - | (succ as u32) << 20 - | (pred as u32) << 24; + | 0b00000 << 7 + | 0b000 << 12 + | 0b00000 << 15 + | (succ as u32) << 20 + | (pred as u32) << 24; - sink.put4(x); */ + sink.put4(x); */ } &Inst::FenceI => todo!(), // sink.put4(0x0000100f), &Inst::Auipc { rd, imm } => { todo!() /* let rd = allocs.next_writable(rd); - let x = enc_auipc(rd, imm); - sink.put4(x); */ + let x = enc_auipc(rd, imm); + sink.put4(x); */ } &Inst::LoadAddr { rd, mem } => { todo!() /* let mem = mem.with_allocs(&mut allocs); - let rd = allocs.next_writable(rd); - - let base = mem.get_base_register(); - let offset = mem.get_offset_with_state(state); - let offset_imm12 = Imm12::maybe_from_u64(offset as u64); - - match (mem, base, offset_imm12) { - (_, Some(rs), Some(imm12)) => { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd, - rs, - imm12, - } - .emit(&[], sink, emit_info, state); - } - (_, Some(rs), None) => { - LoadConstant::U64(offset as u64) - .load_constant_and_add(rd, rs) - .into_iter() - .for_each(|inst| inst.emit(&[], sink, emit_info, state)); - } - (AMode::Const(addr), None, _) => { - // Get an address label for the constant and recurse. - let label = sink.get_label_for_constant(addr); - Inst::LoadAddr { - rd, - mem: AMode::Label(label), - } - .emit(&[], sink, emit_info, state); - } - (AMode::Label(label), None, _) => { - // Get the current PC. - sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20); - let inst = Inst::Auipc { - rd, - imm: Imm20::from_bits(0), - }; - inst.emit(&[], sink, emit_info, state); - - // Emit an add to the address with a relocation. - // This later gets patched up with the correct offset. 
- sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd, - rs: rd.to_reg(), - imm12: Imm12::zero(), - } - .emit(&[], sink, emit_info, state); - } - (amode, _, _) => { - unimplemented!("LoadAddr: {:?}", amode); - } - } */ + let rd = allocs.next_writable(rd); + + let base = mem.get_base_register(); + let offset = mem.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + + match (mem, base, offset_imm12) { + (_, Some(rs), Some(imm12)) => { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + } + .emit(&[], sink, emit_info, state); + } + (_, Some(rs), None) => { + LoadConstant::U64(offset as u64) + .load_constant_and_add(rd, rs) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + } + (AMode::Const(addr), None, _) => { + // Get an address label for the constant and recurse. + let label = sink.get_label_for_constant(addr); + Inst::LoadAddr { + rd, + mem: AMode::Label(label), + } + .emit(&[], sink, emit_info, state); + } + (AMode::Label(label), None, _) => { + // Get the current PC. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20); + let inst = Inst::Auipc { + rd, + imm: Imm20::from_bits(0), + }; + inst.emit(&[], sink, emit_info, state); + + // Emit an add to the address with a relocation. + // This later gets patched up with the correct offset. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs: rd.to_reg(), + imm12: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + } + (amode, _, _) => { + unimplemented!("LoadAddr: {:?}", amode); + } + } */ } &Inst::Select { @@ -1394,55 +1425,55 @@ impl MachInstEmit for Inst { ty: _ty, } => { todo!() /* let condition = allocs.next(condition); - let x = alloc_value_regs(x, &mut allocs); - let y = alloc_value_regs(y, &mut allocs); - let dst: Vec<_> = dst - .clone() - .into_iter() - .map(|r| allocs.next_writable(r)) - .collect(); - - let mut insts = SmallInstVec::new(); - let label_false = sink.get_label(); - insts.push(Inst::CondBr { - taken: BranchTarget::Label(label_false), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::Equal, - rs1: condition, - rs2: zero_reg(), - }, - }); - // here is the true - // select the first value - insts.extend(gen_moves(&dst[..], x.regs())); - let label_jump_over = sink.get_label(); - insts.push(Inst::Jal { - dest: BranchTarget::Label(label_jump_over), - }); - // here is false - insts - .drain(..) 
- .for_each(|i: Inst| i.emit(&[], sink, emit_info, state)); - sink.bind_label(label_false, &mut state.ctrl_plane); - // select second value1 - insts.extend(gen_moves(&dst[..], y.regs())); - insts - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ - } - &Inst::Jalr { rd, base, offset } => { - todo!() /* let rd = allocs.next_writable(rd); - let x = enc_jalr(rd, base, offset); - sink.put4(x); */ - } - &Inst::ECall => { - todo!() // sink.put4(0x00000073); - } - &Inst::EBreak => { - todo!() // sink.put4(0x00100073); - } + let x = alloc_value_regs(x, &mut allocs); + let y = alloc_value_regs(y, &mut allocs); + let dst: Vec<_> = dst + .clone() + .into_iter() + .map(|r| allocs.next_writable(r)) + .collect(); + + let mut insts = SmallInstVec::new(); + let label_false = sink.get_label(); + insts.push(Inst::CondBr { + taken: BranchTarget::Label(label_false), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: condition, + rs2: zero_reg(), + }, + }); + // here is the true + // select the first value + insts.extend(gen_moves(&dst[..], x.regs())); + let label_jump_over = sink.get_label(); + insts.push(Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + }); + // here is false + insts + .drain(..) + .for_each(|i: Inst| i.emit(&[], sink, emit_info, state)); + sink.bind_label(label_false, &mut state.ctrl_plane); + // select second value1 + insts.extend(gen_moves(&dst[..], y.regs())); + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + &Inst::Jalr { rd, base, offset } => { + todo!() /* let rd = allocs.next_writable(rd); + let x = enc_jalr(rd, base, offset); + sink.put4(x); */ + } + &Inst::ECall => { + todo!() // sink.put4(0x00000073); + } + &Inst::EBreak => { + todo!() // sink.put4(0x00100073); + } &Inst::Icmp { cc, rd, @@ -1452,7 +1483,7 @@ impl MachInstEmit for Inst { } => { put_string(&format!("{a:?}, {b:?} => {:?} : CMP\n", rd), sink); - /* let a = alloc_value_regs(a, &mut allocs); + /* let a = alloc_value_regs(a, &mut allocs); let b = alloc_value_regs(b, &mut allocs); let rd = allocs.next_writable(rd); let label_true = sink.get_label(); @@ -1487,306 +1518,306 @@ impl MachInstEmit for Inst { ty, } => { todo!() /* let offset = allocs.next(offset); - let e = allocs.next(e); - let addr = allocs.next(addr); - let v = allocs.next(v); - let t0 = allocs.next_writable(t0); - let dst = allocs.next_writable(dst); - - // # addr holds address of memory location - // # e holds expected value - // # v holds desired value - // # dst holds return value - // cas: - // lr.w dst, (addr) # Load original value. - // bne dst, e, fail # Doesn’t match, so fail. - // sc.w t0, v, (addr) # Try to update. - // bnez t0 , cas # if store not ok,retry. 
- // fail: - let fail_label = sink.get_label(); - let cas_lebel = sink.get_label(); - sink.bind_label(cas_lebel, &mut state.ctrl_plane); - Inst::Atomic { - op: AtomicOP::load_op(ty), - rd: dst, - addr, - src: zero_reg(), - amo: AMO::SeqCst, - } - .emit(&[], sink, emit_info, state); - if ty.bits() < 32 { - AtomicOP::extract(dst, offset, dst.to_reg(), ty) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - } else if ty.bits() == 32 { - Inst::Extend { - rd: dst, - rn: dst.to_reg(), - signed: false, - from_bits: 32, - to_bits: 64, - } - .emit(&[], sink, emit_info, state); - } - Inst::CondBr { - taken: BranchTarget::Label(fail_label), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: e, - rs2: dst.to_reg(), - }, - } - .emit(&[], sink, emit_info, state); - let store_value = if ty.bits() < 32 { - // reload value to t0. - Inst::Atomic { - op: AtomicOP::load_op(ty), - rd: t0, - addr, - src: zero_reg(), - amo: AMO::SeqCst, - } - .emit(&[], sink, emit_info, state); - // set reset part. - AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - t0.to_reg() - } else { - v - }; - Inst::Atomic { - op: AtomicOP::store_op(ty), - rd: t0, - addr, - src: store_value, - amo: AMO::SeqCst, - } - .emit(&[], sink, emit_info, state); - // check is our value stored. - Inst::CondBr { - taken: BranchTarget::Label(cas_lebel), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: t0.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - sink.bind_label(fail_label, &mut state.ctrl_plane); */ - } - &Inst::AtomicRmwLoop { - offset, - op, - dst, - ty, - p, - x, - t0, - } => { - todo!() /* let offset = allocs.next(offset); - let p = allocs.next(p); - let x = allocs.next(x); - let t0 = allocs.next_writable(t0); - let dst = allocs.next_writable(dst); - let retry = sink.get_label(); - sink.bind_label(retry, &mut state.ctrl_plane); - // load old value. - Inst::Atomic { - op: AtomicOP::load_op(ty), - rd: dst, - addr: p, - src: zero_reg(), - amo: AMO::SeqCst, - } - .emit(&[], sink, emit_info, state); - // - - let store_value: Reg = match op { - crate::ir::AtomicRmwOp::Add - | crate::ir::AtomicRmwOp::Sub - | crate::ir::AtomicRmwOp::And - | crate::ir::AtomicRmwOp::Or - | crate::ir::AtomicRmwOp::Xor => { - AtomicOP::extract(dst, offset, dst.to_reg(), ty) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - Inst::AluRRR { - alu_op: match op { - crate::ir::AtomicRmwOp::Add => AluOPRRR::Add, - crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub, - crate::ir::AtomicRmwOp::And => AluOPRRR::And, - crate::ir::AtomicRmwOp::Or => AluOPRRR::Or, - crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor, - _ => unreachable!(), - }, - rd: t0, - rs1: dst.to_reg(), - rs2: x, - } - .emit(&[], sink, emit_info, state); + let e = allocs.next(e); + let addr = allocs.next(addr); + let v = allocs.next(v); + let t0 = allocs.next_writable(t0); + let dst = allocs.next_writable(dst); + + // # addr holds address of memory location + // # e holds expected value + // # v holds desired value + // # dst holds return value + // cas: + // lr.w dst, (addr) # Load original value. + // bne dst, e, fail # Doesn’t match, so fail. + // sc.w t0, v, (addr) # Try to update. + // bnez t0 , cas # if store not ok,retry. 
+ // fail: + let fail_label = sink.get_label(); + let cas_lebel = sink.get_label(); + sink.bind_label(cas_lebel, &mut state.ctrl_plane); Inst::Atomic { op: AtomicOP::load_op(ty), - rd: writable_spilltmp_reg2(), - addr: p, + rd: dst, + addr, src: zero_reg(), amo: AMO::SeqCst, } .emit(&[], sink, emit_info, state); - AtomicOP::merge( - writable_spilltmp_reg2(), - writable_spilltmp_reg(), - offset, - t0.to_reg(), - ty, - ) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - spilltmp_reg2() - } - crate::ir::AtomicRmwOp::Nand => { if ty.bits() < 32 { AtomicOP::extract(dst, offset, dst.to_reg(), ty) .iter() .for_each(|i| i.emit(&[], sink, emit_info, state)); + } else if ty.bits() == 32 { + Inst::Extend { + rd: dst, + rn: dst.to_reg(), + signed: false, + from_bits: 32, + to_bits: 64, + } + .emit(&[], sink, emit_info, state); } - Inst::AluRRR { - alu_op: AluOPRRR::And, - rd: t0, - rs1: x, - rs2: dst.to_reg(), + Inst::CondBr { + taken: BranchTarget::Label(fail_label), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: e, + rs2: dst.to_reg(), + }, } .emit(&[], sink, emit_info, state); - Inst::construct_bit_not(t0, t0.to_reg()).emit(&[], sink, emit_info, state); - if ty.bits() < 32 { + let store_value = if ty.bits() < 32 { + // reload value to t0. Inst::Atomic { op: AtomicOP::load_op(ty), - rd: writable_spilltmp_reg2(), - addr: p, + rd: t0, + addr, src: zero_reg(), amo: AMO::SeqCst, } .emit(&[], sink, emit_info, state); - AtomicOP::merge( - writable_spilltmp_reg2(), - writable_spilltmp_reg(), - offset, - t0.to_reg(), - ty, - ) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - spilltmp_reg2() - } else { + // set reset part. + AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); t0.to_reg() - } - } - - crate::ir::AtomicRmwOp::Umin - | crate::ir::AtomicRmwOp::Umax - | crate::ir::AtomicRmwOp::Smin - | crate::ir::AtomicRmwOp::Smax => { - let label_select_dst = sink.get_label(); - let label_select_done = sink.get_label(); - if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax - { - AtomicOP::extract(dst, offset, dst.to_reg(), ty) } else { - AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty) + v + }; + Inst::Atomic { + op: AtomicOP::store_op(ty), + rd: t0, + addr, + src: store_value, + amo: AMO::SeqCst, } - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - Inst::lower_br_icmp( - match op { - crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan, - crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan, - crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan, - crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan, - _ => unreachable!(), + .emit(&[], sink, emit_info, state); + // check is our value stored. + Inst::CondBr { + taken: BranchTarget::Label(cas_lebel), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: t0.to_reg(), + rs2: zero_reg(), }, - ValueRegs::one(dst.to_reg()), - ValueRegs::one(x), - BranchTarget::Label(label_select_dst), - BranchTarget::zero(), - ty, - ) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - // here we select x. 
- Inst::gen_move(t0, x, I64).emit(&[], sink, emit_info, state); - Inst::Jal { - dest: BranchTarget::Label(label_select_done), } .emit(&[], sink, emit_info, state); - sink.bind_label(label_select_dst, &mut state.ctrl_plane); - Inst::gen_move(t0, dst.to_reg(), I64).emit(&[], sink, emit_info, state); - sink.bind_label(label_select_done, &mut state.ctrl_plane); + sink.bind_label(fail_label, &mut state.ctrl_plane); */ + } + &Inst::AtomicRmwLoop { + offset, + op, + dst, + ty, + p, + x, + t0, + } => { + todo!() /* let offset = allocs.next(offset); + let p = allocs.next(p); + let x = allocs.next(x); + let t0 = allocs.next_writable(t0); + let dst = allocs.next_writable(dst); + let retry = sink.get_label(); + sink.bind_label(retry, &mut state.ctrl_plane); + // load old value. Inst::Atomic { op: AtomicOP::load_op(ty), - rd: writable_spilltmp_reg2(), + rd: dst, addr: p, src: zero_reg(), amo: AMO::SeqCst, } .emit(&[], sink, emit_info, state); - AtomicOP::merge( - writable_spilltmp_reg2(), - writable_spilltmp_reg(), - offset, - t0.to_reg(), - ty, - ) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - spilltmp_reg2() - } - crate::ir::AtomicRmwOp::Xchg => { - AtomicOP::extract(dst, offset, dst.to_reg(), ty) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); + // + + let store_value: Reg = match op { + crate::ir::AtomicRmwOp::Add + | crate::ir::AtomicRmwOp::Sub + | crate::ir::AtomicRmwOp::And + | crate::ir::AtomicRmwOp::Or + | crate::ir::AtomicRmwOp::Xor => { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::AluRRR { + alu_op: match op { + crate::ir::AtomicRmwOp::Add => AluOPRRR::Add, + crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub, + crate::ir::AtomicRmwOp::And => AluOPRRR::And, + crate::ir::AtomicRmwOp::Or => AluOPRRR::Or, + crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor, + _ => unreachable!(), + }, + rd: t0, + rs1: dst.to_reg(), + rs2: x, + } + .emit(&[], sink, emit_info, state); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + crate::ir::AtomicRmwOp::Nand => { + if ty.bits() < 32 { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: t0, + rs1: x, + rs2: dst.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::construct_bit_not(t0, t0.to_reg()).emit(&[], sink, emit_info, state); + if ty.bits() < 32 { + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } else { + t0.to_reg() + } + } + + crate::ir::AtomicRmwOp::Umin + | crate::ir::AtomicRmwOp::Umax + | crate::ir::AtomicRmwOp::Smin + | crate::ir::AtomicRmwOp::Smax => { + let label_select_dst = sink.get_label(); + let label_select_done = sink.get_label(); + if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax + { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + } else { + 
AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::lower_br_icmp( + match op { + crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan, + crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan, + crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan, + crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan, + _ => unreachable!(), + }, + ValueRegs::one(dst.to_reg()), + ValueRegs::one(x), + BranchTarget::Label(label_select_dst), + BranchTarget::zero(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + // here we select x. + Inst::gen_move(t0, x, I64).emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_select_done), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_select_dst, &mut state.ctrl_plane); + Inst::gen_move(t0, dst.to_reg(), I64).emit(&[], sink, emit_info, state); + sink.bind_label(label_select_done, &mut state.ctrl_plane); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + crate::ir::AtomicRmwOp::Xchg => { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + x, + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + }; + Inst::Atomic { - op: AtomicOP::load_op(ty), - rd: writable_spilltmp_reg2(), + op: AtomicOP::store_op(ty), + rd: t0, addr: p, - src: zero_reg(), + src: store_value, amo: AMO::SeqCst, } .emit(&[], sink, emit_info, state); - AtomicOP::merge( - writable_spilltmp_reg2(), - writable_spilltmp_reg(), - offset, - x, - ty, - ) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - spilltmp_reg2() - } - }; - Inst::Atomic { - op: AtomicOP::store_op(ty), - rd: t0, - addr: p, - src: store_value, - amo: AMO::SeqCst, - } - .emit(&[], sink, emit_info, state); - - // if store is not ok,retry. - Inst::CondBr { - taken: BranchTarget::Label(retry), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: t0.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); */ + // if store is not ok,retry. 
+ Inst::CondBr { + taken: BranchTarget::Label(retry), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: t0.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); */ } &Inst::IntSelect { @@ -1797,51 +1828,51 @@ impl MachInstEmit for Inst { ty, } => { todo!() /* let x = alloc_value_regs(x, &mut allocs); - let y = alloc_value_regs(y, &mut allocs); - let dst: Vec<_> = dst.iter().map(|r| allocs.next_writable(*r)).collect(); - let label_true = sink.get_label(); - let label_false = sink.get_label(); - let label_done = sink.get_label(); - Inst::lower_br_icmp( - op.to_int_cc(), - x, - y, - BranchTarget::Label(label_true), - BranchTarget::Label(label_false), - ty, - ) - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - - let gen_move = |dst: &Vec>, - val: &ValueRegs, - sink: &mut MachBuffer, - state: &mut EmitState| { - let mut insts = SmallInstVec::new(); - insts.push(Inst::Mov { - rd: dst[0], - rm: val.regs()[0], - ty: I64, - }); - if ty.bits() == 128 { - insts.push(Inst::Mov { - rd: dst[1], - rm: val.regs()[1], + let y = alloc_value_regs(y, &mut allocs); + let dst: Vec<_> = dst.iter().map(|r| allocs.next_writable(*r)).collect(); + let label_true = sink.get_label(); + let label_false = sink.get_label(); + let label_done = sink.get_label(); + Inst::lower_br_icmp( + op.to_int_cc(), + x, + y, + BranchTarget::Label(label_true), + BranchTarget::Label(label_false), ty, - }); - } - insts + ) .into_iter() .for_each(|i| i.emit(&[], sink, emit_info, state)); - }; - //here is true , use x. - sink.bind_label(label_true, &mut state.ctrl_plane); - gen_move(&dst, &x, sink, state); - Inst::gen_jump(label_done).emit(&[], sink, emit_info, state); - // here is false use y - sink.bind_label(label_false, &mut state.ctrl_plane); - gen_move(&dst, &y, sink, state); - sink.bind_label(label_done, &mut state.ctrl_plane); */ + + let gen_move = |dst: &Vec>, + val: &ValueRegs, + sink: &mut MachBuffer, + state: &mut EmitState| { + let mut insts = SmallInstVec::new(); + insts.push(Inst::Mov { + rd: dst[0], + rm: val.regs()[0], + ty: I64, + }); + if ty.bits() == 128 { + insts.push(Inst::Mov { + rd: dst[1], + rm: val.regs()[1], + ty, + }); + } + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + }; + //here is true , use x. 
+ sink.bind_label(label_true, &mut state.ctrl_plane); + gen_move(&dst, &x, sink, state); + Inst::gen_jump(label_done).emit(&[], sink, emit_info, state); + // here is false use y + sink.bind_label(label_false, &mut state.ctrl_plane); + gen_move(&dst, &y, sink, state); + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::SelectReg { @@ -1851,29 +1882,29 @@ impl MachInstEmit for Inst { rs2, } => { todo!() /* let mut condition = condition.clone(); - condition.rs1 = allocs.next(condition.rs1); - condition.rs2 = allocs.next(condition.rs2); - let rs1 = allocs.next(rs1); - let rs2 = allocs.next(rs2); - let rd = allocs.next_writable(rd); - let label_true = sink.get_label(); - let label_jump_over = sink.get_label(); - let ty = Inst::canonical_type_for_rc(rs1.class()); - - sink.use_label_at_offset(sink.cur_offset(), label_true, LabelUse::B12); - let x = condition.emit(); - sink.put4(x); - // here is false , use rs2 - Inst::gen_move(rd, rs2, ty).emit(&[], sink, emit_info, state); - // and jump over - Inst::Jal { - dest: BranchTarget::Label(label_jump_over), - } - .emit(&[], sink, emit_info, state); - // here condition is true , use rs1 - sink.bind_label(label_true, &mut state.ctrl_plane); - Inst::gen_move(rd, rs1, ty).emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + condition.rs1 = allocs.next(condition.rs1); + condition.rs2 = allocs.next(condition.rs2); + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + let label_true = sink.get_label(); + let label_jump_over = sink.get_label(); + let ty = Inst::canonical_type_for_rc(rs1.class()); + + sink.use_label_at_offset(sink.cur_offset(), label_true, LabelUse::B12); + let x = condition.emit(); + sink.put4(x); + // here is false , use rs2 + Inst::gen_move(rd, rs2, ty).emit(&[], sink, emit_info, state); + // and jump over + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here condition is true , use rs1 + sink.bind_label(label_true, &mut state.ctrl_plane); + Inst::gen_move(rd, rs1, ty).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::FcvtToInt { is_sat, @@ -1885,166 +1916,166 @@ impl MachInstEmit for Inst { tmp, } => { todo!() /* let rs = allocs.next(rs); - let tmp = allocs.next_writable(tmp); - let rd = allocs.next_writable(rd); - let label_nan = sink.get_label(); - let label_jump_over = sink.get_label(); - // get if nan. - Inst::emit_not_nan(rd, rs, in_type).emit(&[], sink, emit_info, state); - // jump to nan. - Inst::CondBr { - taken: BranchTarget::Label(label_nan), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::Equal, - rs2: zero_reg(), - rs1: rd.to_reg(), - }, - } - .emit(&[], sink, emit_info, state); + let tmp = allocs.next_writable(tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_jump_over = sink.get_label(); + // get if nan. + Inst::emit_not_nan(rd, rs, in_type).emit(&[], sink, emit_info, state); + // jump to nan. 
+ Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs2: zero_reg(), + rs1: rd.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); - if !is_sat { - let f32_bounds = f32_cvt_to_int_bounds(is_signed, out_type.bits() as u8); - let f64_bounds = f64_cvt_to_int_bounds(is_signed, out_type.bits() as u8); - if in_type == F32 { - Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.0), |_| { - writable_spilltmp_reg() - }) - } else { - Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.0), |_| { - writable_spilltmp_reg() - }) - } - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - - let le_op = if in_type == F32 { - FpuOPRRR::FleS - } else { - FpuOPRRR::FleD - }; - - // rd := rs <= tmp - Inst::FpuRRR { - alu_op: le_op, - frm: None, - rd, - rs1: rs, - rs2: tmp.to_reg(), - } - .emit(&[], sink, emit_info, state); - Inst::TrapIf { - test: rd.to_reg(), - trap_code: TrapCode::IntegerOverflow, - } - .emit(&[], sink, emit_info, state); + if !is_sat { + let f32_bounds = f32_cvt_to_int_bounds(is_signed, out_type.bits() as u8); + let f64_bounds = f64_cvt_to_int_bounds(is_signed, out_type.bits() as u8); + if in_type == F32 { + Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.0), |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.0), |_| { + writable_spilltmp_reg() + }) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); - if in_type == F32 { - Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.1), |_| { - writable_spilltmp_reg() - }) - } else { - Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.1), |_| { - writable_spilltmp_reg() - }) - } - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - - // rd := rs >= tmp - Inst::FpuRRR { - alu_op: le_op, - frm: None, - rd, - rs1: tmp.to_reg(), - rs2: rs, - } - .emit(&[], sink, emit_info, state); + let le_op = if in_type == F32 { + FpuOPRRR::FleS + } else { + FpuOPRRR::FleD + }; + + // rd := rs <= tmp + Inst::FpuRRR { + alu_op: le_op, + frm: None, + rd, + rs1: rs, + rs2: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::TrapIf { + test: rd.to_reg(), + trap_code: TrapCode::IntegerOverflow, + } + .emit(&[], sink, emit_info, state); - Inst::TrapIf { - test: rd.to_reg(), - trap_code: TrapCode::IntegerOverflow, - } - .emit(&[], sink, emit_info, state); - } - // convert to int normally. - Inst::FpuRR { - frm: Some(FRM::RTZ), - alu_op: FpuOPRR::float_convert_2_int_op(in_type, is_signed, out_type), - rd, - rs, - } - .emit(&[], sink, emit_info, state); - if out_type.bits() < 32 && is_signed { - // load value part mask. - Inst::load_constant_u32( - writable_spilltmp_reg(), - if 16 == out_type.bits() { - (u16::MAX >> 1) as u64 - } else { - // I8 - (u8::MAX >> 1) as u64 - }, - &mut |_| writable_spilltmp_reg2(), - ) - .into_iter() - .for_each(|x| x.emit(&[], sink, emit_info, state)); - // keep value part. - Inst::AluRRR { - alu_op: AluOPRRR::And, - rd: writable_spilltmp_reg(), - rs1: rd.to_reg(), - rs2: spilltmp_reg(), - } - .emit(&[], sink, emit_info, state); - // extact sign bit. - Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: rd, - rs: rd.to_reg(), - imm12: Imm12::from_bits(31), - } - .emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: rd, - rs: rd.to_reg(), - imm12: Imm12::from_bits(if 16 == out_type.bits() { - 15 - } else { - // I8 - 7 - }), - } - .emit(&[], sink, emit_info, state); - // make result,sign bit and value part. 
- Inst::AluRRR { - alu_op: AluOPRRR::Or, - rd: rd, - rs1: rd.to_reg(), - rs2: spilltmp_reg(), - } - .emit(&[], sink, emit_info, state); - } + if in_type == F32 { + Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.1), |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.1), |_| { + writable_spilltmp_reg() + }) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); - // I already have the result,jump over. - Inst::Jal { - dest: BranchTarget::Label(label_jump_over), - } - .emit(&[], sink, emit_info, state); - // here is nan , move 0 into rd register - sink.bind_label(label_nan, &mut state.ctrl_plane); - if is_sat { - Inst::load_imm12(rd, Imm12::from_bits(0)).emit(&[], sink, emit_info, state); - } else { - // here is ud2. - Inst::Udf { - trap_code: TrapCode::BadConversionToInteger, - } - .emit(&[], sink, emit_info, state); - } - // bind jump_over - sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + // rd := rs >= tmp + Inst::FpuRRR { + alu_op: le_op, + frm: None, + rd, + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + + Inst::TrapIf { + test: rd.to_reg(), + trap_code: TrapCode::IntegerOverflow, + } + .emit(&[], sink, emit_info, state); + } + // convert to int normally. + Inst::FpuRR { + frm: Some(FRM::RTZ), + alu_op: FpuOPRR::float_convert_2_int_op(in_type, is_signed, out_type), + rd, + rs, + } + .emit(&[], sink, emit_info, state); + if out_type.bits() < 32 && is_signed { + // load value part mask. + Inst::load_constant_u32( + writable_spilltmp_reg(), + if 16 == out_type.bits() { + (u16::MAX >> 1) as u64 + } else { + // I8 + (u8::MAX >> 1) as u64 + }, + &mut |_| writable_spilltmp_reg2(), + ) + .into_iter() + .for_each(|x| x.emit(&[], sink, emit_info, state)); + // keep value part. + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg(), + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + // extact sign bit. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(31), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(if 16 == out_type.bits() { + 15 + } else { + // I8 + 7 + }), + } + .emit(&[], sink, emit_info, state); + // make result,sign bit and value part. + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + } + + // I already have the result,jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan , move 0 into rd register + sink.bind_label(label_nan, &mut state.ctrl_plane); + if is_sat { + Inst::load_imm12(rd, Imm12::from_bits(0)).emit(&[], sink, emit_info, state); + } else { + // here is ud2. 
+ Inst::Udf { + trap_code: TrapCode::BadConversionToInteger, + } + .emit(&[], sink, emit_info, state); + } + // bind jump_over + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::LoadExtName { @@ -2091,87 +2122,87 @@ impl MachInstEmit for Inst { trap_code, } => { todo!() /* let rs1 = allocs.next(rs1); - let rs2 = allocs.next(rs2); - let label_trap = sink.get_label(); - let label_jump_over = sink.get_label(); - Inst::CondBr { - taken: BranchTarget::Label(label_trap), - not_taken: BranchTarget::Label(label_jump_over), - kind: IntegerCompare { kind: cc, rs1, rs2 }, - } - .emit(&[], sink, emit_info, state); - // trap - sink.bind_label(label_trap, &mut state.ctrl_plane); - Inst::Udf { trap_code }.emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + let rs2 = allocs.next(rs2); + let label_trap = sink.get_label(); + let label_jump_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_trap), + not_taken: BranchTarget::Label(label_jump_over), + kind: IntegerCompare { kind: cc, rs1, rs2 }, + } + .emit(&[], sink, emit_info, state); + // trap + sink.bind_label(label_trap, &mut state.ctrl_plane); + Inst::Udf { trap_code }.emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::TrapIf { test, trap_code } => { todo!() /* let test = allocs.next(test); - let label_trap = sink.get_label(); - let label_jump_over = sink.get_label(); - Inst::CondBr { - taken: BranchTarget::Label(label_trap), - not_taken: BranchTarget::Label(label_jump_over), - kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: test, - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - // trap - sink.bind_label(label_trap, &mut state.ctrl_plane); - Inst::Udf { - trap_code: trap_code, - } - .emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + let label_trap = sink.get_label(); + let label_jump_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_trap), + not_taken: BranchTarget::Label(label_jump_over), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: test, + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // trap + sink.bind_label(label_trap, &mut state.ctrl_plane); + Inst::Udf { + trap_code: trap_code, + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::Udf { trap_code } => { todo!() /* sink.add_trap(trap_code); - if let Some(s) = state.take_stack_map() { - sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); - } - sink.put_data(Inst::TRAP_OPCODE); */ + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + sink.put_data(Inst::TRAP_OPCODE); */ } &Inst::AtomicLoad { rd, ty, p } => { todo!() /* let p = allocs.next(p); - let rd = allocs.next_writable(rd); - // emit the fence. - Inst::Fence { - pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, - succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, - } - .emit(&[], sink, emit_info, state); - // load. - Inst::Load { - rd: rd, - op: LoadOP::from_type(ty), - flags: MemFlags::new(), - from: AMode::RegOffset(p, 0, ty), - } - .emit(&[], sink, emit_info, state); - Inst::Fence { - pred: Inst::FENCE_REQ_R, - succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, - } - .emit(&[], sink, emit_info, state); */ + let rd = allocs.next_writable(rd); + // emit the fence. 
+ Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + // load. + Inst::Load { + rd: rd, + op: LoadOP::from_type(ty), + flags: MemFlags::new(), + from: AMode::RegOffset(p, 0, ty), + } + .emit(&[], sink, emit_info, state); + Inst::Fence { + pred: Inst::FENCE_REQ_R, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); */ } &Inst::AtomicStore { src, ty, p } => { todo!() /* let src = allocs.next(src); - let p = allocs.next(p); - Inst::Fence { - pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, - succ: Inst::FENCE_REQ_W, - } - .emit(&[], sink, emit_info, state); - Inst::Store { - to: AMode::RegOffset(p, 0, ty), - op: StoreOP::from_type(ty), - flags: MemFlags::new(), - src, - } - .emit(&[], sink, emit_info, state); */ + let p = allocs.next(p); + Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(p, 0, ty), + op: StoreOP::from_type(ty), + flags: MemFlags::new(), + src, + } + .emit(&[], sink, emit_info, state); */ } &Inst::FloatRound { op, @@ -2182,138 +2213,138 @@ impl MachInstEmit for Inst { ty, } => { todo!() /* // this code is port from glibc ceil floor ... implementation. - let rs = allocs.next(rs); - let int_tmp = allocs.next_writable(int_tmp); - let f_tmp = allocs.next_writable(f_tmp); - let rd = allocs.next_writable(rd); - let label_nan = sink.get_label(); - let label_x = sink.get_label(); - let label_jump_over = sink.get_label(); - // check if is nan. - Inst::emit_not_nan(int_tmp, rs, ty).emit(&[], sink, emit_info, state); - Inst::CondBr { - taken: BranchTarget::Label(label_nan), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::Equal, - rs1: int_tmp.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - fn max_value_need_round(ty: Type) -> u64 { - match ty { - F32 => { - let x: u64 = 1 << f32::MANTISSA_DIGITS; - let x = x as f32; - let x = u32::from_le_bytes(x.to_le_bytes()); - x as u64 - } - F64 => { - let x: u64 = 1 << f64::MANTISSA_DIGITS; - let x = x as f64; - u64::from_le_bytes(x.to_le_bytes()) - } - _ => unreachable!(), - } - } - // load max value need to round. - if ty == F32 { - Inst::load_fp_constant32(f_tmp, max_value_need_round(ty) as u32, &mut |_| { - writable_spilltmp_reg() - }) - } else { - Inst::load_fp_constant64(f_tmp, max_value_need_round(ty), &mut |_| { - writable_spilltmp_reg() - }) - } - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); + let rs = allocs.next(rs); + let int_tmp = allocs.next_writable(int_tmp); + let f_tmp = allocs.next_writable(f_tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_x = sink.get_label(); + let label_jump_over = sink.get_label(); + // check if is nan. 
+ Inst::emit_not_nan(int_tmp, rs, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: int_tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + fn max_value_need_round(ty: Type) -> u64 { + match ty { + F32 => { + let x: u64 = 1 << f32::MANTISSA_DIGITS; + let x = x as f32; + let x = u32::from_le_bytes(x.to_le_bytes()); + x as u64 + } + F64 => { + let x: u64 = 1 << f64::MANTISSA_DIGITS; + let x = x as f64; + u64::from_le_bytes(x.to_le_bytes()) + } + _ => unreachable!(), + } + } + // load max value need to round. + if ty == F32 { + Inst::load_fp_constant32(f_tmp, max_value_need_round(ty) as u32, &mut |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(f_tmp, max_value_need_round(ty), &mut |_| { + writable_spilltmp_reg() + }) + } + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); - // get abs value. - Inst::emit_fabs(rd, rs, ty).emit(&[], sink, emit_info, state); - - // branch if f_tmp < rd - Inst::FpuRRR { - frm: None, - alu_op: if ty == F32 { - FpuOPRRR::FltS - } else { - FpuOPRRR::FltD - }, - rd: int_tmp, - rs1: f_tmp.to_reg(), - rs2: rd.to_reg(), - } - .emit(&[], sink, emit_info, state); + // get abs value. + Inst::emit_fabs(rd, rs, ty).emit(&[], sink, emit_info, state); - Inst::CondBr { - taken: BranchTarget::Label(label_x), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: int_tmp.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); + // branch if f_tmp < rd + Inst::FpuRRR { + frm: None, + alu_op: if ty == F32 { + FpuOPRRR::FltS + } else { + FpuOPRRR::FltD + }, + rd: int_tmp, + rs1: f_tmp.to_reg(), + rs2: rd.to_reg(), + } + .emit(&[], sink, emit_info, state); - //convert to int. - Inst::FpuRR { - alu_op: FpuOPRR::float_convert_2_int_op(ty, true, I64), - frm: Some(op.to_frm()), - rd: int_tmp, - rs: rs, - } - .emit(&[], sink, emit_info, state); - //convert back. - Inst::FpuRR { - alu_op: FpuOPRR::int_convert_2_float_op(I64, true, ty), - frm: Some(op.to_frm()), - rd, - rs: int_tmp.to_reg(), - } - .emit(&[], sink, emit_info, state); - // copy sign. - Inst::FpuRRR { - alu_op: if ty == F32 { - FpuOPRRR::FsgnjS - } else { - FpuOPRRR::FsgnjD - }, - frm: None, - rd, - rs1: rd.to_reg(), - rs2: rs, - } - .emit(&[], sink, emit_info, state); - // jump over. - Inst::Jal { - dest: BranchTarget::Label(label_jump_over), - } - .emit(&[], sink, emit_info, state); - // here is nan. - sink.bind_label(label_nan, &mut state.ctrl_plane); - Inst::FpuRRR { - alu_op: if ty == F32 { - FpuOPRRR::FaddS - } else { - FpuOPRRR::FaddD - }, - frm: None, - rd: rd, - rs1: rs, - rs2: rs, - } - .emit(&[], sink, emit_info, state); - Inst::Jal { - dest: BranchTarget::Label(label_jump_over), - } - .emit(&[], sink, emit_info, state); - // here select origin x. - sink.bind_label(label_x, &mut state.ctrl_plane); - Inst::gen_move(rd, rs, ty).emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + Inst::CondBr { + taken: BranchTarget::Label(label_x), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: int_tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + + //convert to int. + Inst::FpuRR { + alu_op: FpuOPRR::float_convert_2_int_op(ty, true, I64), + frm: Some(op.to_frm()), + rd: int_tmp, + rs: rs, + } + .emit(&[], sink, emit_info, state); + //convert back. 
+ Inst::FpuRR { + alu_op: FpuOPRR::int_convert_2_float_op(I64, true, ty), + frm: Some(op.to_frm()), + rd, + rs: int_tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + // copy sign. + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjS + } else { + FpuOPRRR::FsgnjD + }, + frm: None, + rd, + rs1: rd.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + // jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan. + sink.bind_label(label_nan, &mut state.ctrl_plane); + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FaddS + } else { + FpuOPRRR::FaddD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here select origin x. + sink.bind_label(label_x, &mut state.ctrl_plane); + Inst::gen_move(rd, rs, ty).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::FloatSelect { @@ -2325,123 +2356,123 @@ impl MachInstEmit for Inst { ty, } => { todo!() /* let rs1 = allocs.next(rs1); - let rs2 = allocs.next(rs2); - let tmp = allocs.next_writable(tmp); - let rd = allocs.next_writable(rd); - let label_nan = sink.get_label(); - let label_jump_over = sink.get_label(); - // check if rs1 is nan. - Inst::emit_not_nan(tmp, rs1, ty).emit(&[], sink, emit_info, state); - Inst::CondBr { - taken: BranchTarget::Label(label_nan), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::Equal, - rs1: tmp.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - // check if rs2 is nan. - Inst::emit_not_nan(tmp, rs2, ty).emit(&[], sink, emit_info, state); - Inst::CondBr { - taken: BranchTarget::Label(label_nan), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::Equal, - rs1: tmp.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - // here rs1 and rs2 is not nan. - Inst::FpuRRR { - alu_op: op.to_fpuoprrr(ty), - frm: None, - rd: rd, - rs1: rs1, - rs2: rs2, - } - .emit(&[], sink, emit_info, state); - // special handle for +0 or -0. - { - // check is rs1 and rs2 all equal to zero. - let label_done = sink.get_label(); - { - // if rs1 == 0 - let mut insts = Inst::emit_if_float_not_zero( - tmp, - rs1, - ty, - BranchTarget::Label(label_done), - BranchTarget::zero(), - ); - insts.extend(Inst::emit_if_float_not_zero( - tmp, - rs2, - ty, - BranchTarget::Label(label_done), - BranchTarget::zero(), - )); - insts - .iter() + let rs2 = allocs.next(rs2); + let tmp = allocs.next_writable(tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_jump_over = sink.get_label(); + // check if rs1 is nan. + Inst::emit_not_nan(tmp, rs1, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // check if rs2 is nan. + Inst::emit_not_nan(tmp, rs2, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // here rs1 and rs2 is not nan. 
+ Inst::FpuRRR { + alu_op: op.to_fpuoprrr(ty), + frm: None, + rd: rd, + rs1: rs1, + rs2: rs2, + } + .emit(&[], sink, emit_info, state); + // special handle for +0 or -0. + { + // check is rs1 and rs2 all equal to zero. + let label_done = sink.get_label(); + { + // if rs1 == 0 + let mut insts = Inst::emit_if_float_not_zero( + tmp, + rs1, + ty, + BranchTarget::Label(label_done), + BranchTarget::zero(), + ); + insts.extend(Inst::emit_if_float_not_zero( + tmp, + rs2, + ty, + BranchTarget::Label(label_done), + BranchTarget::zero(), + )); + insts + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + Inst::FpuRR { + alu_op: FpuOPRR::move_f_to_x_op(ty), + frm: None, + rd: tmp, + rs: rs1, + } + .emit(&[], sink, emit_info, state); + Inst::FpuRR { + alu_op: FpuOPRR::move_f_to_x_op(ty), + frm: None, + rd: writable_spilltmp_reg(), + rs: rs2, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: if op == FloatSelectOP::Max { + AluOPRRR::And + } else { + AluOPRRR::Or + }, + rd: tmp, + rs1: tmp.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + // move back to rd. + Inst::FpuRR { + alu_op: FpuOPRR::move_x_to_f_op(ty), + frm: None, + rd, + rs: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + // + sink.bind_label(label_done, &mut state.ctrl_plane); + } + // we have the reuslt,jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan. + sink.bind_label(label_nan, &mut state.ctrl_plane); + op.snan_bits(tmp, ty) + .into_iter() .for_each(|i| i.emit(&[], sink, emit_info, state)); - } - Inst::FpuRR { - alu_op: FpuOPRR::move_f_to_x_op(ty), - frm: None, - rd: tmp, - rs: rs1, - } - .emit(&[], sink, emit_info, state); - Inst::FpuRR { - alu_op: FpuOPRR::move_f_to_x_op(ty), - frm: None, - rd: writable_spilltmp_reg(), - rs: rs2, - } - .emit(&[], sink, emit_info, state); - Inst::AluRRR { - alu_op: if op == FloatSelectOP::Max { - AluOPRRR::And - } else { - AluOPRRR::Or - }, - rd: tmp, - rs1: tmp.to_reg(), - rs2: spilltmp_reg(), - } - .emit(&[], sink, emit_info, state); - // move back to rd. - Inst::FpuRR { - alu_op: FpuOPRR::move_x_to_f_op(ty), - frm: None, - rd, - rs: tmp.to_reg(), - } - .emit(&[], sink, emit_info, state); - // - sink.bind_label(label_done, &mut state.ctrl_plane); - } - // we have the reuslt,jump over. - Inst::Jal { - dest: BranchTarget::Label(label_jump_over), - } - .emit(&[], sink, emit_info, state); - // here is nan. - sink.bind_label(label_nan, &mut state.ctrl_plane); - op.snan_bits(tmp, ty) - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - // move to rd. - Inst::FpuRR { - alu_op: FpuOPRR::move_x_to_f_op(ty), - frm: None, - rd, - rs: tmp.to_reg(), - } - .emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + // move to rd. + Inst::FpuRR { + alu_op: FpuOPRR::move_x_to_f_op(ty), + frm: None, + rd, + rs: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ } &Inst::Popcnt { sum, @@ -2451,161 +2482,161 @@ impl MachInstEmit for Inst { ty, } => { todo!() /* let rs = allocs.next(rs); - let tmp = allocs.next_writable(tmp); - let step = allocs.next_writable(step); - let sum = allocs.next_writable(sum); - // load 0 to sum , init. 
- Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); - // load - Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( - &[], - sink, - emit_info, - state, - ); - // - Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_bits((ty.bits() - 1) as i16), - } - .emit(&[], sink, emit_info, state); - let label_done = sink.get_label(); - let label_loop = sink.get_label(); - sink.bind_label(label_loop, &mut state.ctrl_plane); - Inst::CondBr { - taken: BranchTarget::Label(label_done), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::SignedLessThanOrEqual, - rs1: step.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - // test and add sum. - { - Inst::AluRRR { - alu_op: AluOPRRR::And, - rd: writable_spilltmp_reg2(), - rs1: tmp.to_reg(), - rs2: rs, - } - .emit(&[], sink, emit_info, state); - let label_over = sink.get_label(); - Inst::CondBr { - taken: BranchTarget::Label(label_over), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::Equal, - rs1: zero_reg(), - rs2: spilltmp_reg2(), - }, - } - .emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: sum, - rs: sum.to_reg(), - imm12: Imm12::from_bits(1), - } - .emit(&[], sink, emit_info, state); - sink.bind_label(label_over, &mut state.ctrl_plane); - } - // set step and tmp. - { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: step, - rs: step.to_reg(), - imm12: Imm12::from_bits(-1), - } - .emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_bits(1), - } - .emit(&[], sink, emit_info, state); - Inst::Jal { - dest: BranchTarget::Label(label_loop), - } - .emit(&[], sink, emit_info, state); - } - sink.bind_label(label_done, &mut state.ctrl_plane); */ + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let sum = allocs.next_writable(sum); + // load 0 to sum , init. + Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and add sum. 
+ { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_over), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::Rev8 { rs, rd, tmp, step } => { todo!() /* let rs = allocs.next(rs); - let tmp = allocs.next_writable(tmp); - let step = allocs.next_writable(step); - let rd = allocs.next_writable(rd); - // init. - Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); - Inst::gen_move(tmp, rs, I64).emit(&[], sink, emit_info, state); - // load 56 to step. - Inst::load_imm12(step, Imm12::from_bits(56)).emit(&[], sink, emit_info, state); - let label_done = sink.get_label(); - let label_loop = sink.get_label(); - sink.bind_label(label_loop, &mut state.ctrl_plane); - Inst::CondBr { - taken: BranchTarget::Label(label_done), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::SignedLessThan, - rs1: step.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Andi, - rd: writable_spilltmp_reg(), - rs: tmp.to_reg(), - imm12: Imm12::from_bits(255), - } - .emit(&[], sink, emit_info, state); - Inst::AluRRR { - alu_op: AluOPRRR::Sll, - rd: writable_spilltmp_reg(), - rs1: spilltmp_reg(), - rs2: step.to_reg(), - } - .emit(&[], sink, emit_info, state); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let rd = allocs.next_writable(rd); + // init. + Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); + Inst::gen_move(tmp, rs, I64).emit(&[], sink, emit_info, state); + // load 56 to step. 
+ Inst::load_imm12(step, Imm12::from_bits(56)).emit(&[], sink, emit_info, state); + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThan, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: writable_spilltmp_reg(), + rs: tmp.to_reg(), + imm12: Imm12::from_bits(255), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: writable_spilltmp_reg(), + rs1: spilltmp_reg(), + rs2: step.to_reg(), + } + .emit(&[], sink, emit_info, state); - Inst::AluRRR { - alu_op: AluOPRRR::Or, - rd: rd, - rs1: rd.to_reg(), - rs2: spilltmp_reg(), - } - .emit(&[], sink, emit_info, state); - { - // reset step - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: step, - rs: step.to_reg(), - imm12: Imm12::from_bits(-8), - } - .emit(&[], sink, emit_info, state); - //reset tmp. - Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_bits(8), - } - .emit(&[], sink, emit_info, state); - // loop. - Inst::Jal { - dest: BranchTarget::Label(label_loop), - } - } - .emit(&[], sink, emit_info, state); - sink.bind_label(label_done, &mut state.ctrl_plane); */ + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + { + // reset step + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-8), + } + .emit(&[], sink, emit_info, state); + //reset tmp. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(8), + } + .emit(&[], sink, emit_info, state); + // loop. + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::Cltz { sum, @@ -2616,95 +2647,95 @@ impl MachInstEmit for Inst { ty, } => { todo!() /* let rs = allocs.next(rs); - let tmp = allocs.next_writable(tmp); - let step = allocs.next_writable(step); - let sum = allocs.next_writable(sum); - // load 0 to sum , init. - Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); - // load - Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( - &[], - sink, - emit_info, - state, - ); - // - Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); - if leading { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_bits((ty.bits() - 1) as i16), - } - .emit(&[], sink, emit_info, state); - } - let label_done = sink.get_label(); - let label_loop = sink.get_label(); - sink.bind_label(label_loop, &mut state.ctrl_plane); - Inst::CondBr { - taken: BranchTarget::Label(label_done), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::SignedLessThanOrEqual, - rs1: step.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - // test and add sum. 
- { - Inst::AluRRR { - alu_op: AluOPRRR::And, - rd: writable_spilltmp_reg2(), - rs1: tmp.to_reg(), - rs2: rs, - } - .emit(&[], sink, emit_info, state); - Inst::CondBr { - taken: BranchTarget::Label(label_done), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: zero_reg(), - rs2: spilltmp_reg2(), - }, - } - .emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: sum, - rs: sum.to_reg(), - imm12: Imm12::from_bits(1), - } - .emit(&[], sink, emit_info, state); - } - // set step and tmp. - { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: step, - rs: step.to_reg(), - imm12: Imm12::from_bits(-1), - } - .emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: if leading { - AluOPRRI::Srli - } else { - AluOPRRI::Slli - }, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_bits(1), - } - .emit(&[], sink, emit_info, state); - Inst::Jal { - dest: BranchTarget::Label(label_loop), - } - .emit(&[], sink, emit_info, state); - } - sink.bind_label(label_done, &mut state.ctrl_plane); */ + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let sum = allocs.next_writable(sum); + // load 0 to sum , init. + Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + if leading { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + } + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and add sum. + { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + } + // set step and tmp. 
+ { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: if leading { + AluOPRRI::Srli + } else { + AluOPRRI::Slli + }, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::Brev8 { rs, @@ -2715,149 +2746,149 @@ impl MachInstEmit for Inst { rd, } => { todo!() /* let rs = allocs.next(rs); - let step = allocs.next_writable(step); - let tmp = allocs.next_writable(tmp); - let tmp2 = allocs.next_writable(tmp2); - let rd = allocs.next_writable(rd); - Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); - Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( - &[], - sink, - emit_info, - state, - ); - // - Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_bits((ty.bits() - 1) as i16), - } - .emit(&[], sink, emit_info, state); - Inst::load_imm12(tmp2, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp2, - rs: tmp2.to_reg(), - imm12: Imm12::from_bits((ty.bits() - 8) as i16), - } - .emit(&[], sink, emit_info, state); - - let label_done = sink.get_label(); - let label_loop = sink.get_label(); - sink.bind_label(label_loop, &mut state.ctrl_plane); - Inst::CondBr { - taken: BranchTarget::Label(label_done), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::SignedLessThanOrEqual, - rs1: step.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - // test and set bit. - { - Inst::AluRRR { - alu_op: AluOPRRR::And, - rd: writable_spilltmp_reg2(), - rs1: tmp.to_reg(), - rs2: rs, - } - .emit(&[], sink, emit_info, state); - let label_over = sink.get_label(); - Inst::CondBr { - taken: BranchTarget::Label(label_over), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::Equal, - rs1: zero_reg(), - rs2: spilltmp_reg2(), - }, - } - .emit(&[], sink, emit_info, state); - Inst::AluRRR { - alu_op: AluOPRRR::Or, - rd: rd, - rs1: rd.to_reg(), - rs2: tmp2.to_reg(), - } - .emit(&[], sink, emit_info, state); - sink.bind_label(label_over, &mut state.ctrl_plane); - } - // set step and tmp. 
- { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: step, - rs: step.to_reg(), - imm12: Imm12::from_bits(-1), - } - .emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_bits(1), - } - .emit(&[], sink, emit_info, state); - { - // reset tmp2 - // if (step %=8 == 0) then tmp2 = tmp2 >> 15 - // if (step %=8 != 0) then tmp2 = tmp2 << 1 - let label_over = sink.get_label(); - let label_sll_1 = sink.get_label(); - Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_bits(8)).emit( + let step = allocs.next_writable(step); + let tmp = allocs.next_writable(tmp); + let tmp2 = allocs.next_writable(tmp2); + let rd = allocs.next_writable(rd); + Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( &[], sink, emit_info, state, ); - Inst::AluRRR { - alu_op: AluOPRRR::Rem, - rd: writable_spilltmp_reg2(), - rs1: step.to_reg(), - rs2: spilltmp_reg2(), + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + Inst::load_imm12(tmp2, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 8) as i16), } .emit(&[], sink, emit_info, state); + + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); Inst::CondBr { - taken: BranchTarget::Label(label_sll_1), + taken: BranchTarget::Label(label_done), not_taken: BranchTarget::zero(), kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: spilltmp_reg2(), + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), rs2: zero_reg(), }, } .emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: tmp2, - rs: tmp2.to_reg(), - imm12: Imm12::from_bits(15), - } - .emit(&[], sink, emit_info, state); - Inst::Jal { - dest: BranchTarget::Label(label_over), + // test and set bit. + { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_over), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp2.to_reg(), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); } - .emit(&[], sink, emit_info, state); - sink.bind_label(label_sll_1, &mut state.ctrl_plane); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp2, - rs: tmp2.to_reg(), - imm12: Imm12::from_bits(1), + // set step and tmp. 
+ { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + { + // reset tmp2 + // if (step %=8 == 0) then tmp2 = tmp2 >> 15 + // if (step %=8 != 0) then tmp2 = tmp2 << 1 + let label_over = sink.get_label(); + let label_sll_1 = sink.get_label(); + Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_bits(8)).emit( + &[], + sink, + emit_info, + state, + ); + Inst::AluRRR { + alu_op: AluOPRRR::Rem, + rd: writable_spilltmp_reg2(), + rs1: step.to_reg(), + rs2: spilltmp_reg2(), + } + .emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_sll_1), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: spilltmp_reg2(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits(15), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_over), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_sll_1, &mut state.ctrl_plane); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); } - .emit(&[], sink, emit_info, state); - sink.bind_label(label_over, &mut state.ctrl_plane); - } - Inst::Jal { - dest: BranchTarget::Label(label_loop), - } - .emit(&[], sink, emit_info, state); - } - sink.bind_label(label_done, &mut state.ctrl_plane); */ + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::StackProbeLoop { guard_size, @@ -2865,58 +2896,58 @@ impl MachInstEmit for Inst { tmp: guard_size_tmp, } => { todo!() /* let step = writable_spilltmp_reg(); - Inst::load_constant_u64( - step, - (guard_size as u64) * (probe_count as u64), - &mut |_| step, - ) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - Inst::load_constant_u64(guard_size_tmp, guard_size as u64, &mut |_| guard_size_tmp) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - - let loop_start = sink.get_label(); - let label_done = sink.get_label(); - sink.bind_label(loop_start, &mut state.ctrl_plane); - Inst::CondBr { - taken: BranchTarget::Label(label_done), - not_taken: BranchTarget::zero(), - kind: IntegerCompare { - kind: IntCC::UnsignedLessThanOrEqual, - rs1: step.to_reg(), - rs2: guard_size_tmp.to_reg(), - }, - } - .emit(&[], sink, emit_info, state); - // compute address. - Inst::AluRRR { - alu_op: AluOPRRR::Sub, - rd: writable_spilltmp_reg2(), - rs1: stack_reg(), - rs2: step.to_reg(), - } - .emit(&[], sink, emit_info, state); - Inst::Store { - to: AMode::RegOffset(spilltmp_reg2(), 0, I8), - op: StoreOP::Sb, - flags: MemFlags::new(), - src: zero_reg(), - } - .emit(&[], sink, emit_info, state); - // reset step. 
- Inst::AluRRR { - alu_op: AluOPRRR::Sub, - rd: step, - rs1: step.to_reg(), - rs2: guard_size_tmp.to_reg(), - } - .emit(&[], sink, emit_info, state); - Inst::Jal { - dest: BranchTarget::Label(loop_start), - } - .emit(&[], sink, emit_info, state); - sink.bind_label(label_done, &mut state.ctrl_plane); */ + Inst::load_constant_u64( + step, + (guard_size as u64) * (probe_count as u64), + &mut |_| step, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::load_constant_u64(guard_size_tmp, guard_size as u64, &mut |_| guard_size_tmp) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + let loop_start = sink.get_label(); + let label_done = sink.get_label(); + sink.bind_label(loop_start, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::UnsignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: guard_size_tmp.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // compute address. + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: writable_spilltmp_reg2(), + rs1: stack_reg(), + rs2: step.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(spilltmp_reg2(), 0, I8), + op: StoreOP::Sb, + flags: MemFlags::new(), + src: zero_reg(), + } + .emit(&[], sink, emit_info, state); + // reset step. + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: step, + rs1: step.to_reg(), + rs2: guard_size_tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(loop_start), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_done, &mut state.ctrl_plane); */ } &Inst::VecAluRRRImm5 { op, @@ -2928,13 +2959,13 @@ impl MachInstEmit for Inst { .. } => { todo!() /* let vs2 = allocs.next(vs2); - let vd_src = allocs.next(vd_src); - let vd = allocs.next_writable(vd); - let mask = mask.with_allocs(&mut allocs); + let vd_src = allocs.next(vd_src); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); - debug_assert_eq!(vd.to_reg(), vd_src); + debug_assert_eq!(vd.to_reg(), vd_src); - sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, mask)); */ + sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, mask)); */ } &Inst::VecAluRRRR { op, @@ -2946,14 +2977,14 @@ impl MachInstEmit for Inst { .. } => { todo!() /* let vs1 = allocs.next(vs1); - let vs2 = allocs.next(vs2); - let vd_src = allocs.next(vd_src); - let vd = allocs.next_writable(vd); - let mask = mask.with_allocs(&mut allocs); + let vs2 = allocs.next(vs2); + let vd_src = allocs.next(vd_src); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); - debug_assert_eq!(vd.to_reg(), vd_src); + debug_assert_eq!(vd.to_reg(), vd_src); - sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, mask)); */ + sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, mask)); */ } &Inst::VecAluRRR { op, @@ -2964,11 +2995,11 @@ impl MachInstEmit for Inst { .. } => { todo!() /* let vs1 = allocs.next(vs1); - let vs2 = allocs.next(vs2); - let vd = allocs.next_writable(vd); - let mask = mask.with_allocs(&mut allocs); + let vs2 = allocs.next(vs2); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); - sink.put4(encode_valu(op, vd, vs1, vs2, mask)); */ + sink.put4(encode_valu(op, vd, vs1, vs2, mask)); */ } &Inst::VecAluRRImm5 { op, @@ -2979,10 +3010,10 @@ impl MachInstEmit for Inst { .. 
} => { todo!() /* let vs2 = allocs.next(vs2); - let vd = allocs.next_writable(vd); - let mask = mask.with_allocs(&mut allocs); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); - sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, mask)); */ + sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, mask)); */ } &Inst::VecAluRR { op, @@ -2992,10 +3023,10 @@ impl MachInstEmit for Inst { .. } => { todo!() /* let vs = allocs.next(vs); - let vd = allocs.next_writable(vd); - let mask = mask.with_allocs(&mut allocs); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); - sink.put4(encode_valu_rr(op, vd, vs, mask)); */ + sink.put4(encode_valu_rr(op, vd, vs, mask)); */ } &Inst::VecAluRImm5 { op, @@ -3005,22 +3036,22 @@ impl MachInstEmit for Inst { .. } => { todo!() /* let vd = allocs.next_writable(vd); - let mask = mask.with_allocs(&mut allocs); + let mask = mask.with_allocs(&mut allocs); - sink.put4(encode_valu_r_imm(op, vd, imm, mask)); */ + sink.put4(encode_valu_r_imm(op, vd, imm, mask)); */ } &Inst::VecSetState { rd, ref vstate } => { todo!() /* let rd = allocs.next_writable(rd); - sink.put4(encode_vcfg_imm( - 0x57, - rd.to_reg(), - vstate.avl.unwrap_static(), - &vstate.vtype, - )); + sink.put4(encode_vcfg_imm( + 0x57, + rd.to_reg(), + vstate.avl.unwrap_static(), + &vstate.vtype, + )); - // Update the current vector emit state. - state.vstate = EmitVState::Known(vstate.clone()); */ + // Update the current vector emit state. + state.vstate = EmitVState::Known(vstate.clone()); */ } &Inst::VecLoad { @@ -3032,47 +3063,47 @@ impl MachInstEmit for Inst { .. } => { todo!() /* let from = from.clone().with_allocs(&mut allocs); - let to = allocs.next_writable(to); - let mask = mask.with_allocs(&mut allocs); - - // Vector Loads don't support immediate offsets, so we need to load it into a register. - let addr = match from { - VecAMode::UnitStride { base } => { - let base_reg = base.get_base_register(); - let offset = base.get_offset_with_state(state); - - // Reg+0 Offset can be directly encoded - if let (Some(base_reg), 0) = (base_reg, offset) { - base_reg - } else { - // Otherwise load the address it into a reg and load from it. - let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { - rd: tmp, - mem: base.clone(), + let to = allocs.next_writable(to); + let mask = mask.with_allocs(&mut allocs); + + // Vector Loads don't support immediate offsets, so we need to load it into a register. + let addr = match from { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); + tmp.to_reg() + } } - .emit(&[], sink, emit_info, state); - tmp.to_reg() - } - } - }; + }; - let srcloc = state.cur_srcloc(); - if !srcloc.is_default() && !flags.notrap() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(TrapCode::HeapOutOfBounds); - } + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. 
+ sink.add_trap(TrapCode::HeapOutOfBounds); + } - sink.put4(encode_vmem_load( - 0x07, - to.to_reg(), - eew, - addr, - from.lumop(), - mask, - from.mop(), - from.nf(), - )); */ + sink.put4(encode_vmem_load( + 0x07, + to.to_reg(), + eew, + addr, + from.lumop(), + mask, + from.mop(), + from.nf(), + )); */ } &Inst::VecStore { @@ -3084,47 +3115,47 @@ impl MachInstEmit for Inst { .. } => { todo!() /* let to = to.clone().with_allocs(&mut allocs); - let from = allocs.next(from); - let mask = mask.with_allocs(&mut allocs); - - // Vector Stores don't support immediate offsets, so we need to load it into a register. - let addr = match to { - VecAMode::UnitStride { base } => { - let base_reg = base.get_base_register(); - let offset = base.get_offset_with_state(state); - - // Reg+0 Offset can be directly encoded - if let (Some(base_reg), 0) = (base_reg, offset) { - base_reg - } else { - // Otherwise load the address it into a reg and load from it. - let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { - rd: tmp, - mem: base.clone(), + let from = allocs.next(from); + let mask = mask.with_allocs(&mut allocs); + + // Vector Stores don't support immediate offsets, so we need to load it into a register. + let addr = match to { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); + tmp.to_reg() + } } - .emit(&[], sink, emit_info, state); - tmp.to_reg() - } - } - }; + }; - let srcloc = state.cur_srcloc(); - if !srcloc.is_default() && !flags.notrap() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(TrapCode::HeapOutOfBounds); - } + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); + } - sink.put4(encode_vmem_store( - 0x27, - from, - eew, - addr, - to.sumop(), - mask, - to.mop(), - to.nf(), - )); */ + sink.put4(encode_vmem_store( + 0x27, + from, + eew, + addr, + to.sumop(), + mask, + to.mop(), + to.nf(), + )); */ } }; let end_off = sink.cur_offset(); diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index dde33f24f3da..71d6043c421c 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -703,7 +703,7 @@ fn zkasm_get_operands VReg>(inst: &Inst, collector: &mut OperandC Inst::AddImm32 { rd, src1, src2 } => { collector.reg_def(*rd); - }, + } &Inst::VecAluRRRImm5 { op, @@ -1447,10 +1447,10 @@ impl Inst { } } - Inst::AddImm32 { rd, src1, src2 } => { + Inst::AddImm32 { rd, src1, src2 } => { let rd = format_reg(rd.to_reg(), allocs); format!("{src1} + {src2} => {rd};") - }, + } &Inst::FpuRR { frm, diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs index 6832c4b386c6..f48ef41f4413 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/regs.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -142,35 +142,33 @@ pub fn writable_spilltmp_reg2() -> Writable { pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv { let preferred_regs_by_class: [Vec; 3] = { // Registers are A, B, C, D, E. 
- let x_registers: Vec = (1..=7) - .map(|i| PReg::new(i, RegClass::Int)) - .collect(); + let x_registers: Vec = (1..=7).map(|i| PReg::new(i, RegClass::Int)).collect(); let f_registers: Vec = Vec::new(); - // (0..=7) - // .chain(10..=17) - // .chain(28..=31) - // .map(|i| PReg::new(i, RegClass::Float)) - // .collect(); + // (0..=7) + // .chain(10..=17) + // .chain(28..=31) + // .map(|i| PReg::new(i, RegClass::Float)) + // .collect(); let v_registers: Vec = Vec::new(); - // (0..=31).map(|i| PReg::new(i, RegClass::Vector)).collect(); + // (0..=31).map(|i| PReg::new(i, RegClass::Vector)).collect(); [x_registers, f_registers, v_registers] }; let non_preferred_regs_by_class: [Vec; 3] = { let x_registers: Vec = Vec::new(); - // (9..=9) - // .chain(18..=27) - // .map(|i| PReg::new(i, RegClass::Int)) - // .collect(); + // (9..=9) + // .chain(18..=27) + // .map(|i| PReg::new(i, RegClass::Int)) + // .collect(); let f_registers: Vec = Vec::new(); - // (8..=9) - // .chain(18..=27) - // .map(|i| PReg::new(i, RegClass::Float)) - // .collect(); + // (8..=9) + // .chain(18..=27) + // .map(|i| PReg::new(i, RegClass::Float)) + // .collect(); let v_registers = vec![]; diff --git a/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs index ac5f587ac97d..d050560ffec0 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs @@ -1,7 +1,7 @@ //! Unwind information for System V ABI (Riscv64). -use crate::isa::zkasm::inst::regs; use crate::isa::unwind::systemv::RegisterMappingError; +use crate::isa::zkasm::inst::regs; use crate::machinst::Reg; use gimli::{write::CommonInformationEntry, Encoding, Format, Register}; use regalloc2::RegClass; diff --git a/cranelift/codegen/src/isa/zkasm/lower/isle.rs b/cranelift/codegen/src/isa/zkasm/lower/isle.rs index a4b2b42defdc..cd8cab87b477 100644 --- a/cranelift/codegen/src/isa/zkasm/lower/isle.rs +++ b/cranelift/codegen/src/isa/zkasm/lower/isle.rs @@ -9,9 +9,7 @@ use generated_code::{Context, ExtendOp, MInst}; use self::generated_code::{VecAluOpRR, VecLmul}; use super::{writable_zero_reg, zero_reg}; use crate::isa::zkasm::abi::Riscv64ABICallSite; -use crate::isa::zkasm::lower::args::{ - FReg, VReg, WritableFReg, WritableVReg, WritableXReg, XReg, -}; +use crate::isa::zkasm::lower::args::{FReg, VReg, WritableFReg, WritableVReg, WritableXReg, XReg}; use crate::isa::zkasm::Riscv64Backend; use crate::machinst::Reg; use crate::machinst::{isle::*, MachInst, SmallInstVec}; diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index b57f8474af9e..5a8b24daeef9 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -236,7 +236,10 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - } println!("start:"); - let start_func = dummy_environ.info.start_func.expect("Must have a start function"); + let start_func = dummy_environ + .info + .start_func + .expect("Must have a start function"); println!(" zkPC + 2 => RR"); // TODO(akashin): Figure out why we need to do -1 here. 
println!(" :JMP(function_{})", start_func.index() - 1); From 89ea5a34bbbb5783c8543327609556fd7371687d Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 14:59:11 +0100 Subject: [PATCH 34/68] Fix number representations --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 79 ++++++++++---------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index c5b48d123a6a..aed6ac59793d 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -652,7 +652,11 @@ impl MachInstEmit for Inst { } &Inst::AddImm32 { rd, src1, src2 } => { let rd = allocs.next(rd.to_reg()); - put_string(&format!("{src1} + {src2} => {}\n", reg_name(rd)), sink); + // TODO(akashin): Should we have a function for `bits` field? + put_string( + &format!("{} + {} => {}\n", src1.bits, src2.bits, reg_name(rd)), + sink, + ); } &Inst::AluRRR { alu_op, @@ -701,7 +705,12 @@ impl MachInstEmit for Inst { match alu_op { AluOPRRI::Addi => { put_string( - &format!("{} + {imm12} => {}\n", reg_name(rs), reg_name(rd.to_reg())), + &format!( + "{} + {} => {}\n", + reg_name(rs), + imm12.bits, + reg_name(rd.to_reg()) + ), sink, ); } @@ -739,41 +748,24 @@ impl MachInstEmit for Inst { }; put_string( &format!( - "$ => {} :MLOAD({} + {imm12})\n", + "$ => {} :MLOAD({} + {})\n", reg_name(rd.to_reg()), - reg_name(addr) + reg_name(addr), + imm12.bits, ), sink, ); - /* - let srcloc = state.cur_srcloc(); - if !srcloc.is_default() && !flags.notrap() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(TrapCode::HeapOutOfBounds); - } - - sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); */ + // let srcloc = state.cur_srcloc(); + // if !srcloc.is_default() && !flags.notrap() { + // // Register the offset at which the actual load instruction starts. + // sink.add_trap(TrapCode::HeapOutOfBounds); + // } + // + // sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); } &Inst::Store { op, src, flags, to } => { - let src = allocs.next(src); - let stack_offset = to.get_offset_with_state(state); - if let Some(base_register) = to.get_base_register() { - put_string( - &format!( - "{} :MSTORE({} + {stack_offset})\n", - reg_name(src), - reg_name(base_register) - ), - sink, - ); - } else { - put_string( - &format!("{} :MSTORE({stack_offset})\n", reg_name(src)), - sink, - ); - } - /* let to = to.clone().with_allocs(&mut allocs); + let to = to.clone().with_allocs(&mut allocs); let src = allocs.next(src); let base = to.get_base_register(); @@ -790,14 +782,23 @@ impl MachInstEmit for Inst { (tmp.to_reg(), Imm12::zero()) } }; + put_string( + &format!( + "{} :MSTORE({} + {})\n", + reg_name(src), + reg_name(addr), + imm12.bits + ), + sink, + ); - let srcloc = state.cur_srcloc(); - if !srcloc.is_default() && !flags.notrap() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(TrapCode::HeapOutOfBounds); - } - - sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12)); */ + // let srcloc = state.cur_srcloc(); + // if !srcloc.is_default() && !flags.notrap() { + // // Register the offset at which the actual load instruction starts. + // sink.add_trap(TrapCode::HeapOutOfBounds); + // } + // + // sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12)); } &Inst::Args { .. 
} => { // Nothing: this is a pseudoinstruction that serves @@ -864,9 +865,9 @@ impl MachInstEmit for Inst { } &Inst::AdjustSp { amount } => { let amount = if amount > 0 { - format!("+ {}", amount) + format!("- {}", amount) } else { - format!("- {}", -amount) + format!("+ {}", -amount) }; put_string(&format!("SP {amount} => SP\n"), sink); From 68c7c40921ee55faaca787a826bde42e6d72dcd3 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 15:02:46 +0100 Subject: [PATCH 35/68] Indent instructions --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index aed6ac59793d..4ae1d6dde9e1 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -502,6 +502,7 @@ impl Inst { } fn put_string(s: &str, sink: &mut MachBuffer) { + sink.put_data(" ".as_bytes()); sink.put_data(s.as_bytes()); } @@ -552,7 +553,7 @@ impl MachInstEmit for Inst { x.emit(&[], sink, emit_info, state) */ } &Inst::Label { imm } => { - put_string(&format!("L{imm}:\n"), sink); + sink.put_data(format!("L{imm}:\n").as_bytes()); } &Inst::RawData { ref data } => { // Right now we only put a u32 or u64 in this instruction. From 844dadef677b45f93f9cdd53304738ac4d34eccb Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 15:45:09 +0100 Subject: [PATCH 36/68] Add methods for counter testcase --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 94 +++++++++++++------- cranelift/codegen/src/isa/zkasm/inst/regs.rs | 2 +- 2 files changed, 61 insertions(+), 35 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 4ae1d6dde9e1..ccb318c7a01f 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -715,7 +715,29 @@ impl MachInstEmit for Inst { sink, ); } - _ => unreachable!(), + AluOPRRI::Slli => { + put_string( + &format!( + "{} << {} => {}\n", + reg_name(rs), + imm12.bits, + reg_name(rd.to_reg()) + ), + sink, + ); + } + AluOPRRI::Srli => { + put_string( + &format!( + "{} >> {} => {}\n", + reg_name(rs), + imm12.bits, + reg_name(rd.to_reg()) + ), + sink, + ); + } + _ => unreachable!("Op {:?} is not implemented", alu_op), }; // let x = alu_op.op_code() @@ -1071,39 +1093,43 @@ impl MachInstEmit for Inst { not_taken, mut kind, } => { - todo!() /* kind.rs1 = allocs.next(kind.rs1); - kind.rs2 = allocs.next(kind.rs2); - match taken { - BranchTarget::Label(label) => { - let code = kind.emit(); - let code_inverse = kind.inverse().emit().to_le_bytes(); - sink.use_label_at_offset(start_off, label, LabelUse::B12); - sink.add_cond_branch(start_off, start_off + 4, label, &code_inverse); - sink.put4(code); - } - BranchTarget::ResolvedOffset(offset) => { - assert!(offset != 0); - if LabelUse::B12.offset_in_range(offset as i64) { - let code = kind.emit(); - let mut code = code.to_le_bytes(); - LabelUse::B12.patch_raw_offset(&mut code, offset as i64); - sink.put_data(&code[..]) - } else { - let mut code = kind.emit().to_le_bytes(); - // jump over the condbr , 4 bytes. 
- LabelUse::B12.patch_raw_offset(&mut code[..], 4); - sink.put_data(&code[..]); - Inst::construct_auipc_and_jalr( - None, - writable_spilltmp_reg(), - offset as i64, - ) - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - } - } - } - Inst::Jal { dest: not_taken }.emit(&[], sink, emit_info, state); */ + kind.rs1 = allocs.next(kind.rs1); + kind.rs2 = allocs.next(kind.rs2); + match taken { + BranchTarget::Label(label) => { + put_string(&format!(":JMP(L{})\n", label.index()), sink); + + // let code = kind.emit(); + // let code_inverse = kind.inverse().emit().to_le_bytes(); + // sink.use_label_at_offset(start_off, label, LabelUse::B12); + // sink.add_cond_branch(start_off, start_off + 4, label, &code_inverse); + // sink.put4(code); + } + BranchTarget::ResolvedOffset(offset) => { + assert!(offset != 0); + todo!(); + + // if LabelUse::B12.offset_in_range(offset as i64) { + // let code = kind.emit(); + // let mut code = code.to_le_bytes(); + // LabelUse::B12.patch_raw_offset(&mut code, offset as i64); + // sink.put_data(&code[..]) + // } else { + // let mut code = kind.emit().to_le_bytes(); + // // jump over the condbr , 4 bytes. + // LabelUse::B12.patch_raw_offset(&mut code[..], 4); + // sink.put_data(&code[..]); + // Inst::construct_auipc_and_jalr( + // None, + // writable_spilltmp_reg(), + // offset as i64, + // ) + // .into_iter() + // .for_each(|i| i.emit(&[], sink, emit_info, state)); + // } + } + } + Inst::Jal { dest: not_taken }.emit(&[], sink, emit_info, state); } &Inst::Mov { rd, rm, ty } => { diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs index f48ef41f4413..4a6ca34baae7 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/regs.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -142,7 +142,7 @@ pub fn writable_spilltmp_reg2() -> Writable { pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv { let preferred_regs_by_class: [Vec; 3] = { // Registers are A, B, C, D, E. 
- let x_registers: Vec = (1..=7).map(|i| PReg::new(i, RegClass::Int)).collect(); + let x_registers: Vec = (1..=16).map(|i| PReg::new(i, RegClass::Int)).collect(); let f_registers: Vec = Vec::new(); // (0..=7) From 069a586a309b5201a59e5bc44a685800d97a546a Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 15:45:53 +0100 Subject: [PATCH 37/68] Fix formatting issue --- cranelift/codegen/meta/src/isa/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/meta/src/isa/mod.rs b/cranelift/codegen/meta/src/isa/mod.rs index 1440e07c0abd..37906d557352 100644 --- a/cranelift/codegen/meta/src/isa/mod.rs +++ b/cranelift/codegen/meta/src/isa/mod.rs @@ -15,7 +15,7 @@ pub enum Isa { Arm64, S390x, Riscv64, - ZkAsm + ZkAsm, } impl Isa { From efb6d32d2a51b9cbe74e69b6dfecb16684ea198b Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 29 Aug 2023 16:24:45 +0100 Subject: [PATCH 38/68] Implement conditionals --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 24 +++++++++++--------- cranelift/codegen/src/isa/zkasm/inst/regs.rs | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index ccb318c7a01f..a6f4274455e2 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -667,15 +667,11 @@ impl MachInstEmit for Inst { } => { let rs1 = allocs.next(rs1); let rs2 = allocs.next(rs2); + debug_assert_eq!(rs1, a0()); + debug_assert_eq!(rs2, b0()); let rd = allocs.next_writable(rd); put_string( - &format!( - "{}, {} => {} : {}\n", - reg_name(rs1), - reg_name(rs2), - reg_name(rd.to_reg()), - alu_op.op_name() - ), + &format!("$ => {} :{}\n", reg_name(rd.to_reg()), alu_op.op_name()), sink, ); @@ -1095,9 +1091,10 @@ impl MachInstEmit for Inst { } => { kind.rs1 = allocs.next(kind.rs1); kind.rs2 = allocs.next(kind.rs2); + debug_assert_eq!(kind.rs2, zero_reg()); match taken { BranchTarget::Label(label) => { - put_string(&format!(":JMP(L{})\n", label.index()), sink); + put_string(&format!("{} :JMPZ(L{})\n", reg_name(kind.rs1), label.index()), sink); // let code = kind.emit(); // let code_inverse = kind.inverse().emit().to_le_bytes(); @@ -1129,6 +1126,7 @@ impl MachInstEmit for Inst { // } } } + // TODO(akashin): Can also merge this as an else in jump. Inst::Jal { dest: not_taken }.emit(&[], sink, emit_info, state); } @@ -1509,11 +1507,15 @@ impl MachInstEmit for Inst { ref b, ty, } => { - put_string(&format!("{a:?}, {b:?} => {:?} : CMP\n", rd), sink); - - /* let a = alloc_value_regs(a, &mut allocs); + let a = alloc_value_regs(a, &mut allocs); let b = alloc_value_regs(b, &mut allocs); let rd = allocs.next_writable(rd); + put_string( + &format!("{:?}, {:?} => {} :CMP\n", a, b, reg_name(rd.to_reg())), + sink, + ); + + /* let label_true = sink.get_label(); let label_false = sink.get_label(); Inst::lower_br_icmp( diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs index 4a6ca34baae7..f48ef41f4413 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/regs.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -142,7 +142,7 @@ pub fn writable_spilltmp_reg2() -> Writable { pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv { let preferred_regs_by_class: [Vec; 3] = { // Registers are A, B, C, D, E. 
- let x_registers: Vec = (1..=16).map(|i| PReg::new(i, RegClass::Int)).collect(); + let x_registers: Vec = (1..=7).map(|i| PReg::new(i, RegClass::Int)).collect(); let f_registers: Vec = Vec::new(); // (0..=7) From 5398fa2fcbc92462f5455bc7ed97e899f30f421b Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Wed, 30 Aug 2023 09:15:51 +0100 Subject: [PATCH 39/68] Tweak the set of available registers --- cranelift/codegen/src/isa/zkasm/abi.rs | 232 +++++++++---------- cranelift/codegen/src/isa/zkasm/inst/regs.rs | 5 +- 2 files changed, 120 insertions(+), 117 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index 8186dd9b342a..942577f859f6 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -762,60 +762,60 @@ const fn default_clobbers() -> PRegSet { .with(px_reg(30)) .with(px_reg(31)) // F Regs - .with(pf_reg(0)) - .with(pf_reg(1)) - .with(pf_reg(2)) - .with(pf_reg(3)) - .with(pf_reg(4)) - .with(pf_reg(5)) - .with(pf_reg(6)) - .with(pf_reg(7)) - .with(pf_reg(9)) - .with(pf_reg(10)) - .with(pf_reg(11)) - .with(pf_reg(12)) - .with(pf_reg(13)) - .with(pf_reg(14)) - .with(pf_reg(15)) - .with(pf_reg(16)) - .with(pf_reg(17)) - .with(pf_reg(28)) - .with(pf_reg(29)) - .with(pf_reg(30)) - .with(pf_reg(31)) + // .with(pf_reg(0)) + // .with(pf_reg(1)) + // .with(pf_reg(2)) + // .with(pf_reg(3)) + // .with(pf_reg(4)) + // .with(pf_reg(5)) + // .with(pf_reg(6)) + // .with(pf_reg(7)) + // .with(pf_reg(9)) + // .with(pf_reg(10)) + // .with(pf_reg(11)) + // .with(pf_reg(12)) + // .with(pf_reg(13)) + // .with(pf_reg(14)) + // .with(pf_reg(15)) + // .with(pf_reg(16)) + // .with(pf_reg(17)) + // .with(pf_reg(28)) + // .with(pf_reg(29)) + // .with(pf_reg(30)) + // .with(pf_reg(31)) // V Regs - All vector regs get clobbered - .with(pv_reg(0)) - .with(pv_reg(1)) - .with(pv_reg(2)) - .with(pv_reg(3)) - .with(pv_reg(4)) - .with(pv_reg(5)) - .with(pv_reg(6)) - .with(pv_reg(7)) - .with(pv_reg(8)) - .with(pv_reg(9)) - .with(pv_reg(10)) - .with(pv_reg(11)) - .with(pv_reg(12)) - .with(pv_reg(13)) - .with(pv_reg(14)) - .with(pv_reg(15)) - .with(pv_reg(16)) - .with(pv_reg(17)) - .with(pv_reg(18)) - .with(pv_reg(19)) - .with(pv_reg(20)) - .with(pv_reg(21)) - .with(pv_reg(22)) - .with(pv_reg(23)) - .with(pv_reg(24)) - .with(pv_reg(25)) - .with(pv_reg(26)) - .with(pv_reg(27)) - .with(pv_reg(28)) - .with(pv_reg(29)) - .with(pv_reg(30)) - .with(pv_reg(31)) + // .with(pv_reg(0)) + // .with(pv_reg(1)) + // .with(pv_reg(2)) + // .with(pv_reg(3)) + // .with(pv_reg(4)) + // .with(pv_reg(5)) + // .with(pv_reg(6)) + // .with(pv_reg(7)) + // .with(pv_reg(8)) + // .with(pv_reg(9)) + // .with(pv_reg(10)) + // .with(pv_reg(11)) + // .with(pv_reg(12)) + // .with(pv_reg(13)) + // .with(pv_reg(14)) + // .with(pv_reg(15)) + // .with(pv_reg(16)) + // .with(pv_reg(17)) + // .with(pv_reg(18)) + // .with(pv_reg(19)) + // .with(pv_reg(20)) + // .with(pv_reg(21)) + // .with(pv_reg(22)) + // .with(pv_reg(23)) + // .with(pv_reg(24)) + // .with(pv_reg(25)) + // .with(pv_reg(26)) + // .with(pv_reg(27)) + // .with(pv_reg(28)) + // .with(pv_reg(29)) + // .with(pv_reg(30)) + // .with(pv_reg(31)) } const DEFAULT_CLOBBERS: PRegSet = default_clobbers(); @@ -858,70 +858,70 @@ const fn tail_clobbers() -> PRegSet { // allocatable. 
// // F Regs - .with(pf_reg(0)) - .with(pf_reg(1)) - .with(pf_reg(2)) - .with(pf_reg(3)) - .with(pf_reg(4)) - .with(pf_reg(5)) - .with(pf_reg(6)) - .with(pf_reg(7)) - .with(pf_reg(9)) - .with(pf_reg(10)) - .with(pf_reg(11)) - .with(pf_reg(12)) - .with(pf_reg(13)) - .with(pf_reg(14)) - .with(pf_reg(15)) - .with(pf_reg(16)) - .with(pf_reg(17)) - .with(pf_reg(18)) - .with(pf_reg(19)) - .with(pf_reg(20)) - .with(pf_reg(21)) - .with(pf_reg(22)) - .with(pf_reg(23)) - .with(pf_reg(24)) - .with(pf_reg(25)) - .with(pf_reg(26)) - .with(pf_reg(27)) - .with(pf_reg(28)) - .with(pf_reg(29)) - .with(pf_reg(30)) - .with(pf_reg(31)) + // .with(pf_reg(0)) + // .with(pf_reg(1)) + // .with(pf_reg(2)) + // .with(pf_reg(3)) + // .with(pf_reg(4)) + // .with(pf_reg(5)) + // .with(pf_reg(6)) + // .with(pf_reg(7)) + // .with(pf_reg(9)) + // .with(pf_reg(10)) + // .with(pf_reg(11)) + // .with(pf_reg(12)) + // .with(pf_reg(13)) + // .with(pf_reg(14)) + // .with(pf_reg(15)) + // .with(pf_reg(16)) + // .with(pf_reg(17)) + // .with(pf_reg(18)) + // .with(pf_reg(19)) + // .with(pf_reg(20)) + // .with(pf_reg(21)) + // .with(pf_reg(22)) + // .with(pf_reg(23)) + // .with(pf_reg(24)) + // .with(pf_reg(25)) + // .with(pf_reg(26)) + // .with(pf_reg(27)) + // .with(pf_reg(28)) + // .with(pf_reg(29)) + // .with(pf_reg(30)) + // .with(pf_reg(31)) // V Regs - .with(pv_reg(0)) - .with(pv_reg(1)) - .with(pv_reg(2)) - .with(pv_reg(3)) - .with(pv_reg(4)) - .with(pv_reg(5)) - .with(pv_reg(6)) - .with(pv_reg(7)) - .with(pv_reg(8)) - .with(pv_reg(9)) - .with(pv_reg(10)) - .with(pv_reg(11)) - .with(pv_reg(12)) - .with(pv_reg(13)) - .with(pv_reg(14)) - .with(pv_reg(15)) - .with(pv_reg(16)) - .with(pv_reg(17)) - .with(pv_reg(18)) - .with(pv_reg(19)) - .with(pv_reg(20)) - .with(pv_reg(21)) - .with(pv_reg(22)) - .with(pv_reg(23)) - .with(pv_reg(24)) - .with(pv_reg(25)) - .with(pv_reg(26)) - .with(pv_reg(27)) - .with(pv_reg(28)) - .with(pv_reg(29)) - .with(pv_reg(30)) - .with(pv_reg(31)) + // .with(pv_reg(0)) + // .with(pv_reg(1)) + // .with(pv_reg(2)) + // .with(pv_reg(3)) + // .with(pv_reg(4)) + // .with(pv_reg(5)) + // .with(pv_reg(6)) + // .with(pv_reg(7)) + // .with(pv_reg(8)) + // .with(pv_reg(9)) + // .with(pv_reg(10)) + // .with(pv_reg(11)) + // .with(pv_reg(12)) + // .with(pv_reg(13)) + // .with(pv_reg(14)) + // .with(pv_reg(15)) + // .with(pv_reg(16)) + // .with(pv_reg(17)) + // .with(pv_reg(18)) + // .with(pv_reg(19)) + // .with(pv_reg(20)) + // .with(pv_reg(21)) + // .with(pv_reg(22)) + // .with(pv_reg(23)) + // .with(pv_reg(24)) + // .with(pv_reg(25)) + // .with(pv_reg(26)) + // .with(pv_reg(27)) + // .with(pv_reg(28)) + // .with(pv_reg(29)) + // .with(pv_reg(30)) + // .with(pv_reg(31)) } const TAIL_CLOBBERS: PRegSet = tail_clobbers(); diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs index f48ef41f4413..33777caf2629 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/regs.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -142,7 +142,10 @@ pub fn writable_spilltmp_reg2() -> Writable { pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv { let preferred_regs_by_class: [Vec; 3] = { // Registers are A, B, C, D, E. 
- let x_registers: Vec = (1..=7).map(|i| PReg::new(i, RegClass::Int)).collect(); + let x_registers: Vec = (5..=7) + .chain(10..20) + .map(|i| PReg::new(i, RegClass::Int)) + .collect(); let f_registers: Vec = Vec::new(); // (0..=7) From 18f3d5007ee5acb98add95e734152a8ff1ce0f06 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Wed, 30 Aug 2023 09:54:25 +0100 Subject: [PATCH 40/68] Fix MLOAD/MSTORE with negative immediates --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 24 ++++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index a6f4274455e2..af30d47d0f90 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -506,6 +506,15 @@ fn put_string(s: &str, sink: &mut MachBuffer) { sink.put_data(s.as_bytes()); } +fn access_reg_with_offset(reg: Reg, offset: i16) -> String { + let name = reg_name(reg); + match offset.cmp(&0) { + core::cmp::Ordering::Less => format!("{name} - {}", -offset), + core::cmp::Ordering::Equal => name, + core::cmp::Ordering::Greater => format!("{name} + {}", offset), + } +} + #[allow(unused)] impl MachInstEmit for Inst { type State = EmitState; @@ -767,10 +776,9 @@ impl MachInstEmit for Inst { }; put_string( &format!( - "$ => {} :MLOAD({} + {})\n", + "$ => {} :MLOAD({})\n", reg_name(rd.to_reg()), - reg_name(addr), - imm12.bits, + access_reg_with_offset(addr, imm12.bits), ), sink, ); @@ -803,10 +811,9 @@ impl MachInstEmit for Inst { }; put_string( &format!( - "{} :MSTORE({} + {})\n", + "{} :MSTORE({})\n", reg_name(src), - reg_name(addr), - imm12.bits + access_reg_with_offset(addr, imm12.bits), ), sink, ); @@ -1094,7 +1101,10 @@ impl MachInstEmit for Inst { debug_assert_eq!(kind.rs2, zero_reg()); match taken { BranchTarget::Label(label) => { - put_string(&format!("{} :JMPZ(L{})\n", reg_name(kind.rs1), label.index()), sink); + put_string( + &format!("{} :JMPZ(L{})\n", reg_name(kind.rs1), label.index()), + sink, + ); // let code = kind.emit(); // let code_inverse = kind.inverse().emit().to_le_bytes(); From ee70c3201c9a3c98b9cfb5e90a89210f0c31ca91 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Wed, 30 Aug 2023 10:04:29 +0100 Subject: [PATCH 41/68] Fix clobbering logic --- cranelift/codegen/src/isa/zkasm/abi.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index 942577f859f6..9d1f0a01f5fd 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -419,10 +419,12 @@ impl ABIMachineSpec for Riscv64MachineDeps { // Adjust the stack pointer downward for clobbers and the function fixed // frame (spillslots and storage slots). let stack_size = fixed_frame_storage_size + clobbered_size; + // Each stack slot is 256 bit and can fit 8 u32 values. + let stack_size = stack_size / 8; // Store each clobbered register in order at offsets from SP, // placing them above the fixed frame slots. 
if stack_size > 0 { - let mut cur_offset = 8; + let mut cur_offset = 1; for reg in clobbered_callee_saves { let r_reg = reg.to_reg(); let ty = match r_reg.class() { @@ -435,7 +437,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { real_reg_to_reg(reg.to_reg()), ty, )); - cur_offset += 8 + cur_offset += 1 } insts.push(Inst::AdjustSp { amount: -(stack_size as i64), @@ -456,12 +458,14 @@ impl ABIMachineSpec for Riscv64MachineDeps { let clobbered_callee_saves = Self::get_clobbered_callee_saves(call_conv, _flags, sig, clobbers); let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves); + // Each stack slot is 256 bit and can fit 8 u32 values. + let stack_size = stack_size / 8; if stack_size > 0 { insts.push(Inst::AdjustSp { amount: stack_size as i64, }); } - let mut cur_offset = 8; + let mut cur_offset = 1; for reg in &clobbered_callee_saves { let rreg = reg.to_reg(); let ty = match rreg.class() { @@ -474,7 +478,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { Writable::from_reg(real_reg_to_reg(reg.to_reg())), ty, )); - cur_offset += 8 + cur_offset += 1 } insts } @@ -699,11 +703,12 @@ impl Riscv64ABICallSite { } } +// TODO(akashin): Figure out the correct clobbering convention. const CALLEE_SAVE_X_REG: [bool; 32] = [ - false, false, true, false, false, false, false, false, // 0-7 - true, true, false, false, false, false, false, false, // 8-15 - false, false, true, true, true, true, true, true, // 16-23 - true, true, true, true, false, false, false, false, // 24-31 + false, false, false, false, false, false, false, false, // 0-7 + false, false, false, false, false, false, false, false, // 8-15 + false, false, false, false, false, false, false, false, // 16-23 + false, false, false, false, false, false, false, false, // 24-31 ]; const CALLEE_SAVE_F_REG: [bool; 32] = [ false, false, false, false, false, false, false, false, // 0-7 From 07428459915e7fbb5b61efca5412f060f8d729f3 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Wed, 30 Aug 2023 10:48:18 +0100 Subject: [PATCH 42/68] Hack extend implementation --- cranelift/codegen/src/isa/zkasm/inst.isle | 15 +++++--- cranelift/codegen/src/isa/zkasm/inst/args.rs | 36 ++++++++++---------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst.isle b/cranelift/codegen/src/isa/zkasm/inst.isle index e3a6fd1508b1..164b786f8779 100644 --- a/cranelift/codegen/src/isa/zkasm/inst.isle +++ b/cranelift/codegen/src/isa/zkasm/inst.isle @@ -1942,12 +1942,17 @@ ;; In the most generic case, we shift left and then shift right. ;; The type of right shift is determined by the extend op. +; (rule 0 (extend val extend_op (fits_in_32 from_ty) (fits_in_64 to_ty)) +; (let ((val XReg (value_regs_get val 0)) +; (shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits from_ty)))) +; (left XReg (rv_slli val shift)) +; (shift_op AluOPRRI (extend_shift_op extend_op)) +; (right XReg (alu_rr_imm12 shift_op left shift))) +; right)) + +;; Hacky no-op version. (rule 0 (extend val extend_op (fits_in_32 from_ty) (fits_in_64 to_ty)) - (let ((val XReg (value_regs_get val 0)) - (shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits from_ty)))) - (left XReg (rv_slli val shift)) - (shift_op AluOPRRI (extend_shift_op extend_op)) - (right XReg (alu_rr_imm12 shift_op left shift))) + (let ((right XReg (value_regs_get val 0))) right)) ;; If we are zero extending a U8 we can use a `andi` instruction. 
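
A minimal Rust sketch of what the commented-out shift-based extend rule above computes, assuming a 64-bit register and a source width of 1-63 bits; the function name and signature are illustrative only and do not appear in the patch:

    // Zero- or sign-extend the low `from_bits` bits of `val` to 64 bits by
    // shifting them to the top of the register and shifting back down, which
    // is what the disabled ISLE rule (rv_slli followed by the extend's shift op) expresses.
    fn extend_via_shifts(val: u64, from_bits: u32, signed: bool) -> u64 {
        assert!(from_bits >= 1 && from_bits < 64);
        let shift = 64 - from_bits;
        let left = val << shift;
        if signed {
            ((left as i64) >> shift) as u64 // arithmetic shift copies the sign bit down
        } else {
            left >> shift // logical shift fills the high bits with zeros
        }
    }

The replacement rule labelled "Hacky no-op version" simply forwards the low register unchanged, presumably relying on upstream values already being held in full-width canonical form.
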
diff --git a/cranelift/codegen/src/isa/zkasm/inst/args.rs b/cranelift/codegen/src/isa/zkasm/inst/args.rs index 86f8aba0f6a0..997cc9aab185 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/args.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/args.rs @@ -1755,24 +1755,24 @@ impl FloatSelectOP { } } // move qnan bits into int register. - pub(crate) fn snan_bits(self, rd: Writable, ty: Type) -> SmallInstVec { - let mut insts = SmallInstVec::new(); - insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1))); - let x = if ty == F32 { 22 } else { 51 }; - insts.push(Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: rd, - rs: rd.to_reg(), - imm12: Imm12::from_bits(x), - }); - insts.push(Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: rd, - rs: rd.to_reg(), - imm12: Imm12::from_bits(x), - }); - insts - } + // pub(crate) fn snan_bits(self, rd: Writable, ty: Type) -> SmallInstVec { + // let mut insts = SmallInstVec::new(); + // insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1))); + // let x = if ty == F32 { 22 } else { 51 }; + // insts.push(Inst::AluRRImm12 { + // alu_op: AluOPRRI::Srli, + // rd: rd, + // rs: rd.to_reg(), + // imm12: Imm12::from_bits(x), + // }); + // insts.push(Inst::AluRRImm12 { + // alu_op: AluOPRRI::Slli, + // rd: rd, + // rs: rd.to_reg(), + // imm12: Imm12::from_bits(x), + // }); + // insts + // } } pub(crate) fn f32_bits(f: f32) -> u32 { From 5edf8ffb26a9ace115b572fe9d2d07f9142990fb Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Wed, 30 Aug 2023 12:59:22 +0100 Subject: [PATCH 43/68] Format abi.rs --- cranelift/codegen/src/isa/zkasm/abi.rs | 246 ++++++++++++------------- 1 file changed, 123 insertions(+), 123 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index 9d1f0a01f5fd..c534da28134a 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -766,61 +766,61 @@ const fn default_clobbers() -> PRegSet { .with(px_reg(29)) .with(px_reg(30)) .with(px_reg(31)) - // F Regs - // .with(pf_reg(0)) - // .with(pf_reg(1)) - // .with(pf_reg(2)) - // .with(pf_reg(3)) - // .with(pf_reg(4)) - // .with(pf_reg(5)) - // .with(pf_reg(6)) - // .with(pf_reg(7)) - // .with(pf_reg(9)) - // .with(pf_reg(10)) - // .with(pf_reg(11)) - // .with(pf_reg(12)) - // .with(pf_reg(13)) - // .with(pf_reg(14)) - // .with(pf_reg(15)) - // .with(pf_reg(16)) - // .with(pf_reg(17)) - // .with(pf_reg(28)) - // .with(pf_reg(29)) - // .with(pf_reg(30)) - // .with(pf_reg(31)) - // V Regs - All vector regs get clobbered - // .with(pv_reg(0)) - // .with(pv_reg(1)) - // .with(pv_reg(2)) - // .with(pv_reg(3)) - // .with(pv_reg(4)) - // .with(pv_reg(5)) - // .with(pv_reg(6)) - // .with(pv_reg(7)) - // .with(pv_reg(8)) - // .with(pv_reg(9)) - // .with(pv_reg(10)) - // .with(pv_reg(11)) - // .with(pv_reg(12)) - // .with(pv_reg(13)) - // .with(pv_reg(14)) - // .with(pv_reg(15)) - // .with(pv_reg(16)) - // .with(pv_reg(17)) - // .with(pv_reg(18)) - // .with(pv_reg(19)) - // .with(pv_reg(20)) - // .with(pv_reg(21)) - // .with(pv_reg(22)) - // .with(pv_reg(23)) - // .with(pv_reg(24)) - // .with(pv_reg(25)) - // .with(pv_reg(26)) - // .with(pv_reg(27)) - // .with(pv_reg(28)) - // .with(pv_reg(29)) - // .with(pv_reg(30)) - // .with(pv_reg(31)) + // F Regs + // .with(pf_reg(0)) + // .with(pf_reg(1)) + // .with(pf_reg(2)) + // .with(pf_reg(3)) + // .with(pf_reg(4)) + // .with(pf_reg(5)) + // .with(pf_reg(6)) + // .with(pf_reg(7)) + // .with(pf_reg(9)) + // .with(pf_reg(10)) + // .with(pf_reg(11)) + // .with(pf_reg(12)) + // 
.with(pf_reg(13)) + // .with(pf_reg(14)) + // .with(pf_reg(15)) + // .with(pf_reg(16)) + // .with(pf_reg(17)) + // .with(pf_reg(28)) + // .with(pf_reg(29)) + // .with(pf_reg(30)) + // .with(pf_reg(31)) + // V Regs - All vector regs get clobbered + // .with(pv_reg(0)) + // .with(pv_reg(1)) + // .with(pv_reg(2)) + // .with(pv_reg(3)) + // .with(pv_reg(4)) + // .with(pv_reg(5)) + // .with(pv_reg(6)) + // .with(pv_reg(7)) + // .with(pv_reg(8)) + // .with(pv_reg(9)) + // .with(pv_reg(10)) + // .with(pv_reg(11)) + // .with(pv_reg(12)) + // .with(pv_reg(13)) + // .with(pv_reg(14)) + // .with(pv_reg(15)) + // .with(pv_reg(16)) + // .with(pv_reg(17)) + // .with(pv_reg(18)) + // .with(pv_reg(19)) + // .with(pv_reg(20)) + // .with(pv_reg(21)) + // .with(pv_reg(22)) + // .with(pv_reg(23)) + // .with(pv_reg(24)) + // .with(pv_reg(25)) + // .with(pv_reg(26)) + // .with(pv_reg(27)) + // .with(pv_reg(28)) + // .with(pv_reg(29)) + // .with(pv_reg(30)) + // .with(pv_reg(31)) } const DEFAULT_CLOBBERS: PRegSet = default_clobbers(); @@ -859,74 +859,74 @@ const fn tail_clobbers() -> PRegSet { .with(px_reg(27)) .with(px_reg(28)) .with(px_reg(29)) - // `x30` and `x31` are reserved as scratch registers, and are not - // allocatable. - // - // F Regs - // .with(pf_reg(0)) - // .with(pf_reg(1)) - // .with(pf_reg(2)) - // .with(pf_reg(3)) - // .with(pf_reg(4)) - // .with(pf_reg(5)) - // .with(pf_reg(6)) - // .with(pf_reg(7)) - // .with(pf_reg(9)) - // .with(pf_reg(10)) - // .with(pf_reg(11)) - // .with(pf_reg(12)) - // .with(pf_reg(13)) - // .with(pf_reg(14)) - // .with(pf_reg(15)) - // .with(pf_reg(16)) - // .with(pf_reg(17)) - // .with(pf_reg(18)) - // .with(pf_reg(19)) - // .with(pf_reg(20)) - // .with(pf_reg(21)) - // .with(pf_reg(22)) - // .with(pf_reg(23)) - // .with(pf_reg(24)) - // .with(pf_reg(25)) - // .with(pf_reg(26)) - // .with(pf_reg(27)) - // .with(pf_reg(28)) - // .with(pf_reg(29)) - // .with(pf_reg(30)) - // .with(pf_reg(31)) - // V Regs - // .with(pv_reg(0)) - // .with(pv_reg(1)) - // .with(pv_reg(2)) - // .with(pv_reg(3)) - // .with(pv_reg(4)) - // .with(pv_reg(5)) - // .with(pv_reg(6)) - // .with(pv_reg(7)) - // .with(pv_reg(8)) - // .with(pv_reg(9)) - // .with(pv_reg(10)) - // .with(pv_reg(11)) - // .with(pv_reg(12)) - // .with(pv_reg(13)) - // .with(pv_reg(14)) - // .with(pv_reg(15)) - // .with(pv_reg(16)) - // .with(pv_reg(17)) - // .with(pv_reg(18)) - // .with(pv_reg(19)) - // .with(pv_reg(20)) - // .with(pv_reg(21)) - // .with(pv_reg(22)) - // .with(pv_reg(23)) - // .with(pv_reg(24)) - // .with(pv_reg(25)) - // .with(pv_reg(26)) - // .with(pv_reg(27)) - // .with(pv_reg(28)) - // .with(pv_reg(29)) - // .with(pv_reg(30)) - // .with(pv_reg(31)) + // `x30` and `x31` are reserved as scratch registers, and are not + // allocatable. 
+ // + // F Regs + // .with(pf_reg(0)) + // .with(pf_reg(1)) + // .with(pf_reg(2)) + // .with(pf_reg(3)) + // .with(pf_reg(4)) + // .with(pf_reg(5)) + // .with(pf_reg(6)) + // .with(pf_reg(7)) + // .with(pf_reg(9)) + // .with(pf_reg(10)) + // .with(pf_reg(11)) + // .with(pf_reg(12)) + // .with(pf_reg(13)) + // .with(pf_reg(14)) + // .with(pf_reg(15)) + // .with(pf_reg(16)) + // .with(pf_reg(17)) + // .with(pf_reg(18)) + // .with(pf_reg(19)) + // .with(pf_reg(20)) + // .with(pf_reg(21)) + // .with(pf_reg(22)) + // .with(pf_reg(23)) + // .with(pf_reg(24)) + // .with(pf_reg(25)) + // .with(pf_reg(26)) + // .with(pf_reg(27)) + // .with(pf_reg(28)) + // .with(pf_reg(29)) + // .with(pf_reg(30)) + // .with(pf_reg(31)) + // V Regs + // .with(pv_reg(0)) + // .with(pv_reg(1)) + // .with(pv_reg(2)) + // .with(pv_reg(3)) + // .with(pv_reg(4)) + // .with(pv_reg(5)) + // .with(pv_reg(6)) + // .with(pv_reg(7)) + // .with(pv_reg(8)) + // .with(pv_reg(9)) + // .with(pv_reg(10)) + // .with(pv_reg(11)) + // .with(pv_reg(12)) + // .with(pv_reg(13)) + // .with(pv_reg(14)) + // .with(pv_reg(15)) + // .with(pv_reg(16)) + // .with(pv_reg(17)) + // .with(pv_reg(18)) + // .with(pv_reg(19)) + // .with(pv_reg(20)) + // .with(pv_reg(21)) + // .with(pv_reg(22)) + // .with(pv_reg(23)) + // .with(pv_reg(24)) + // .with(pv_reg(25)) + // .with(pv_reg(26)) + // .with(pv_reg(27)) + // .with(pv_reg(28)) + // .with(pv_reg(29)) + // .with(pv_reg(30)) + // .with(pv_reg(31)) } const TAIL_CLOBBERS: PRegSet = tail_clobbers(); From 557d5ead7657ecfd5f958b648fbcff9428fc0bc4 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Wed, 30 Aug 2023 16:06:58 +0100 Subject: [PATCH 44/68] Add results of compilation to git --- cranelift/data/add.zkasm | 23 +++++++++++++++ cranelift/data/add_func.zkasm | 37 ++++++++++++++++++++++++ cranelift/data/counter.zkasm | 35 +++++++++++++++++++++++ cranelift/data/fibonacci.zkasm | 45 ++++++++++++++++++++++++++++++ cranelift/data/locals.zkasm | 23 +++++++++++++++ cranelift/data/locals_simple.zkasm | 23 +++++++++++++++ cranelift/data/simple_locals.zkasm | 0 7 files changed, 186 insertions(+) create mode 100644 cranelift/data/add.zkasm create mode 100644 cranelift/data/add_func.zkasm create mode 100644 cranelift/data/counter.zkasm create mode 100644 cranelift/data/fibonacci.zkasm create mode 100644 cranelift/data/locals.zkasm create mode 100644 cranelift/data/locals_simple.zkasm create mode 100644 cranelift/data/simple_locals.zkasm diff --git a/cranelift/data/add.zkasm b/cranelift/data/add.zkasm new file mode 100644 index 000000000000..8211e8144920 --- /dev/null +++ b/cranelift/data/add.zkasm @@ -0,0 +1,23 @@ +start: + zkPC + 2 => RR + :JMP(function_0) + :JMP(finalizeExecution) +function_0: + SP + 1 => SP + RR :MSTORE(SP) +L0: + C => E + 2 + 3 => C + 0 + 5 => D + C => A + D :ASSERT + :JMP(L1) +L1: + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/add_func.zkasm b/cranelift/data/add_func.zkasm new file mode 100644 index 000000000000..d0dea0f151fc --- /dev/null +++ b/cranelift/data/add_func.zkasm @@ -0,0 +1,37 @@ +start: + zkPC + 2 => RR + :JMP(function_0) + :JMP(finalizeExecution) +function_0: + SP + 1 => SP + RR :MSTORE(SP) +L0: + C => s2 + 0 + 2 => C + 0 + 3 => D + s2 => E + C => A + D :ASSERT + 0 + 5 => D + s2 => E + C => A + D :ASSERT + :JMP(L1) +L1: + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +function_1: +L0: + C => A + D => B + $ => C :ADD + :JMP(L1) +L1: + :JMP(RR) + +finalizeExecution: + 
${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/counter.zkasm b/cranelift/data/counter.zkasm new file mode 100644 index 000000000000..e4b7ab5b64a8 --- /dev/null +++ b/cranelift/data/counter.zkasm @@ -0,0 +1,35 @@ +start: + zkPC + 2 => RR + :JMP(function_0) + :JMP(finalizeExecution) +function_0: + SP + 1 => SP + RR :MSTORE(SP) +L0: + C => E + 0 + 0 => A + :JMP(L1) +L1: + 0 + 1 => B + $ => C :ADD + 0 + 10 => a7 + ValueRegs { parts: [p5i, v2097151] }, ValueRegs { parts: [p17i, v2097151] } => a7 :CMP + a7 :JMPZ(L3) + :JMP(L2) +L2: + C => A + :JMP(L1) +L3: + 0 + 10 => D + C => A + D :ASSERT + :JMP(L4) +L4: + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/fibonacci.zkasm b/cranelift/data/fibonacci.zkasm new file mode 100644 index 000000000000..508fa888e793 --- /dev/null +++ b/cranelift/data/fibonacci.zkasm @@ -0,0 +1,45 @@ +start: + zkPC + 2 => RR + :JMP(function_0) + :JMP(finalizeExecution) +function_0: + SP + 1 => SP + RR :MSTORE(SP) +L0: + C => a2 + 0 + 0 => A + A => a3 + 0 + 0 => A + 0 + 1 => B + B => a4 + :JMP(L1) +L1: + $ => C :ADD + B => a4 + 0 + 1 => B + a3 => A + $ => A :ADD + 0 + 10 => E + ValueRegs { parts: [p10i, v2097151] }, ValueRegs { parts: [p7i, v2097151] } => E :CMP + E :JMPZ(L3) + :JMP(L2) +L2: + A => a3 + C => B + a4 => A + :JMP(L1) +L3: + 0 + 89 => D + a2 => E + C => A + D :ASSERT + :JMP(L4) +L4: + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/locals.zkasm b/cranelift/data/locals.zkasm new file mode 100644 index 000000000000..8211e8144920 --- /dev/null +++ b/cranelift/data/locals.zkasm @@ -0,0 +1,23 @@ +start: + zkPC + 2 => RR + :JMP(function_0) + :JMP(finalizeExecution) +function_0: + SP + 1 => SP + RR :MSTORE(SP) +L0: + C => E + 2 + 3 => C + 0 + 5 => D + C => A + D :ASSERT + :JMP(L1) +L1: + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/locals_simple.zkasm b/cranelift/data/locals_simple.zkasm new file mode 100644 index 000000000000..510ea91dd35e --- /dev/null +++ b/cranelift/data/locals_simple.zkasm @@ -0,0 +1,23 @@ +start: + zkPC + 2 => RR + :JMP(function_0) + :JMP(finalizeExecution) +function_0: + SP + 1 => SP + RR :MSTORE(SP) +L0: + C => E + 0 + 2 => C + 0 + 2 => D + C => A + D :ASSERT + :JMP(L1) +L1: + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/simple_locals.zkasm b/cranelift/data/simple_locals.zkasm new file mode 100644 index 000000000000..e69de29bb2d1 From db6794606cbf8e14cc21379264ea2988c499b1ea Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Wed, 30 Aug 2023 16:08:46 +0100 Subject: [PATCH 45/68] Remove empty file --- cranelift/data/simple_locals.zkasm | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 cranelift/data/simple_locals.zkasm diff --git a/cranelift/data/simple_locals.zkasm b/cranelift/data/simple_locals.zkasm deleted file mode 100644 index e69de29bb2d1..000000000000 From 6b2c993f3b7e9dc50d8bf99838d23703cd1f955b Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 31 Aug 2023 10:38:39 +0100 Subject: [PATCH 46/68] Print code conditionally --- cranelift/src/wasm.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cranelift/src/wasm.rs 
b/cranelift/src/wasm.rs index 5a8b24daeef9..2aae2248e394 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -266,10 +266,9 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - .map_err(|err| anyhow::anyhow!("{}", pretty_error(&err.func, err.inner)))?; let code_info = compiled_code.code_info(); - println!( - "{}", - std::str::from_utf8(compiled_code.code_buffer()).unwrap() - ); + if let Ok(code) = std::str::from_utf8(compiled_code.code_buffer()) { + println!("{code}",); + } if options.print_size { println!( From 3ff0f246f4805d56cc838c476a2ea0c97315c579 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 31 Aug 2023 11:28:33 +0100 Subject: [PATCH 47/68] Handle VMContext register --- cranelift/codegen/src/isa/zkasm/abi.rs | 18 +++++++++++++++++- cranelift/codegen/src/isa/zkasm/inst/mod.rs | 3 ++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index c534da28134a..739391a15341 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -103,7 +103,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { // All registers that can be used as parameters or rets. // both start and end are included. let (x_start, x_end, f_start, f_end) = match (call_conv, args_or_rets) { - (isa::CallConv::Tail, _) => (9, 29, 0, 31), + (isa::CallConv::Tail, _) => (5, 7, 5, 7), (_, ArgsOrRets::Args) => (5, 7, 5, 7), (_, ArgsOrRets::Rets) => (5, 7, 5, 7), }; @@ -126,6 +126,22 @@ impl ABIMachineSpec for Riscv64MachineDeps { continue; } + // TODO(akashin): Figure out how to properly handle VMContext register. + // For now we pin it to `CTX` register of ZK ASM. + if let ir::ArgumentPurpose::VMContext = param.purpose { + let mut slots = ABIArgSlotVec::new(); + slots.push(ABIArgSlot::Reg { + reg: x_reg(12).to_real_reg().unwrap(), + ty: I32, + extension: param.extension, + }); + args.push(ABIArg::Slots { + slots, + purpose: param.purpose, + }); + continue; + } + // Find regclass(es) of the register(s) used to store a value of this type. let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?; let mut slots = ABIArgSlotVec::new(); diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index 71d6043c421c..3b68000399fe 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -1024,7 +1024,8 @@ pub fn reg_name(reg: Reg) -> String { 9 => "s1".into(), 10 => "A".into(), 11 => "B".into(), - 12..=17 => format!("a{}", real.hw_enc() - 10), + 12 => "CTX".into(), + 13..=17 => format!("a{}", real.hw_enc() - 10), 18..=27 => format!("s{}", real.hw_enc() - 16), 28..=31 => format!("t{}", real.hw_enc() - 25), _ => unreachable!(), From 7256201da431f1520610ede88a8f885f5d8a5a8f Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 31 Aug 2023 12:11:37 +0100 Subject: [PATCH 48/68] Add a function for context_register --- cranelift/codegen/src/isa/zkasm/abi.rs | 5 ++--- cranelift/codegen/src/isa/zkasm/inst/regs.rs | 6 ++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index 739391a15341..fadab7d650cf 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -126,12 +126,11 @@ impl ABIMachineSpec for Riscv64MachineDeps { continue; } - // TODO(akashin): Figure out how to properly handle VMContext register. 
- // For now we pin it to `CTX` register of ZK ASM. + // For now we pin VMContext register to `CTX` register of ZK ASM. if let ir::ArgumentPurpose::VMContext = param.purpose { let mut slots = ABIArgSlotVec::new(); slots.push(ABIArgSlot::Reg { - reg: x_reg(12).to_real_reg().unwrap(), + reg: context_reg().to_real_reg().unwrap(), ty: I32, extension: param.extension, }); diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs index 33777caf2629..f99de3620c60 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/regs.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -80,6 +80,7 @@ pub fn zero_reg() -> Reg { pub fn writable_zero_reg() -> Writable { Writable::from_reg(zero_reg()) } + #[inline] pub fn stack_reg() -> Reg { x_reg(2) @@ -102,6 +103,11 @@ pub fn writable_link_reg() -> Writable { Writable::from_reg(link_reg()) } +/// Get a reference to the context register (CTX). +pub fn context_reg() -> Reg { + x_reg(12) +} + /// Get a reference to the frame pointer (x29). #[inline] pub fn fp_reg() -> Reg { From d6ddbf838b71cf5382c94feb57a565cf8a6ca567 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 31 Aug 2023 12:30:51 +0100 Subject: [PATCH 49/68] Support Icmp instruction --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 28 +++++++++++++++++--- cranelift/codegen/src/isa/zkasm/inst/mod.rs | 15 +++++++++-- cranelift/codegen/src/isa/zkasm/inst/regs.rs | 2 +- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index af30d47d0f90..7140a17fbca0 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -1520,10 +1520,30 @@ impl MachInstEmit for Inst { let a = alloc_value_regs(a, &mut allocs); let b = alloc_value_regs(b, &mut allocs); let rd = allocs.next_writable(rd); - put_string( - &format!("{:?}, {:?} => {} :CMP\n", a, b, reg_name(rd.to_reg())), - sink, - ); + + let a = a + .only_reg() + .expect("Only support 1 register in comparison now"); + let b = b + .only_reg() + .expect("Only support 1 register in comparison now"); + debug_assert_eq!(a, a0()); + debug_assert_eq!(b, b0()); + + let opcode = match cc { + IntCC::Equal => "EQ", + IntCC::NotEqual => "NEQ", + IntCC::SignedLessThan => "SLT", + IntCC::SignedGreaterThanOrEqual => todo!(), + IntCC::SignedGreaterThan => todo!(), + IntCC::SignedLessThanOrEqual => todo!(), + IntCC::UnsignedLessThan => "LT", + IntCC::UnsignedGreaterThanOrEqual => todo!(), + IntCC::UnsignedGreaterThan => todo!(), + IntCC::UnsignedLessThanOrEqual => todo!(), + }; + + put_string(&format!("$ => {} :{opcode}\n", reg_name(rd.to_reg())), sink); /* let label_true = sink.get_label(); diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index 3b68000399fe..4892bd01312a 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -568,8 +568,19 @@ fn zkasm_get_operands VReg>(inst: &Inst, collector: &mut OperandC } &Inst::Icmp { rd, a, b, .. } => { - collector.reg_uses(a.regs()); - collector.reg_uses(b.regs()); + // TODO(akashin): Why would Icmp have multiple input registers? 
+ // collector.reg_uses(a.regs()); + // collector.reg_uses(b.regs()); + collector.reg_fixed_use( + a.only_reg() + .expect("Only support 1 register in comparison now"), + a0(), + ); + collector.reg_fixed_use( + b.only_reg() + .expect("Only support 1 register in comparison now"), + b0(), + ); collector.reg_def(rd); } diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs index f99de3620c60..7f76b9baa070 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/regs.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -149,7 +149,7 @@ pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv { let preferred_regs_by_class: [Vec; 3] = { // Registers are A, B, C, D, E. let x_registers: Vec = (5..=7) - .chain(10..20) + .chain(10..=12) .map(|i| PReg::new(i, RegClass::Int)) .collect(); From 6f18a583e6934c9403f1e9983b21ccc77ee59cf7 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 31 Aug 2023 12:36:00 +0100 Subject: [PATCH 50/68] Update generated zkasm with conditionals --- cranelift/data/add.zkasm | 1 - cranelift/data/add_func.zkasm | 8 +++++--- cranelift/data/counter.zkasm | 10 +++++----- cranelift/data/fibonacci.zkasm | 20 +++++++++----------- cranelift/data/gen.sh | 8 ++++++++ cranelift/data/locals.zkasm | 1 - cranelift/data/locals_simple.zkasm | 1 - 7 files changed, 27 insertions(+), 22 deletions(-) create mode 100755 cranelift/data/gen.sh diff --git a/cranelift/data/add.zkasm b/cranelift/data/add.zkasm index 8211e8144920..74a7274eeb0d 100644 --- a/cranelift/data/add.zkasm +++ b/cranelift/data/add.zkasm @@ -6,7 +6,6 @@ function_0: SP + 1 => SP RR :MSTORE(SP) L0: - C => E 2 + 3 => C 0 + 5 => D C => A diff --git a/cranelift/data/add_func.zkasm b/cranelift/data/add_func.zkasm index d0dea0f151fc..e252c45ccf25 100644 --- a/cranelift/data/add_func.zkasm +++ b/cranelift/data/add_func.zkasm @@ -5,19 +5,21 @@ start: function_0: SP + 1 => SP RR :MSTORE(SP) + SP + 1 => SP L0: - C => s2 + CTX :MSTORE(SP) 0 + 2 => C 0 + 3 => D - s2 => E + $ => CTX :MLOAD(SP) C => A D :ASSERT 0 + 5 => D - s2 => E + $ => CTX :MLOAD(SP) C => A D :ASSERT :JMP(L1) L1: + SP - 1 => SP $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) diff --git a/cranelift/data/counter.zkasm b/cranelift/data/counter.zkasm index e4b7ab5b64a8..1bb0733cd5c0 100644 --- a/cranelift/data/counter.zkasm +++ b/cranelift/data/counter.zkasm @@ -6,15 +6,15 @@ function_0: SP + 1 => SP RR :MSTORE(SP) L0: - C => E 0 + 0 => A :JMP(L1) L1: 0 + 1 => B - $ => C :ADD - 0 + 10 => a7 - ValueRegs { parts: [p5i, v2097151] }, ValueRegs { parts: [p17i, v2097151] } => a7 :CMP - a7 :JMPZ(L3) + $ => A :ADD + 0 + 10 => B + $ => B :EQ + A => C + B :JMPZ(L3) :JMP(L2) L2: C => A diff --git a/cranelift/data/fibonacci.zkasm b/cranelift/data/fibonacci.zkasm index 508fa888e793..d83968e931aa 100644 --- a/cranelift/data/fibonacci.zkasm +++ b/cranelift/data/fibonacci.zkasm @@ -6,31 +6,29 @@ function_0: SP + 1 => SP RR :MSTORE(SP) L0: - C => a2 0 + 0 => A - A => a3 + A => D 0 + 0 => A 0 + 1 => B - B => a4 + B => E :JMP(L1) L1: $ => C :ADD - B => a4 + B => E 0 + 1 => B - a3 => A + D => A $ => A :ADD - 0 + 10 => E - ValueRegs { parts: [p10i, v2097151] }, ValueRegs { parts: [p7i, v2097151] } => E :CMP - E :JMPZ(L3) + 0 + 10 => B + $ => B :EQ + B :JMPZ(L3) :JMP(L2) L2: - A => a3 C => B - a4 => A + A => D + E => A :JMP(L1) L3: 0 + 89 => D - a2 => E C => A D :ASSERT :JMP(L4) diff --git a/cranelift/data/gen.sh b/cranelift/data/gen.sh new file mode 100755 index 000000000000..f0d369202ab0 --- /dev/null +++ b/cranelift/data/gen.sh @@ -0,0 +1,8 @@ 
+#!/bin/bash + +cargo build +for name in add counter add_func fibonacci locals locals_simple +do + echo $name; + ../target/debug/clif-util wasm --target sparc-unknown-unknown ../../zkwasm/data/$name.wat > data/$name.zkasm +done diff --git a/cranelift/data/locals.zkasm b/cranelift/data/locals.zkasm index 8211e8144920..74a7274eeb0d 100644 --- a/cranelift/data/locals.zkasm +++ b/cranelift/data/locals.zkasm @@ -6,7 +6,6 @@ function_0: SP + 1 => SP RR :MSTORE(SP) L0: - C => E 2 + 3 => C 0 + 5 => D C => A diff --git a/cranelift/data/locals_simple.zkasm b/cranelift/data/locals_simple.zkasm index 510ea91dd35e..874a6f7836d2 100644 --- a/cranelift/data/locals_simple.zkasm +++ b/cranelift/data/locals_simple.zkasm @@ -6,7 +6,6 @@ function_0: SP + 1 => SP RR :MSTORE(SP) L0: - C => E 0 + 2 => C 0 + 2 => D C => A From 33748c6fa5afdded8a291ca6c8adf6ec9a822fb7 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 31 Aug 2023 12:52:55 +0100 Subject: [PATCH 51/68] Fix inversion in jump comparison --- cranelift/codegen/src/isa/zkasm/inst/args.rs | 6 +++--- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 6 ++++-- cranelift/data/counter.zkasm | 2 +- cranelift/data/fibonacci.zkasm | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/args.rs b/cranelift/codegen/src/isa/zkasm/inst/args.rs index 997cc9aab185..269eb8555006 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/args.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/args.rs @@ -273,13 +273,13 @@ impl IntegerCompare { #[inline] pub(crate) fn op_name(&self) -> &'static str { match self.kind { - IntCC::Equal => "beq", + IntCC::Equal => "EQ", IntCC::NotEqual => "bne", - IntCC::SignedLessThan => "blt", + IntCC::SignedLessThan => "SLT", IntCC::SignedGreaterThanOrEqual => "bge", IntCC::SignedGreaterThan => "bgt", IntCC::SignedLessThanOrEqual => "ble", - IntCC::UnsignedLessThan => "bltu", + IntCC::UnsignedLessThan => "LT", IntCC::UnsignedGreaterThanOrEqual => "bgeu", IntCC::UnsignedGreaterThan => "bgtu", IntCC::UnsignedLessThanOrEqual => "bleu", diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 7140a17fbca0..3099e7d3558b 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -1098,11 +1098,13 @@ impl MachInstEmit for Inst { } => { kind.rs1 = allocs.next(kind.rs1); kind.rs2 = allocs.next(kind.rs2); - debug_assert_eq!(kind.rs2, zero_reg()); + // TODO(akashin): Support other types of comparisons. 
+ assert!(matches!(kind.kind, IntCC::NotEqual)); + assert_eq!(kind.rs2, zero_reg()); match taken { BranchTarget::Label(label) => { put_string( - &format!("{} :JMPZ(L{})\n", reg_name(kind.rs1), label.index()), + &format!("{} :JMPNZ(L{})\n", reg_name(kind.rs1), label.index()), sink, ); diff --git a/cranelift/data/counter.zkasm b/cranelift/data/counter.zkasm index 1bb0733cd5c0..3df91a869f17 100644 --- a/cranelift/data/counter.zkasm +++ b/cranelift/data/counter.zkasm @@ -14,7 +14,7 @@ L1: 0 + 10 => B $ => B :EQ A => C - B :JMPZ(L3) + B :JMPNZ(L3) :JMP(L2) L2: C => A diff --git a/cranelift/data/fibonacci.zkasm b/cranelift/data/fibonacci.zkasm index d83968e931aa..963f7f76ec53 100644 --- a/cranelift/data/fibonacci.zkasm +++ b/cranelift/data/fibonacci.zkasm @@ -20,7 +20,7 @@ L1: $ => A :ADD 0 + 10 => B $ => B :EQ - B :JMPZ(L3) + B :JMPNZ(L3) :JMP(L2) L2: C => B From 9d2bff72d383003dd1e54eec28cddf34b13197e0 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Mon, 4 Sep 2023 19:03:26 +0100 Subject: [PATCH 52/68] Implement function calls --- cranelift/codegen/src/isa/zkasm/abi.rs | 27 +--- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 141 +++++++++++-------- cranelift/data/add.zkasm | 7 +- cranelift/data/add_func.zkasm | 16 +-- cranelift/data/counter.zkasm | 7 +- cranelift/data/fibonacci.zkasm | 19 +-- cranelift/data/locals.zkasm | 7 +- cranelift/data/locals_simple.zkasm | 7 +- cranelift/src/wasm.rs | 29 +++- 9 files changed, 140 insertions(+), 120 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index fadab7d650cf..0b64ca72620b 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -103,9 +103,9 @@ impl ABIMachineSpec for Riscv64MachineDeps { // All registers that can be used as parameters or rets. // both start and end are included. let (x_start, x_end, f_start, f_end) = match (call_conv, args_or_rets) { - (isa::CallConv::Tail, _) => (5, 7, 5, 7), - (_, ArgsOrRets::Args) => (5, 7, 5, 7), - (_, ArgsOrRets::Rets) => (5, 7, 5, 7), + (isa::CallConv::Tail, _) => (10, 11, 0, 0), + (_, ArgsOrRets::Args) => (10, 11, 0, 0), + (_, ArgsOrRets::Rets) => (10, 11, 0, 0), }; let mut next_x_reg = x_start; let mut next_f_reg = f_start; @@ -511,7 +511,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { ) -> SmallVec<[Self::I; 2]> { let mut insts = SmallVec::new(); match &dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(Inst::Call { + &CallDest::ExtName(ref name, _) => insts.push(Inst::Call { info: Box::new(CallInfo { dest: name.clone(), uses, @@ -523,25 +523,6 @@ impl ABIMachineSpec for Riscv64MachineDeps { callee_pop_size, }), }), - &CallDest::ExtName(ref name, RelocDistance::Far) => { - insts.push(Inst::LoadExtName { - rd: tmp, - name: Box::new(name.clone()), - offset: 0, - }); - insts.push(Inst::CallInd { - info: Box::new(CallIndInfo { - rn: tmp.to_reg(), - uses, - defs, - clobbers, - opcode, - caller_callconv: caller_conv, - callee_callconv: callee_conv, - callee_pop_size, - }), - }); - } &CallDest::Reg(reg) => insts.push(Inst::CallInd { info: Box::new(CallIndInfo { rn: *reg, diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 3099e7d3558b..bd994c0406ba 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -921,78 +921,98 @@ impl MachInstEmit for Inst { } &Inst::Call { ref info } => { // call - todo!() /* match info.dest { - ExternalName::User { .. 
} => { - if info.opcode.is_call() { - sink.add_call_site(info.opcode); - } - sink.add_reloc(Reloc::RiscvCall, &info.dest, 0); - if let Some(s) = state.take_stack_map() { - sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s); - } - Inst::construct_auipc_and_jalr( - Some(writable_link_reg()), - writable_link_reg(), - 0, - ) - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - } - ExternalName::LibCall(..) - | ExternalName::TestCase { .. } - | ExternalName::KnownSymbol(..) => { - // use indirect call. it is more simple. - // load ext name. - Inst::LoadExtName { - rd: writable_spilltmp_reg2(), - name: Box::new(info.dest.clone()), - offset: 0, - } - .emit(&[], sink, emit_info, state); - - if let Some(s) = state.take_stack_map() { - sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); - } - if info.opcode.is_call() { - sink.add_call_site(info.opcode); - } - // call - Inst::Jalr { - rd: writable_link_reg(), - base: spilltmp_reg2(), - offset: Imm12::zero(), - } - .emit(&[], sink, emit_info, state); - } - } + match info.dest { + ExternalName::User(name) => { + // For now we only support calls. + assert!(info.opcode.is_call()); + sink.add_call_site(info.opcode); + sink.add_reloc(Reloc::RiscvCall, &info.dest, 0); + // This will be patched externally to do a necessary jump. + put_string(&format!("; CALL {name}\n"), sink); + + // match name.index() { + // // Special case for ASSERT call. + // 0 => { + // Inst::Mov { + // ty: types::I64, + // rd: regs::writable_a0(), + // rm: info.uses[0].preg, + // } + // .emit(&[], sink, emit_info, state); + // put_string( + // &format!("{} :ASSERT\n", reg_name(info.uses[1].preg)), + // sink, + // ); + // } + // v => { + // Inst::Jal { + // dest: BranchTarget::Label(MachLabel::new(v)), + // } + // .emit(&[], sink, emit_info, state); + // } + // }; + + // if let Some(s) = state.take_stack_map() { + // sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s); + // } + // Inst::construct_auipc_and_jalr( + // Some(writable_link_reg()), + // writable_link_reg(), + // 0, + // ) + // .into_iter() + // .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + ExternalName::LibCall(..) + | ExternalName::TestCase { .. } + | ExternalName::KnownSymbol(..) => { + unimplemented!(); + // use indirect call. it is more simple. + // load ext name. 
+ // Inst::LoadExtName { + // rd: writable_spilltmp_reg2(), + // name: Box::new(info.dest.clone()), + // offset: 0, + // } + // .emit(&[], sink, emit_info, state); + // + // if let Some(s) = state.take_stack_map() { + // sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + // } + // if info.opcode.is_call() { + // sink.add_call_site(info.opcode); + // } + // call + // Inst::Jalr { + // rd: writable_link_reg(), + // base: spilltmp_reg2(), + // offset: Imm12::zero(), + // } + // .emit(&[], sink, emit_info, state); + } + } - let callee_pop_size = i64::from(info.callee_pop_size); - state.virtual_sp_offset -= callee_pop_size; - trace!( - "call adjusts virtual sp offset by {callee_pop_size} -> {}", - state.virtual_sp_offset - ); */ + let callee_pop_size = i64::from(info.callee_pop_size); + state.virtual_sp_offset -= callee_pop_size; + trace!( + "call adjusts virtual sp offset by {callee_pop_size} -> {}", + state.virtual_sp_offset + ); } &Inst::CallInd { ref info } => { // let rn = allocs.next(info.rn); // put_string(&format!("CALL {}, {:?}\n", reg_name(rn), info.uses), sink); - Inst::Mov { - ty: types::I64, - rd: regs::writable_a0(), - rm: info.uses[0].preg, - } - .emit(&[], sink, emit_info, state); - put_string(&format!("{} :ASSERT\n", reg_name(info.uses[1].preg)), sink); + dbg!(info); + todo!(); + // For now we only support calls. + // assert!(info.opcode.is_call()); /* if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); } - if info.opcode.is_call() { - sink.add_call_site(info.opcode); - } Inst::Jalr { rd: writable_link_reg(), base: rn, @@ -2145,6 +2165,7 @@ impl MachInstEmit for Inst { ref name, offset, } => { + // dbg!(rd, name, offset); // let rd = allocs.next_writable(rd); // put_string(&format!("CALL {name:?} => {}\n", reg_name(rd.to_reg())), sink); diff --git a/cranelift/data/add.zkasm b/cranelift/data/add.zkasm index 74a7274eeb0d..2235140a57fe 100644 --- a/cranelift/data/add.zkasm +++ b/cranelift/data/add.zkasm @@ -6,10 +6,9 @@ function_0: SP + 1 => SP RR :MSTORE(SP) L0: - 2 + 3 => C - 0 + 5 => D - C => A - D :ASSERT + 2 + 3 => A + 0 + 5 => B + B :ASSERT :JMP(L1) L1: $ => RR :MLOAD(SP) diff --git a/cranelift/data/add_func.zkasm b/cranelift/data/add_func.zkasm index e252c45ccf25..a80067d32473 100644 --- a/cranelift/data/add_func.zkasm +++ b/cranelift/data/add_func.zkasm @@ -8,15 +8,13 @@ function_0: SP + 1 => SP L0: CTX :MSTORE(SP) - 0 + 2 => C - 0 + 3 => D + 0 + 2 => A + 0 + 3 => B $ => CTX :MLOAD(SP) - C => A - D :ASSERT - 0 + 5 => D + B :ASSERT + 0 + 5 => B $ => CTX :MLOAD(SP) - C => A - D :ASSERT + JMP(function_1) :JMP(L1) L1: SP - 1 => SP @@ -26,9 +24,7 @@ L1: function_1: L0: - C => A - D => B - $ => C :ADD + $ => A :ADD :JMP(L1) L1: :JMP(RR) diff --git a/cranelift/data/counter.zkasm b/cranelift/data/counter.zkasm index 3df91a869f17..87ac80ce0e74 100644 --- a/cranelift/data/counter.zkasm +++ b/cranelift/data/counter.zkasm @@ -13,16 +13,13 @@ L1: $ => A :ADD 0 + 10 => B $ => B :EQ - A => C B :JMPNZ(L3) :JMP(L2) L2: - C => A :JMP(L1) L3: - 0 + 10 => D - C => A - D :ASSERT + 0 + 10 => B + B :ASSERT :JMP(L4) L4: $ => RR :MLOAD(SP) diff --git a/cranelift/data/fibonacci.zkasm b/cranelift/data/fibonacci.zkasm index 963f7f76ec53..13a44391fe47 100644 --- a/cranelift/data/fibonacci.zkasm +++ b/cranelift/data/fibonacci.zkasm @@ -5,34 +5,37 @@ start: function_0: SP + 1 => SP RR :MSTORE(SP) + SP + 1 => SP L0: 0 + 0 => A A => D 0 + 0 => A 0 + 1 => B - B => E + B :MSTORE(SP) :JMP(L1) L1: - $ => C :ADD - B => E + $ => A :ADD + A => C + B 
:MSTORE(SP) 0 + 1 => B D => A $ => A :ADD 0 + 10 => B - $ => B :EQ - B :JMPNZ(L3) + $ => E :EQ + E :JMPNZ(L3) :JMP(L2) L2: C => B A => D - E => A + $ => A :MLOAD(SP) :JMP(L1) L3: - 0 + 89 => D + 0 + 89 => B C => A - D :ASSERT + B :ASSERT :JMP(L4) L4: + SP - 1 => SP $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) diff --git a/cranelift/data/locals.zkasm b/cranelift/data/locals.zkasm index 74a7274eeb0d..2235140a57fe 100644 --- a/cranelift/data/locals.zkasm +++ b/cranelift/data/locals.zkasm @@ -6,10 +6,9 @@ function_0: SP + 1 => SP RR :MSTORE(SP) L0: - 2 + 3 => C - 0 + 5 => D - C => A - D :ASSERT + 2 + 3 => A + 0 + 5 => B + B :ASSERT :JMP(L1) L1: $ => RR :MLOAD(SP) diff --git a/cranelift/data/locals_simple.zkasm b/cranelift/data/locals_simple.zkasm index 874a6f7836d2..a825897bcd18 100644 --- a/cranelift/data/locals_simple.zkasm +++ b/cranelift/data/locals_simple.zkasm @@ -6,10 +6,9 @@ function_0: SP + 1 => SP RR :MSTORE(SP) L0: - 0 + 2 => C - 0 + 2 => D - C => A - D :ASSERT + 0 + 2 => A + 0 + 2 => B + B :ASSERT :JMP(L1) L1: $ => RR :MLOAD(SP) diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index 2aae2248e394..b236a11ed350 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -10,6 +10,7 @@ use crate::disasm::print_all; use anyhow::{Context as _, Result}; use clap::Parser; +use cranelift_codegen::ir::ExternalName; use cranelift_codegen::print_errors::{pretty_error, pretty_verifier_error}; use cranelift_codegen::settings::FlagsOrIsa; use cranelift_codegen::timing; @@ -241,7 +242,8 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - .start_func .expect("Must have a start function"); println!(" zkPC + 2 => RR"); - // TODO(akashin): Figure out why we need to do -1 here. + // TODO(akashin): This is a poor translation between DefinedFuncIndex and FuncIndex. + // Ideally, we would use some library function for this. 
println!(" :JMP(function_{})", start_func.index() - 1); println!(" :JMP(finalizeExecution)"); @@ -265,8 +267,31 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - .compile_and_emit(isa, &mut mem, &mut Default::default()) .map_err(|err| anyhow::anyhow!("{}", pretty_error(&err.func, err.inner)))?; let code_info = compiled_code.code_info(); + let mut code_buffer = compiled_code.code_buffer().to_vec(); + let mut delta = 0i32; + for reloc in compiled_code.buffer.relocs() { + let start = (reloc.offset as i32 + delta) as usize; + let mut pos = start; + while code_buffer[pos] != b'\n' { + pos += 1; + delta -= 1; + } + + let code = if let ExternalName::User(name) = reloc.name { + if name.index() == 0 { + b" B :ASSERT".to_vec() + } else { + format!(" JMP(function_{})", name.index()).as_bytes().to_vec() + } + } else { + b" UNKNOWN".to_vec() + }; + delta += code.len() as i32; + + code_buffer.splice(start..pos, code); + } - if let Ok(code) = std::str::from_utf8(compiled_code.code_buffer()) { + if let Ok(code) = std::str::from_utf8(&code_buffer) { println!("{code}",); } From ee6bed4aad59f68e02e308a0b0d249675d9120dd Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Mon, 4 Sep 2023 20:58:03 +0100 Subject: [PATCH 53/68] Fix function indexing --- cranelift/data/add.zkasm | 4 ++-- cranelift/data/add_func.zkasm | 6 +++--- cranelift/data/counter.zkasm | 4 ++-- cranelift/data/fibonacci.zkasm | 4 ++-- cranelift/data/locals.zkasm | 4 ++-- cranelift/data/locals_simple.zkasm | 4 ++-- cranelift/src/wasm.rs | 6 ++---- 7 files changed, 15 insertions(+), 17 deletions(-) diff --git a/cranelift/data/add.zkasm b/cranelift/data/add.zkasm index 2235140a57fe..cbe5e153f19c 100644 --- a/cranelift/data/add.zkasm +++ b/cranelift/data/add.zkasm @@ -1,8 +1,8 @@ start: zkPC + 2 => RR - :JMP(function_0) + :JMP(function_1) :JMP(finalizeExecution) -function_0: +function_1: SP + 1 => SP RR :MSTORE(SP) L0: diff --git a/cranelift/data/add_func.zkasm b/cranelift/data/add_func.zkasm index a80067d32473..a48bfb3a0606 100644 --- a/cranelift/data/add_func.zkasm +++ b/cranelift/data/add_func.zkasm @@ -1,8 +1,8 @@ start: zkPC + 2 => RR - :JMP(function_0) + :JMP(function_1) :JMP(finalizeExecution) -function_0: +function_1: SP + 1 => SP RR :MSTORE(SP) SP + 1 => SP @@ -22,7 +22,7 @@ L1: SP - 1 => SP :JMP(RR) -function_1: +function_2: L0: $ => A :ADD :JMP(L1) diff --git a/cranelift/data/counter.zkasm b/cranelift/data/counter.zkasm index 87ac80ce0e74..2edf71973fd4 100644 --- a/cranelift/data/counter.zkasm +++ b/cranelift/data/counter.zkasm @@ -1,8 +1,8 @@ start: zkPC + 2 => RR - :JMP(function_0) + :JMP(function_1) :JMP(finalizeExecution) -function_0: +function_1: SP + 1 => SP RR :MSTORE(SP) L0: diff --git a/cranelift/data/fibonacci.zkasm b/cranelift/data/fibonacci.zkasm index 13a44391fe47..dd30bd570ead 100644 --- a/cranelift/data/fibonacci.zkasm +++ b/cranelift/data/fibonacci.zkasm @@ -1,8 +1,8 @@ start: zkPC + 2 => RR - :JMP(function_0) + :JMP(function_1) :JMP(finalizeExecution) -function_0: +function_1: SP + 1 => SP RR :MSTORE(SP) SP + 1 => SP diff --git a/cranelift/data/locals.zkasm b/cranelift/data/locals.zkasm index 2235140a57fe..cbe5e153f19c 100644 --- a/cranelift/data/locals.zkasm +++ b/cranelift/data/locals.zkasm @@ -1,8 +1,8 @@ start: zkPC + 2 => RR - :JMP(function_0) + :JMP(function_1) :JMP(finalizeExecution) -function_0: +function_1: SP + 1 => SP RR :MSTORE(SP) L0: diff --git a/cranelift/data/locals_simple.zkasm b/cranelift/data/locals_simple.zkasm index a825897bcd18..f691bcf76d22 100644 --- 
a/cranelift/data/locals_simple.zkasm +++ b/cranelift/data/locals_simple.zkasm @@ -1,8 +1,8 @@ start: zkPC + 2 => RR - :JMP(function_0) + :JMP(function_1) :JMP(finalizeExecution) -function_0: +function_1: SP + 1 => SP RR :MSTORE(SP) L0: diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index b236a11ed350..43fed6d85c4e 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -242,20 +242,18 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - .start_func .expect("Must have a start function"); println!(" zkPC + 2 => RR"); - // TODO(akashin): This is a poor translation between DefinedFuncIndex and FuncIndex. - // Ideally, we would use some library function for this. - println!(" :JMP(function_{})", start_func.index() - 1); + println!(" :JMP(function_{})", start_func.index()); println!(" :JMP(finalizeExecution)"); let num_func_imports = dummy_environ.get_num_func_imports(); let mut total_module_code_size = 0; let mut context = Context::new(); for (def_index, func) in dummy_environ.info.function_bodies.iter() { - println!("function_{}:", def_index.index()); context.func = func.clone(); let mut saved_size = None; let func_index = num_func_imports + def_index.index(); + println!("function_{}:", func_index); let mut mem = vec![]; let (relocs, traps, stack_maps) = if options.check_translation { if let Err(errors) = context.verify(fisa) { From e044dfbf72ff5f4aef4bc499f4d88e678abd275d Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Mon, 4 Sep 2023 21:00:53 +0100 Subject: [PATCH 54/68] Add a newline between functions --- cranelift/src/wasm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index 43fed6d85c4e..52c5bb933661 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -253,7 +253,7 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - let mut saved_size = None; let func_index = num_func_imports + def_index.index(); - println!("function_{}:", func_index); + println!("\nfunction_{}:", func_index); let mut mem = vec![]; let (relocs, traps, stack_maps) = if options.check_translation { if let Err(errors) = context.verify(fisa) { From a382e44b4655621038ee0da1d359007a587270f6 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Mon, 4 Sep 2023 21:34:22 +0100 Subject: [PATCH 55/68] Property result extfuncnames --- cranelift/data/add.zkasm | 1 + cranelift/data/add_func.zkasm | 6 ++++-- cranelift/data/counter.zkasm | 1 + cranelift/data/fibonacci.zkasm | 1 + cranelift/data/locals.zkasm | 1 + cranelift/data/locals_simple.zkasm | 1 + cranelift/src/wasm.rs | 5 +++-- 7 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cranelift/data/add.zkasm b/cranelift/data/add.zkasm index cbe5e153f19c..430daa212ffa 100644 --- a/cranelift/data/add.zkasm +++ b/cranelift/data/add.zkasm @@ -2,6 +2,7 @@ start: zkPC + 2 => RR :JMP(function_1) :JMP(finalizeExecution) + function_1: SP + 1 => SP RR :MSTORE(SP) diff --git a/cranelift/data/add_func.zkasm b/cranelift/data/add_func.zkasm index a48bfb3a0606..091e154d8a14 100644 --- a/cranelift/data/add_func.zkasm +++ b/cranelift/data/add_func.zkasm @@ -2,6 +2,7 @@ start: zkPC + 2 => RR :JMP(function_1) :JMP(finalizeExecution) + function_1: SP + 1 => SP RR :MSTORE(SP) @@ -11,10 +12,10 @@ L0: 0 + 2 => A 0 + 3 => B $ => CTX :MLOAD(SP) - B :ASSERT + JMP(function_2) 0 + 5 => B $ => CTX :MLOAD(SP) - JMP(function_1) + B :ASSERT :JMP(L1) L1: SP - 1 => SP @@ -22,6 +23,7 @@ L1: SP - 1 => SP :JMP(RR) + function_2: L0: $ => A :ADD diff 
--git a/cranelift/data/counter.zkasm b/cranelift/data/counter.zkasm index 2edf71973fd4..31bf9034f25d 100644 --- a/cranelift/data/counter.zkasm +++ b/cranelift/data/counter.zkasm @@ -2,6 +2,7 @@ start: zkPC + 2 => RR :JMP(function_1) :JMP(finalizeExecution) + function_1: SP + 1 => SP RR :MSTORE(SP) diff --git a/cranelift/data/fibonacci.zkasm b/cranelift/data/fibonacci.zkasm index dd30bd570ead..03e83db996e9 100644 --- a/cranelift/data/fibonacci.zkasm +++ b/cranelift/data/fibonacci.zkasm @@ -2,6 +2,7 @@ start: zkPC + 2 => RR :JMP(function_1) :JMP(finalizeExecution) + function_1: SP + 1 => SP RR :MSTORE(SP) diff --git a/cranelift/data/locals.zkasm b/cranelift/data/locals.zkasm index cbe5e153f19c..430daa212ffa 100644 --- a/cranelift/data/locals.zkasm +++ b/cranelift/data/locals.zkasm @@ -2,6 +2,7 @@ start: zkPC + 2 => RR :JMP(function_1) :JMP(finalizeExecution) + function_1: SP + 1 => SP RR :MSTORE(SP) diff --git a/cranelift/data/locals_simple.zkasm b/cranelift/data/locals_simple.zkasm index f691bcf76d22..19e7fbf21ae0 100644 --- a/cranelift/data/locals_simple.zkasm +++ b/cranelift/data/locals_simple.zkasm @@ -2,6 +2,7 @@ start: zkPC + 2 => RR :JMP(function_1) :JMP(finalizeExecution) + function_1: SP + 1 => SP RR :MSTORE(SP) diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index 52c5bb933661..c7f58b32743e 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -276,10 +276,11 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - } let code = if let ExternalName::User(name) = reloc.name { - if name.index() == 0 { + let name = &func.params.user_named_funcs()[name]; + if name.index == 0 { b" B :ASSERT".to_vec() } else { - format!(" JMP(function_{})", name.index()).as_bytes().to_vec() + format!(" JMP(function_{})", name.index).as_bytes().to_vec() } } else { b" UNKNOWN".to_vec() From 140d38c4c46e3d02c0077c3d3c8914e43a61b46e Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 5 Sep 2023 11:30:58 +0100 Subject: [PATCH 56/68] Disambiguate labels across functions --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 10 +++++----- cranelift/data/add.zkasm | 7 +++---- cranelift/data/add_func.zkasm | 16 +++++++-------- cranelift/data/counter.zkasm | 21 ++++++++++---------- cranelift/data/fibonacci.zkasm | 21 ++++++++++---------- cranelift/data/locals.zkasm | 7 +++---- cranelift/data/locals_simple.zkasm | 7 +++---- cranelift/src/wasm.rs | 20 +++++++++++++++++-- 8 files changed, 59 insertions(+), 50 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index bd994c0406ba..3a63ce3eaf8b 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -562,7 +562,7 @@ impl MachInstEmit for Inst { x.emit(&[], sink, emit_info, state) */ } &Inst::Label { imm } => { - sink.put_data(format!("L{imm}:\n").as_bytes()); + sink.put_data(format!("label_{imm}:\n").as_bytes()); } &Inst::RawData { ref data } => { // Right now we only put a u32 or u64 in this instruction. @@ -1084,9 +1084,9 @@ impl MachInstEmit for Inst { match dest { BranchTarget::Label(label) => { // TODO: the following two lines allow eg. 
optimizing out jump-to-here - /* sink.use_label_at_offset(start_off, label, LabelUse::Jal20); - sink.add_uncond_branch(start_off, start_off + 4, label); */ - put_string(&format!(":JMP(L{})\n", label.index()), sink); + // sink.use_label_at_offset(start_off, label, LabelUse::Jal20); + // sink.add_uncond_branch(start_off, start_off + 4, label); + put_string(&format!(":JMP(label_{})\n", label.index()), sink); } BranchTarget::ResolvedOffset(offset) => { todo!() /* @@ -1124,7 +1124,7 @@ impl MachInstEmit for Inst { match taken { BranchTarget::Label(label) => { put_string( - &format!("{} :JMPNZ(L{})\n", reg_name(kind.rs1), label.index()), + &format!("{} :JMPNZ(label_{})\n", reg_name(kind.rs1), label.index()), sink, ); diff --git a/cranelift/data/add.zkasm b/cranelift/data/add.zkasm index 430daa212ffa..c177a0e2a80f 100644 --- a/cranelift/data/add.zkasm +++ b/cranelift/data/add.zkasm @@ -6,16 +6,15 @@ start: function_1: SP + 1 => SP RR :MSTORE(SP) -L0: +L1_0: 2 + 3 => A 0 + 5 => B B :ASSERT - :JMP(L1) -L1: + :JMP(L1_1) +L1_1: $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) - finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/data/add_func.zkasm b/cranelift/data/add_func.zkasm index 091e154d8a14..1ce9996b58c1 100644 --- a/cranelift/data/add_func.zkasm +++ b/cranelift/data/add_func.zkasm @@ -7,30 +7,28 @@ function_1: SP + 1 => SP RR :MSTORE(SP) SP + 1 => SP -L0: +L1_0: CTX :MSTORE(SP) 0 + 2 => A 0 + 3 => B $ => CTX :MLOAD(SP) - JMP(function_2) + :JMP(function_2) 0 + 5 => B $ => CTX :MLOAD(SP) B :ASSERT - :JMP(L1) -L1: + :JMP(L1_1) +L1_1: SP - 1 => SP $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) - function_2: -L0: +L2_0: $ => A :ADD - :JMP(L1) -L1: + :JMP(L2_1) +L2_1: :JMP(RR) - finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/data/counter.zkasm b/cranelift/data/counter.zkasm index 31bf9034f25d..a1f251ee819f 100644 --- a/cranelift/data/counter.zkasm +++ b/cranelift/data/counter.zkasm @@ -6,27 +6,26 @@ start: function_1: SP + 1 => SP RR :MSTORE(SP) -L0: +L1_0: 0 + 0 => A - :JMP(L1) -L1: + :JMP(L1_1) +L1_1: 0 + 1 => B $ => A :ADD 0 + 10 => B $ => B :EQ - B :JMPNZ(L3) - :JMP(L2) -L2: - :JMP(L1) -L3: + B :JMPNZ(L1_3) + :JMP(L1_2) +L1_2: + :JMP(L1_1) +L1_3: 0 + 10 => B B :ASSERT - :JMP(L4) -L4: + :JMP(L1_4) +L1_4: $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) - finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/data/fibonacci.zkasm b/cranelift/data/fibonacci.zkasm index 03e83db996e9..6ab1b60d8b05 100644 --- a/cranelift/data/fibonacci.zkasm +++ b/cranelift/data/fibonacci.zkasm @@ -7,14 +7,14 @@ function_1: SP + 1 => SP RR :MSTORE(SP) SP + 1 => SP -L0: +L1_0: 0 + 0 => A A => D 0 + 0 => A 0 + 1 => B B :MSTORE(SP) - :JMP(L1) -L1: + :JMP(L1_1) +L1_1: $ => A :ADD A => C B :MSTORE(SP) @@ -23,24 +23,23 @@ L1: $ => A :ADD 0 + 10 => B $ => E :EQ - E :JMPNZ(L3) - :JMP(L2) -L2: + E :JMPNZ(L1_3) + :JMP(L1_2) +L1_2: C => B A => D $ => A :MLOAD(SP) - :JMP(L1) -L3: + :JMP(L1_1) +L1_3: 0 + 89 => B C => A B :ASSERT - :JMP(L4) -L4: + :JMP(L1_4) +L1_4: SP - 1 => SP $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) - finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/data/locals.zkasm b/cranelift/data/locals.zkasm index 430daa212ffa..c177a0e2a80f 100644 --- a/cranelift/data/locals.zkasm +++ b/cranelift/data/locals.zkasm @@ -6,16 +6,15 @@ start: function_1: SP + 1 => SP RR :MSTORE(SP) -L0: +L1_0: 2 + 3 => A 0 + 5 => B B :ASSERT - :JMP(L1) -L1: + :JMP(L1_1) +L1_1: $ => RR 
:MLOAD(SP) SP - 1 => SP :JMP(RR) - finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/data/locals_simple.zkasm b/cranelift/data/locals_simple.zkasm index 19e7fbf21ae0..3f8e53237038 100644 --- a/cranelift/data/locals_simple.zkasm +++ b/cranelift/data/locals_simple.zkasm @@ -6,16 +6,15 @@ start: function_1: SP + 1 => SP RR :MSTORE(SP) -L0: +L1_0: 0 + 2 => A 0 + 2 => B B :ASSERT - :JMP(L1) -L1: + :JMP(L1_1) +L1_1: $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) - finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index c7f58b32743e..876e8198a2e3 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -280,7 +280,9 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - if name.index == 0 { b" B :ASSERT".to_vec() } else { - format!(" JMP(function_{})", name.index).as_bytes().to_vec() + format!(" :JMP(function_{})", name.index) + .as_bytes() + .to_vec() } } else { b" UNKNOWN".to_vec() @@ -291,7 +293,21 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - } if let Ok(code) = std::str::from_utf8(&code_buffer) { - println!("{code}",); + let mut lines = Vec::new(); + for line in code.lines() { + let mut line = line.to_string(); + if line.starts_with(&"label_") { + let label_index = &line[6..]; + line = format!("L{func_index}_{label_index}"); + } else if line.contains(&"label_") { + let pos = line.find(&"label_").unwrap(); + let pos_end = pos + line[pos..].find(&")").unwrap(); + let label_index = &line[pos + 6..pos_end]; + line.replace_range(pos..pos_end, &format!("L{func_index}_{label_index}")); + } + lines.push(line); + } + println!("{}", lines.join("\n")); } if options.print_size { From 9c6677e17a1a216425bc7029977a7f5c19bc74cf Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 5 Sep 2023 11:55:11 +0100 Subject: [PATCH 57/68] Remove redundant labels --- cranelift/data/add.zkasm | 4 +-- cranelift/data/add_func.zkasm | 7 +---- cranelift/data/counter.zkasm | 6 +--- cranelift/data/fibonacci.zkasm | 6 +--- cranelift/data/locals.zkasm | 4 +-- cranelift/data/locals_simple.zkasm | 4 +-- cranelift/src/wasm.rs | 44 ++++++++++++++++++++++++++---- 7 files changed, 45 insertions(+), 30 deletions(-) diff --git a/cranelift/data/add.zkasm b/cranelift/data/add.zkasm index c177a0e2a80f..9bc8f465b6b3 100644 --- a/cranelift/data/add.zkasm +++ b/cranelift/data/add.zkasm @@ -6,15 +6,13 @@ start: function_1: SP + 1 => SP RR :MSTORE(SP) -L1_0: 2 + 3 => A 0 + 5 => B B :ASSERT - :JMP(L1_1) -L1_1: $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) + finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/data/add_func.zkasm b/cranelift/data/add_func.zkasm index 1ce9996b58c1..f47c4d260e5d 100644 --- a/cranelift/data/add_func.zkasm +++ b/cranelift/data/add_func.zkasm @@ -7,7 +7,6 @@ function_1: SP + 1 => SP RR :MSTORE(SP) SP + 1 => SP -L1_0: CTX :MSTORE(SP) 0 + 2 => A 0 + 3 => B @@ -16,19 +15,15 @@ L1_0: 0 + 5 => B $ => CTX :MLOAD(SP) B :ASSERT - :JMP(L1_1) -L1_1: SP - 1 => SP $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) function_2: -L2_0: $ => A :ADD - :JMP(L2_1) -L2_1: :JMP(RR) + finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/data/counter.zkasm b/cranelift/data/counter.zkasm index a1f251ee819f..a69561b7ebb0 100644 --- a/cranelift/data/counter.zkasm +++ b/cranelift/data/counter.zkasm @@ -6,7 +6,6 @@ start: function_1: SP + 1 => SP RR :MSTORE(SP) -L1_0: 0 + 0 => A 
:JMP(L1_1) L1_1: @@ -15,17 +14,14 @@ L1_1: 0 + 10 => B $ => B :EQ B :JMPNZ(L1_3) - :JMP(L1_2) -L1_2: :JMP(L1_1) L1_3: 0 + 10 => B B :ASSERT - :JMP(L1_4) -L1_4: $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) + finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/data/fibonacci.zkasm b/cranelift/data/fibonacci.zkasm index 6ab1b60d8b05..3f6d73266c71 100644 --- a/cranelift/data/fibonacci.zkasm +++ b/cranelift/data/fibonacci.zkasm @@ -7,7 +7,6 @@ function_1: SP + 1 => SP RR :MSTORE(SP) SP + 1 => SP -L1_0: 0 + 0 => A A => D 0 + 0 => A @@ -24,8 +23,6 @@ L1_1: 0 + 10 => B $ => E :EQ E :JMPNZ(L1_3) - :JMP(L1_2) -L1_2: C => B A => D $ => A :MLOAD(SP) @@ -34,12 +31,11 @@ L1_3: 0 + 89 => B C => A B :ASSERT - :JMP(L1_4) -L1_4: SP - 1 => SP $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) + finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/data/locals.zkasm b/cranelift/data/locals.zkasm index c177a0e2a80f..9bc8f465b6b3 100644 --- a/cranelift/data/locals.zkasm +++ b/cranelift/data/locals.zkasm @@ -6,15 +6,13 @@ start: function_1: SP + 1 => SP RR :MSTORE(SP) -L1_0: 2 + 3 => A 0 + 5 => B B :ASSERT - :JMP(L1_1) -L1_1: $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) + finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/data/locals_simple.zkasm b/cranelift/data/locals_simple.zkasm index 3f8e53237038..dcbe3fb15b7a 100644 --- a/cranelift/data/locals_simple.zkasm +++ b/cranelift/data/locals_simple.zkasm @@ -6,15 +6,13 @@ start: function_1: SP + 1 => SP RR :MSTORE(SP) -L1_0: 0 + 2 => A 0 + 2 => B B :ASSERT - :JMP(L1_1) -L1_1: $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) + finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index 876e8198a2e3..84d73e73396e 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -18,6 +18,7 @@ use cranelift_codegen::Context; use cranelift_entity::EntityRef; use cranelift_reader::parse_sets_and_triple; use cranelift_wasm::{translate_module, DummyEnvironment, FuncIndex}; +use std::collections::{HashMap, HashSet}; use std::io::Read; use std::path::Path; use std::path::PathBuf; @@ -293,20 +294,52 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - } if let Ok(code) = std::str::from_utf8(&code_buffer) { + let mut label_definition: HashMap = HashMap::new(); + let mut label_uses: HashMap> = HashMap::new(); let mut lines = Vec::new(); - for line in code.lines() { + for (index, line) in code.lines().enumerate() { let mut line = line.to_string(); if line.starts_with(&"label_") { - let label_index = &line[6..]; - line = format!("L{func_index}_{label_index}"); + let label_index: usize = line[6..line.len() - 1] + .parse() + .expect("Failed to parse label index"); + line = format!("L{func_index}_{label_index}:"); + label_definition.insert(label_index, index); } else if line.contains(&"label_") { let pos = line.find(&"label_").unwrap(); let pos_end = pos + line[pos..].find(&")").unwrap(); - let label_index = &line[pos + 6..pos_end]; + let label_index: usize = line[pos + 6..pos_end] + .parse() + .expect("Failed to parse label index"); line.replace_range(pos..pos_end, &format!("L{func_index}_{label_index}")); + label_uses.entry(label_index).or_default().push(index); } lines.push(line); } + + let mut lines_to_delete = Vec::new(); + for (label, label_line) in label_definition { + match label_uses.entry(label) { + std::collections::hash_map::Entry::Occupied(uses) => { + if 
uses.get().len() == 1 { + let use_line = uses.get()[0]; + if use_line + 1 == label_line { + lines_to_delete.push(use_line); + lines_to_delete.push(label_line); + } + } + } + std::collections::hash_map::Entry::Vacant(_) => { + lines_to_delete.push(label_line); + } + } + } + lines_to_delete.sort(); + lines_to_delete.reverse(); + for index in lines_to_delete { + lines.remove(index); + } + println!("{}", lines.join("\n")); } @@ -365,7 +398,8 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - context.clear(); } - let postamble = "finalizeExecution: + let postamble = " +finalizeExecution: ${beforeLast()} :JMPN(finalizeExecution) :JMP(start) "; From e9ad96586c48fe59b2c7dfe06966f50c1e781c46 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 5 Sep 2023 12:26:12 +0100 Subject: [PATCH 58/68] Disable clobbering of CTX register --- cranelift/codegen/src/isa/zkasm/abi.rs | 3 ++- cranelift/data/add_func.zkasm | 5 ----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index 0b64ca72620b..18f10eae76b6 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -752,7 +752,8 @@ const fn default_clobbers() -> PRegSet { .with(px_reg(7)) .with(px_reg(10)) .with(px_reg(11)) - .with(px_reg(12)) + // CTX register is not clobbered. + // .with(px_reg(12)) .with(px_reg(13)) .with(px_reg(14)) .with(px_reg(15)) diff --git a/cranelift/data/add_func.zkasm b/cranelift/data/add_func.zkasm index f47c4d260e5d..b4d5be70539e 100644 --- a/cranelift/data/add_func.zkasm +++ b/cranelift/data/add_func.zkasm @@ -6,16 +6,11 @@ start: function_1: SP + 1 => SP RR :MSTORE(SP) - SP + 1 => SP - CTX :MSTORE(SP) 0 + 2 => A 0 + 3 => B - $ => CTX :MLOAD(SP) :JMP(function_2) 0 + 5 => B - $ => CTX :MLOAD(SP) B :ASSERT - SP - 1 => SP $ => RR :MLOAD(SP) SP - 1 => SP :JMP(RR) From f37587d2328c147736ff1a36d7aadc3921ae2dd8 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 5 Sep 2023 12:34:21 +0100 Subject: [PATCH 59/68] Fix return point after function call --- cranelift/data/add_func.zkasm | 1 + cranelift/src/wasm.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cranelift/data/add_func.zkasm b/cranelift/data/add_func.zkasm index b4d5be70539e..cddf7c328475 100644 --- a/cranelift/data/add_func.zkasm +++ b/cranelift/data/add_func.zkasm @@ -8,6 +8,7 @@ function_1: RR :MSTORE(SP) 0 + 2 => A 0 + 3 => B + zkPC + 2 => RR :JMP(function_2) 0 + 5 => B B :ASSERT diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index 84d73e73396e..b4a435322c16 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -281,7 +281,7 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - if name.index == 0 { b" B :ASSERT".to_vec() } else { - format!(" :JMP(function_{})", name.index) + format!(" zkPC + 2 => RR\n :JMP(function_{})", name.index) .as_bytes() .to_vec() } From d72c06014ddc7e514bc30d398347884f9defe730 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 5 Sep 2023 12:39:13 +0100 Subject: [PATCH 60/68] Fix warnings --- cranelift/codegen/src/isa/zkasm/abi.rs | 8 ++++---- cranelift/src/wasm.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs index 18f10eae76b6..d88031c0b117 100644 --- a/cranelift/codegen/src/isa/zkasm/abi.rs +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -356,7 +356,7 @@ impl ABIMachineSpec for Riscv64MachineDeps 
{ } } - fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec { + fn gen_prologue_frame_setup(_flags: &settings::Flags) -> SmallInstVec { // add sp,sp,-16 ;; alloc stack space for fp. // sd ra,8(sp) ;; save ra. // sd fp,0(sp) ;; store old fp. @@ -423,8 +423,8 @@ impl ABIMachineSpec for Riscv64MachineDeps { // nominal SP offset; abi_impl generic code will do that. fn gen_clobber_save( _call_conv: isa::CallConv, - setup_frame: bool, - flags: &settings::Flags, + _setup_frame: bool, + _flags: &settings::Flags, clobbered_callee_saves: &[Writable], fixed_frame_storage_size: u32, _outgoing_args_size: u32, @@ -504,7 +504,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { defs: CallRetList, clobbers: PRegSet, opcode: ir::Opcode, - tmp: Writable, + _tmp: Writable, callee_conv: isa::CallConv, caller_conv: isa::CallConv, callee_pop_size: u32, diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index b4a435322c16..c0c00d4388da 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -18,7 +18,7 @@ use cranelift_codegen::Context; use cranelift_entity::EntityRef; use cranelift_reader::parse_sets_and_triple; use cranelift_wasm::{translate_module, DummyEnvironment, FuncIndex}; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::io::Read; use std::path::Path; use std::path::PathBuf; From 957c5287d61e5b68e2a87b4bf306f91b7ac36dc3 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Wed, 6 Sep 2023 21:11:49 +0100 Subject: [PATCH 61/68] Add recursive fibonacci zkasm --- cranelift/codegen/src/isa/zkasm/inst/args.rs | 6 +- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 13 ++-- cranelift/data/fibonacci_recursive.zkasm | 67 ++++++++++++++++++++ cranelift/data/gen.sh | 2 +- 4 files changed, 78 insertions(+), 10 deletions(-) create mode 100644 cranelift/data/fibonacci_recursive.zkasm diff --git a/cranelift/codegen/src/isa/zkasm/inst/args.rs b/cranelift/codegen/src/isa/zkasm/inst/args.rs index 269eb8555006..d71cef8a32c4 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/args.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/args.rs @@ -776,7 +776,7 @@ impl AluOPRRR { pub(crate) const fn op_name(self) -> &'static str { match self { Self::Add => "ADD", - Self::Sub => "sub", + Self::Sub => "SUB", Self::Sll => "sll", Self::Slt => "slt", Self::Sgt => "sgt", @@ -787,8 +787,8 @@ impl AluOPRRR { Self::Sra => "sra", Self::Or => "or", Self::And => "and", - Self::Addw => "addw", - Self::Subw => "subw", + Self::Addw => "ADD", + Self::Subw => "SUB", Self::Sllw => "sllw", Self::Srlw => "srlw", Self::Sraw => "sraw", diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 3a63ce3eaf8b..b22bde1359ce 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -1369,12 +1369,13 @@ impl MachInstEmit for Inst { } &Inst::VirtualSPOffsetAdj { amount } => { - todo!() /* crate::trace!( - "virtual sp offset adjusted by {} -> {}", - amount, - state.virtual_sp_offset + amount - ); - state.virtual_sp_offset += amount; */ + println!("virtual_sp_offset_adj {amount}"); + // crate::trace!( + // "virtual sp offset adjusted by {} -> {}", + // amount, + // state.virtual_sp_offset + amount + // ); + // state.virtual_sp_offset += amount; } &Inst::Atomic { op, diff --git a/cranelift/data/fibonacci_recursive.zkasm b/cranelift/data/fibonacci_recursive.zkasm new file mode 100644 index 000000000000..90e0da518126 --- /dev/null +++ b/cranelift/data/fibonacci_recursive.zkasm @@ -0,0 +1,67 @@ +start: + 
zkPC + 2 => RR + :JMP(function_2) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + SP + 2 => SP + A :MSTORE(SP) + 0 + 0 => B + $ => E :EQ + E :JMPNZ(L1_5) + 0 + 1 => B + $ => A :MLOAD(SP) + $ => B :SUB + B => A + 0 + 0 => B + $ => A :EQ + A :JMPNZ(L1_3) + 0 + 1 => B + $ => A :MLOAD(SP) + $ => B :SUB + A :MSTORE(SP) + B => A + zkPC + 2 => RR + :JMP(function_1) + A :MSTORE(SP + 8) + 0 + 2 => B + $ => A :MLOAD(SP) + $ => A :SUB + zkPC + 2 => RR + :JMP(function_1) + A => B + $ => A :MLOAD(SP + 8) + $ => A :ADD + :JMP(L1_4) +L1_3: + 0 + 1 => A + :JMP(L1_4) +L1_4: + :JMP(L1_6) +L1_5: + 0 + 0 => A + :JMP(L1_6) +L1_6: + SP - 2 => SP + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +function_2: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 11 => A + zkPC + 2 => RR + :JMP(function_1) + 0 + 89 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/gen.sh b/cranelift/data/gen.sh index f0d369202ab0..72c63e7dff58 100755 --- a/cranelift/data/gen.sh +++ b/cranelift/data/gen.sh @@ -1,7 +1,7 @@ #!/bin/bash cargo build -for name in add counter add_func fibonacci locals locals_simple +for name in add counter add_func fibonacci locals locals_simple fibonacci_recursive do echo $name; ../target/debug/clif-util wasm --target sparc-unknown-unknown ../../zkwasm/data/$name.wat > data/$name.zkasm From 4ab350538e7d9bcdde5c5de8e5b94bc6ca5cdb02 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Thu, 7 Sep 2023 18:47:57 +0100 Subject: [PATCH 62/68] Fix typo in ISLE --- cranelift/codegen/src/isa/zkasm/lower.isle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/zkasm/lower.isle b/cranelift/codegen/src/isa/zkasm/lower.isle index acbd9e90c96f..e9abd7250117 100644 --- a/cranelift/codegen/src/isa/zkasm/lower.isle +++ b/cranelift/codegen/src/isa/zkasm/lower.isle @@ -1276,7 +1276,7 @@ (lower (uload32 flags p @ (value_type (ty_addr64 _)) offset)) (gen_load p offset (int_load_op $false 32) flags $I64)) -;;;;; Rules for `iload16`;;;;;;;;; +;;;;; Rules for `iload32`;;;;;;;;; (rule (lower (sload32 flags p @ (value_type (ty_addr64 _)) offset)) (gen_load p offset (int_load_op $true 32) flags $I64)) From eafc91dfcf8675e0978321b25d7aa8ff5ec039ff Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Tue, 12 Sep 2023 21:37:57 +0100 Subject: [PATCH 63/68] Add support for 32-bit addresses --- cranelift/codegen/src/isa/zkasm/lower.isle | 4 ++-- cranelift/codegen/src/isle_prelude.rs | 8 ++++++++ cranelift/codegen/src/prelude.isle | 3 +++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/lower.isle b/cranelift/codegen/src/isa/zkasm/lower.isle index e9abd7250117..fcdf74a93cca 100644 --- a/cranelift/codegen/src/isa/zkasm/lower.isle +++ b/cranelift/codegen/src/isa/zkasm/lower.isle @@ -1282,7 +1282,7 @@ (gen_load p offset (int_load_op $true 32) flags $I64)) (rule - (lower (has_type ty (load flags p @ (value_type (ty_addr64 _)) offset))) + (lower (has_type ty (load flags p @ (value_type (ty_addr32 _)) offset))) (gen_load p offset (load_op ty) flags ty) ) ;;;; for I128 @@ -1356,7 +1356,7 @@ ;;;;; Rules for `store`;;;;;;;;; (rule - (lower (store flags x @ (value_type ty) p @ (value_type (ty_addr64 _)) offset)) + (lower (store flags x @ (value_type ty) p @ (value_type (ty_addr32 _)) offset)) (gen_store p offset (store_op ty) flags x)) ;;; special for I128 diff --git a/cranelift/codegen/src/isle_prelude.rs 
b/cranelift/codegen/src/isle_prelude.rs index b537d9e10cb0..53fac6435810 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -507,6 +507,14 @@ macro_rules! isle_common_prelude_methods { } } + #[inline] + fn ty_addr32(&mut self, ty: Type) -> Option { + match ty { + I32 | R32 => Some(ty), + _ => None, + } + } + #[inline] fn u64_from_imm64(&mut self, imm: Imm64) -> u64 { imm.bits() as u64 diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index dd3c186a6747..188f423c41e7 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -461,6 +461,9 @@ (decl ty_addr64 (Type) Type) (extern extractor ty_addr64 ty_addr64) +(decl ty_addr32 (Type) Type) +(extern extractor ty_addr32 ty_addr32) + ;; A pure constructor that matches everything except vectors with size 32X2. (decl pure partial not_vec32x2 (Type) Type) (extern constructor not_vec32x2 not_vec32x2) From 70f6491752c85e2449e79e1c4c98d3321f67ec71 Mon Sep 17 00:00:00 2001 From: Andrei Kashin Date: Fri, 15 Sep 2023 08:11:10 +0100 Subject: [PATCH 64/68] Add example working with memory --- cranelift/data/add_memory.zkasm | 39 +++++++++++++++++++++++++++++++++ cranelift/data/gen.sh | 2 +- 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 cranelift/data/add_memory.zkasm diff --git a/cranelift/data/add_memory.zkasm b/cranelift/data/add_memory.zkasm new file mode 100644 index 000000000000..2040a74e15d8 --- /dev/null +++ b/cranelift/data/add_memory.zkasm @@ -0,0 +1,39 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 0 => B + 0 + 2 => D + $ => A :MLOAD(CTX) + $ => E :ADD + D :MSTORE(E) + 0 + 8 => B + 0 + 3 => E + $ => A :MLOAD(CTX) + $ => A :ADD + E :MSTORE(A) + 0 + 0 => B + $ => A :MLOAD(CTX) + $ => A :ADD + $ => A :MLOAD(A) + A => E + 0 + 8 => B + $ => A :MLOAD(CTX) + $ => A :ADD + $ => B :MLOAD(A) + E => A + $ => A :ADD + 0 + 5 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/gen.sh b/cranelift/data/gen.sh index 72c63e7dff58..d41eab089e4c 100755 --- a/cranelift/data/gen.sh +++ b/cranelift/data/gen.sh @@ -1,7 +1,7 @@ #!/bin/bash cargo build -for name in add counter add_func fibonacci locals locals_simple fibonacci_recursive +for name in add counter add_func add_memory fibonacci locals locals_simple fibonacci_recursive do echo $name; ../target/debug/clif-util wasm --target sparc-unknown-unknown ../../zkwasm/data/$name.wat > data/$name.zkasm From 9588014608fc4f59ea7d79b2babe5fb9b105008b Mon Sep 17 00:00:00 2001 From: Viktar Makouski Date: Mon, 18 Sep 2023 04:57:14 +0300 Subject: [PATCH 65/68] training changes --- cranelift/codegen/src/isa/zkasm/inst.isle | 12 ++++++++++++ cranelift/codegen/src/isa/zkasm/inst/args.rs | 4 ++-- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 10 ++++++++++ cranelift/codegen/src/isa/zkasm/inst/mod.rs | 9 +++++++++ cranelift/data/add.zkasm | 4 ++-- cranelift/data/gen.sh | 2 +- 6 files changed, 36 insertions(+), 5 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst.isle b/cranelift/codegen/src/isa/zkasm/inst.isle index 164b786f8779..fb040a036493 100644 --- a/cranelift/codegen/src/isa/zkasm/inst.isle +++ b/cranelift/codegen/src/isa/zkasm/inst.isle @@ -404,6 +404,12 @@ (src1 Imm32) (src2 Imm32)) + ;; A multiplication with 2 32-bit immediates. 
+ (MulImm32 + (rd WritableReg) + (src1 Imm32) + (src2 Imm32)) + )) @@ -1010,6 +1016,12 @@ (_ Unit (emit (MInst.AddImm32 dst imm1 imm2)))) dst)) +(decl zk_mul (Imm32 Imm32) XReg) +(rule (zk_mul imm1 imm2) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.MulImm32 dst imm1 imm2)))) + dst)) + ;; Helper for emitting the `add` instruction. ;; rd ← rs1 + rs2 (decl rv_add (XReg XReg) XReg) diff --git a/cranelift/codegen/src/isa/zkasm/inst/args.rs b/cranelift/codegen/src/isa/zkasm/inst/args.rs index d71cef8a32c4..5ac8fd9e3c7d 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/args.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/args.rs @@ -792,7 +792,7 @@ impl AluOPRRR { Self::Sllw => "sllw", Self::Srlw => "srlw", Self::Sraw => "sraw", - Self::Mul => "mul", + Self::Mul => "MUL", Self::Mulh => "mulh", Self::Mulhsu => "mulhsu", Self::Mulhu => "mulhu", @@ -800,7 +800,7 @@ impl AluOPRRR { Self::DivU => "divu", Self::Rem => "rem", Self::RemU => "remu", - Self::Mulw => "mulw", + Self::Mulw => "MUL", Self::Divw => "divw", Self::Divuw => "divuw", Self::Remw => "remw", diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index b22bde1359ce..1d3b5af54165 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -429,6 +429,7 @@ impl Inst { | Inst::LoadConst64 { .. } | Inst::AluRRR { .. } | Inst::AddImm32 { .. } + | Inst::MulImm32 { .. } | Inst::FpuRRR { .. } | Inst::AluRRImm12 { .. } | Inst::Load { .. } @@ -668,6 +669,14 @@ impl MachInstEmit for Inst { sink, ); } + &Inst::MulImm32 { rd, src1, src2 } => { + let rd = allocs.next(rd.to_reg()); + // TODO(akashin): Should we have a function for `bits` field? + put_string( + &format!("{} * {} => {}\n", src1.bits, src2.bits, reg_name(rd)), + sink, + ); + } &Inst::AluRRR { alu_op, rd, @@ -742,6 +751,7 @@ impl MachInstEmit for Inst { sink, ); } + _ => unreachable!("Op {:?} is not implemented", alu_op), }; diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs index 4892bd01312a..ef612569dcda 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/mod.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -716,6 +716,10 @@ fn zkasm_get_operands VReg>(inst: &Inst, collector: &mut OperandC collector.reg_def(*rd); } + Inst::MulImm32 { rd, src1, src2 } => { + collector.reg_def(*rd); + } + &Inst::VecAluRRRImm5 { op, vd, @@ -1464,6 +1468,11 @@ impl Inst { format!("{src1} + {src2} => {rd};") } + Inst::MulImm32 { rd, src1, src2 } => { + let rd = format_reg(rd.to_reg(), allocs); + format!("{src1} * {src2} => {rd};") + } + &Inst::FpuRR { frm, alu_op, diff --git a/cranelift/data/add.zkasm b/cranelift/data/add.zkasm index 9bc8f465b6b3..e31c9cc02655 100644 --- a/cranelift/data/add.zkasm +++ b/cranelift/data/add.zkasm @@ -6,8 +6,8 @@ start: function_1: SP + 1 => SP RR :MSTORE(SP) - 2 + 3 => A - 0 + 5 => B + 2 * 3 => A + 0 + 6 => B B :ASSERT $ => RR :MLOAD(SP) SP - 1 => SP diff --git a/cranelift/data/gen.sh b/cranelift/data/gen.sh index d41eab089e4c..a6af61f42a61 100755 --- a/cranelift/data/gen.sh +++ b/cranelift/data/gen.sh @@ -1,7 +1,7 @@ #!/bin/bash cargo build -for name in add counter add_func add_memory fibonacci locals locals_simple fibonacci_recursive +for name in add counter add_func add_memory fibonacci locals locals_simple fibonacci_recursive mul do echo $name; ../target/debug/clif-util wasm --target sparc-unknown-unknown ../../zkwasm/data/$name.wat > data/$name.zkasm From 
a1a42aa3f795023b397fd69487567060300ec9e0 Mon Sep 17 00:00:00 2001 From: Viktar Makouski Date: Tue, 19 Sep 2023 16:13:59 +0300 Subject: [PATCH 66/68] mul for Imm32 --- cranelift/codegen/src/isa/zkasm/lower.isle | 5 +++++ cranelift/data/add.zkasm | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/lower.isle b/cranelift/codegen/src/isa/zkasm/lower.isle index fcdf74a93cca..ad9bd74abc68 100644 --- a/cranelift/codegen/src/isa/zkasm/lower.isle +++ b/cranelift/codegen/src/isa/zkasm/lower.isle @@ -259,6 +259,11 @@ (rule 5 (lower (has_type (ty_vec_fits_in_register ty) (imul x (splat y)))) (rv_vmul_vx x y (unmasked) ty)) + +(rule 6 (lower (imul (imm32_from_value x) (imm32_from_value y))) + (zk_mul x y)) + + ;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y))) (lower_smlhi ty (sext x ty $I64) (sext y ty $I64))) diff --git a/cranelift/data/add.zkasm b/cranelift/data/add.zkasm index e31c9cc02655..9bc8f465b6b3 100644 --- a/cranelift/data/add.zkasm +++ b/cranelift/data/add.zkasm @@ -6,8 +6,8 @@ start: function_1: SP + 1 => SP RR :MSTORE(SP) - 2 * 3 => A - 0 + 6 => B + 2 + 3 => A + 0 + 5 => B B :ASSERT $ => RR :MLOAD(SP) SP - 1 => SP From caa24c3ce2dc73bab43bb58df66a8c4c2cdb1fc7 Mon Sep 17 00:00:00 2001 From: Viktar Makouski Date: Wed, 20 Sep 2023 10:39:38 +0300 Subject: [PATCH 67/68] revert args.rs --- cranelift/codegen/src/isa/zkasm/inst/args.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/args.rs b/cranelift/codegen/src/isa/zkasm/inst/args.rs index 5ac8fd9e3c7d..d71cef8a32c4 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/args.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/args.rs @@ -792,7 +792,7 @@ impl AluOPRRR { Self::Sllw => "sllw", Self::Srlw => "srlw", Self::Sraw => "sraw", - Self::Mul => "MUL", + Self::Mul => "mul", Self::Mulh => "mulh", Self::Mulhsu => "mulhsu", Self::Mulhu => "mulhu", @@ -800,7 +800,7 @@ impl AluOPRRR { Self::DivU => "divu", Self::Rem => "rem", Self::RemU => "remu", - Self::Mulw => "MUL", + Self::Mulw => "mulw", Self::Divw => "divw", Self::Divuw => "divuw", Self::Remw => "remw", From c9481d090690857f873d2298119eb51d8ab526a2 Mon Sep 17 00:00:00 2001 From: Viktar Makouski Date: Wed, 20 Sep 2023 10:48:29 +0300 Subject: [PATCH 68/68] cargo fmt --- cranelift/codegen/src/isa/zkasm/inst/emit.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs index 1d3b5af54165..122e7bc97c1c 100644 --- a/cranelift/codegen/src/isa/zkasm/inst/emit.rs +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -751,7 +751,6 @@ impl MachInstEmit for Inst { sink, ); } - _ => unreachable!("Op {:?} is not implemented", alu_op), };
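
Illustrative sketch (hypothetical, not part of the patch series): patches 65 and 66 above introduce the MulImm32 instruction and the zk_mul lowering rule for imul of two 32-bit immediates, and gen.sh now lists a mul test, but the corresponding generated mul.zkasm is not included in this series. Assuming a test source that multiplies the constants 2 and 3 and asserts the result equals 6 (the same pattern that briefly appeared in add.zkasm in patch 65 before being reverted), the emitted file would plausibly follow the usual prologue/epilogue of the other data files; the exact register choices and layout below are an assumption, not verified compiler output.

start:
  zkPC + 2 => RR
  :JMP(function_1)
  :JMP(finalizeExecution)

function_1:
  SP + 1 => SP
  RR :MSTORE(SP)
  ; constant * constant lowered through zk_mul / Inst::MulImm32 (assumed)
  2 * 3 => A
  0 + 6 => B
  B :ASSERT
  $ => RR :MLOAD(SP)
  SP - 1 => SP
  :JMP(RR)

finalizeExecution:
  ${beforeLast()} :JMPN(finalizeExecution)
  :JMP(start)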