diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 15d2a1c6c108..5cbedb9a82de 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -73,6 +73,7 @@ x86 = [] arm64 = [] s390x = [] riscv64 = [] +zkasm = [] # Enable the ISA target for the host machine host-arch = [] @@ -81,7 +82,8 @@ all-arch = [ "x86", "arm64", "s390x", - "riscv64" + "riscv64", + "zkasm" ] # For dependent crates that want to serialize some parts of cranelift diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index 211b62177dd2..a634bd5209b2 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -33,7 +33,7 @@ fn main() { .cloned() .filter(|isa| { let env_key = format!("CARGO_FEATURE_{}", isa.to_string().to_uppercase()); - env::var(env_key).is_ok() + dbg!(env::var(dbg!(env_key)).is_ok()) }) .collect::>(); @@ -200,6 +200,8 @@ fn get_isle_compilations( let src_isa_risc_v = make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("riscv64")); + let src_isa_zkasm = + make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("zkasm")); // This is a set of ISLE compilation units. // // The format of each entry is: @@ -280,6 +282,17 @@ fn get_isle_compilations( ], untracked_inputs: vec![clif_lower_isle.clone()], }, + IsleCompilation { + output: out_dir.join("isle_zkasm.rs"), + inputs: vec![ + prelude_isle.clone(), + prelude_lower_isle.clone(), + src_isa_zkasm.join("inst.isle"), + src_isa_zkasm.join("inst_vector.isle"), + src_isa_zkasm.join("lower.isle"), + ], + untracked_inputs: vec![clif_lower_isle.clone()], + }, ], }) } diff --git a/cranelift/codegen/meta/src/isa/mod.rs b/cranelift/codegen/meta/src/isa/mod.rs index ecda9b83d054..37906d557352 100644 --- a/cranelift/codegen/meta/src/isa/mod.rs +++ b/cranelift/codegen/meta/src/isa/mod.rs @@ -6,6 +6,7 @@ mod arm64; mod riscv64; mod s390x; pub(crate) mod x86; +mod zkasm; /// Represents known ISA target. #[derive(PartialEq, Copy, Clone)] @@ -14,6 +15,7 @@ pub enum Isa { Arm64, S390x, Riscv64, + ZkAsm, } impl Isa { @@ -29,6 +31,7 @@ impl Isa { pub fn from_arch(arch: &str) -> Option { match arch { "aarch64" => Some(Isa::Arm64), + "sparc" | "zkasm" => Some(Isa::ZkAsm), "s390x" => Some(Isa::S390x), x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86), "riscv64" | "riscv64gc" | "riscv64imac" => Some(Isa::Riscv64), @@ -38,7 +41,7 @@ impl Isa { /// Returns all supported isa targets. pub fn all() -> &'static [Isa] { - &[Isa::X86, Isa::Arm64, Isa::S390x, Isa::Riscv64] + &[Isa::X86, Isa::Arm64, Isa::S390x, Isa::Riscv64, Isa::ZkAsm] } } @@ -50,6 +53,7 @@ impl fmt::Display for Isa { Isa::Arm64 => write!(f, "arm64"), Isa::S390x => write!(f, "s390x"), Isa::Riscv64 => write!(f, "riscv64"), + Isa::ZkAsm => write!(f, "zkasm"), } } } @@ -61,6 +65,7 @@ pub(crate) fn define(isas: &[Isa]) -> Vec { Isa::Arm64 => arm64::define(), Isa::S390x => s390x::define(), Isa::Riscv64 => riscv64::define(), + Isa::ZkAsm => zkasm::define(), }) .collect() } diff --git a/cranelift/codegen/meta/src/isa/zkasm.rs b/cranelift/codegen/meta/src/isa/zkasm.rs new file mode 100644 index 000000000000..22ab97407540 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/zkasm.rs @@ -0,0 +1,101 @@ +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::settings::SettingGroupBuilder; + +macro_rules! 
define_zvl_ext { + (DEF: $settings:expr, $size:expr) => {{ + let name = concat!("has_zvl", $size, "b"); + let desc = concat!("has extension Zvl", $size, "b?"); + let comment = concat!( + "Zvl", + $size, + "b: Vector register has a minimum of ", + $size, + " bits" + ); + $settings.add_bool(&name, &desc, &comment, false) + }}; + ($settings:expr, $size:expr $(, $implies:expr)*) => {{ + let has_feature = define_zvl_ext!(DEF: $settings, $size); + + let name = concat!("zvl", $size, "b"); + let desc = concat!("Has a vector register size of at least ", $size, " bits"); + + let preset = $settings.add_preset(&name, &desc, preset!(has_feature $( && $implies )*)); + (has_feature, preset) + }}; +} + +pub(crate) fn define() -> TargetIsa { + let mut setting = SettingGroupBuilder::new("zkasm"); + + let _has_m = setting.add_bool("has_m", "has extension M?", "", false); + let _has_a = setting.add_bool("has_a", "has extension A?", "", false); + let _has_f = setting.add_bool("has_f", "has extension F?", "", false); + let _has_d = setting.add_bool("has_d", "has extension D?", "", false); + let _has_v = setting.add_bool("has_v", "has extension V?", "", false); + let _has_c = setting.add_bool("has_c", "has extension C?", "", false); + let _has_zbkb = setting.add_bool( + "has_zbkb", + "has extension zbkb?", + "Zbkb: Bit-manipulation for Cryptography", + false, + ); + let _has_zba = setting.add_bool( + "has_zba", + "has extension zba?", + "Zba: Address Generation", + false, + ); + let _has_zbb = setting.add_bool( + "has_zbb", + "has extension zbb?", + "Zbb: Basic bit-manipulation", + false, + ); + let _has_zbc = setting.add_bool( + "has_zbc", + "has extension zbc?", + "Zbc: Carry-less multiplication", + false, + ); + let _has_zbs = setting.add_bool( + "has_zbs", + "has extension zbs?", + "Zbs: Single-bit instructions", + false, + ); + + let _has_zicsr = setting.add_bool( + "has_zicsr", + "has extension zicsr?", + "Zicsr: Control and Status Register (CSR) Instructions", + false, + ); + let _has_zifencei = setting.add_bool( + "has_zifencei", + "has extension zifencei?", + "Zifencei: Instruction-Fetch Fence", + false, + ); + + // Zvl*: Minimum Vector Length Standard Extensions + // These extension specifiy the minimum number of bits in a vector register. + // Since it is a minimum, Zvl64b implies Zvl32b, Zvl128b implies Zvl64b, etc. + // The V extension supports a maximum of 64K bits in a single register. 
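+    // For example, enabling the `zvl128b` preset built by `define_zvl_ext!` below
+    // turns on `has_zvl128b` and, through the chained presets, `has_zvl64b` and
+    // `has_zvl32b` as well.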
+ // + // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#181-zvl-minimum-vector-length-standard-extensions + let (_, zvl32b) = define_zvl_ext!(setting, 32); + let (_, zvl64b) = define_zvl_ext!(setting, 64, zvl32b); + let (_, zvl128b) = define_zvl_ext!(setting, 128, zvl64b); + let (_, zvl256b) = define_zvl_ext!(setting, 256, zvl128b); + let (_, zvl512b) = define_zvl_ext!(setting, 512, zvl256b); + let (_, zvl1024b) = define_zvl_ext!(setting, 1024, zvl512b); + let (_, zvl2048b) = define_zvl_ext!(setting, 2048, zvl1024b); + let (_, zvl4096b) = define_zvl_ext!(setting, 4096, zvl2048b); + let (_, zvl8192b) = define_zvl_ext!(setting, 8192, zvl4096b); + let (_, zvl16384b) = define_zvl_ext!(setting, 16384, zvl8192b); + let (_, zvl32768b) = define_zvl_ext!(setting, 32768, zvl16384b); + let (_, _zvl65536b) = define_zvl_ext!(setting, 65536, zvl32768b); + + TargetIsa::new("zkasm", setting.build()) +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 006a6b807d3d..92377395626a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1158,6 +1158,7 @@ impl MachInst for Inst { } fn gen_block_start( + _block_index: usize, is_indirect_branch_target: bool, is_forward_edge_cfi_enabled: bool, ) -> Option { diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 860fcbd24e14..5b2fffc57101 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -74,6 +74,9 @@ pub mod riscv64; #[cfg(feature = "s390x")] mod s390x; +#[cfg(feature = "zkasm")] +pub mod zkasm; + pub mod unwind; mod call_conv; @@ -103,6 +106,7 @@ pub fn lookup(triple: Triple) -> Result { Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple), Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple), Architecture::Riscv64 { .. } => isa_builder!(riscv64, (feature = "riscv64"), triple), + Architecture::Sparc { .. } => isa_builder!(zkasm, (feature = "zkasm"), triple), _ => Err(LookupError::Unsupported), } } @@ -110,7 +114,7 @@ pub fn lookup(triple: Triple) -> Result { /// The string names of all the supported, but possibly not enabled, architectures. The elements of /// this slice are suitable to be passed to the [lookup_by_name] function to obtain the default /// configuration for that architecture. -pub const ALL_ARCHITECTURES: &[&str] = &["x86_64", "aarch64", "s390x", "riscv64"]; +pub const ALL_ARCHITECTURES: &[&str] = &["x86_64", "aarch64", "s390x", "riscv64", "sparc"]; /// Look for a supported ISA with the given `name`. /// Return a builder that can create a corresponding `TargetIsa`. diff --git a/cranelift/codegen/src/isa/zkasm/abi.rs b/cranelift/codegen/src/isa/zkasm/abi.rs new file mode 100644 index 000000000000..d88031c0b117 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/abi.rs @@ -0,0 +1,961 @@ +//! Implementation of a standard Riscv64 ABI. 
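+//!
+//! Note: this module is adapted from the `riscv64` backend, so many type names
+//! (e.g. `Riscv64MachineDeps`, `Riscv64Callee`) and comments below still use
+//! Riscv64 terminology even though the target is zkASM.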
+ +use crate::ir; +use crate::ir::types::*; + +use crate::ir::ExternalName; +use crate::ir::MemFlags; +use crate::isa; + +use crate::isa::zkasm::{inst::EmitState, inst::*}; +use crate::isa::CallConv; +use crate::machinst::*; + +use crate::ir::types::I8; +use crate::ir::LibCall; +use crate::ir::Signature; +use crate::isa::zkasm::settings::Flags as RiscvFlags; +use crate::settings; +use crate::CodegenError; +use crate::CodegenResult; +use alloc::boxed::Box; +use alloc::vec::Vec; +use regalloc2::PRegSet; +use regs::x_reg; + +use smallvec::{smallvec, SmallVec}; + +/// Support for the Riscv64 ABI from the callee side (within a function body). +pub(crate) type Riscv64Callee = Callee; + +/// Support for the Riscv64 ABI from the caller side (at a callsite). +pub(crate) type Riscv64ABICallSite = CallSite; + +/// This is the limit for the size of argument and return-value areas on the +/// stack. We place a reasonable limit here to avoid integer overflow issues +/// with 32-bit arithmetic: for now, 128 MB. +static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024; + +/// Riscv64-specific ABI behavior. This struct just serves as an implementation +/// point for the trait; it is never actually instantiated. +pub struct Riscv64MachineDeps; + +impl IsaFlags for RiscvFlags {} + +impl RiscvFlags { + pub(crate) fn min_vec_reg_size(&self) -> u64 { + let entries = [ + (self.has_zvl65536b(), 65536), + (self.has_zvl32768b(), 32768), + (self.has_zvl16384b(), 16384), + (self.has_zvl8192b(), 8192), + (self.has_zvl4096b(), 4096), + (self.has_zvl2048b(), 2048), + (self.has_zvl1024b(), 1024), + (self.has_zvl512b(), 512), + (self.has_zvl256b(), 256), + // In order to claim the Application Profile V extension, a minimum + // register size of 128 is required. i.e. V implies Zvl128b. + (self.has_v(), 128), + (self.has_zvl128b(), 128), + (self.has_zvl64b(), 64), + (self.has_zvl32b(), 32), + ]; + + for (has_flag, size) in entries.into_iter() { + if !has_flag { + continue; + } + + // Due to a limitation in regalloc2, we can't support types + // larger than 1024 bytes. So limit that here. + return std::cmp::min(size, 1024); + } + + return 0; + } +} + +impl ABIMachineSpec for Riscv64MachineDeps { + type I = Inst; + type F = RiscvFlags; + + fn word_bits() -> u32 { + 64 + } + + /// Return required stack alignment in bytes. + fn stack_align(_call_conv: isa::CallConv) -> u32 { + 1 + } + + fn compute_arg_locs<'a, I>( + call_conv: isa::CallConv, + _flags: &settings::Flags, + params: I, + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, + mut args: ArgsAccumulator<'_>, + ) -> CodegenResult<(u32, Option)> + where + I: IntoIterator, + { + // All registers that can be used as parameters or rets. + // both start and end are included. + let (x_start, x_end, f_start, f_end) = match (call_conv, args_or_rets) { + (isa::CallConv::Tail, _) => (10, 11, 0, 0), + (_, ArgsOrRets::Args) => (10, 11, 0, 0), + (_, ArgsOrRets::Rets) => (10, 11, 0, 0), + }; + let mut next_x_reg = x_start; + let mut next_f_reg = f_start; + // Stack space. + let mut next_stack: u32 = 0; + + for param in params { + if let ir::ArgumentPurpose::StructArgument(size) = param.purpose { + let offset = next_stack; + assert!(size % 8 == 0, "StructArgument size is not properly aligned"); + next_stack += size; + args.push(ABIArg::StructArg { + pointer: None, + offset: offset as i64, + size: size as u64, + purpose: param.purpose, + }); + continue; + } + + // For now we pin VMContext register to `CTX` register of ZK ASM. 
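+            // The branch below gives the VMContext parameter a single register slot
+            // backed by `context_reg()` (typed as I32 here), so it never consumes one
+            // of the ordinary integer argument registers.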
+ if let ir::ArgumentPurpose::VMContext = param.purpose { + let mut slots = ABIArgSlotVec::new(); + slots.push(ABIArgSlot::Reg { + reg: context_reg().to_real_reg().unwrap(), + ty: I32, + extension: param.extension, + }); + args.push(ABIArg::Slots { + slots, + purpose: param.purpose, + }); + continue; + } + + // Find regclass(es) of the register(s) used to store a value of this type. + let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?; + let mut slots = ABIArgSlotVec::new(); + for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) { + let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int { + let x = Some(x_reg(next_x_reg)); + next_x_reg += 1; + x + } else if (next_f_reg <= f_end) && *rc == RegClass::Float { + let x = Some(f_reg(next_f_reg)); + next_f_reg += 1; + x + } else { + None + }; + if let Some(reg) = next_reg { + slots.push(ABIArgSlot::Reg { + reg: reg.to_real_reg().unwrap(), + ty: *reg_ty, + extension: param.extension, + }); + } else { + // Compute size and 16-byte stack alignment happens + // separately after all args. + let size = reg_ty.bits() / 8; + let size = std::cmp::max(size, 8); + // Align. + debug_assert!(size.is_power_of_two()); + next_stack = align_to(next_stack, size); + slots.push(ABIArgSlot::Stack { + offset: next_stack as i64, + ty: *reg_ty, + extension: param.extension, + }); + next_stack += size; + } + } + args.push(ABIArg::Slots { + slots, + purpose: param.purpose, + }); + } + let pos: Option = if add_ret_area_ptr { + assert!(ArgsOrRets::Args == args_or_rets); + if next_x_reg <= x_end { + let arg = ABIArg::reg( + x_reg(next_x_reg).to_real_reg().unwrap(), + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + args.push(arg); + } else { + let arg = ABIArg::stack( + next_stack as i64, + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + args.push(arg); + next_stack += 8; + } + Some(args.args().len() - 1) + } else { + None + }; + + next_stack = align_to(next_stack, Self::stack_align(call_conv)); + + // To avoid overflow issues, limit the arg/return size to something + // reasonable -- here, 128 MB. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((next_stack, pos)) + } + + fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 { + // lr fp. 
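+        // i.e. 8 bytes each for the saved link register (`lr`/`ra`) and the saved
+        // frame pointer.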
+ 16 + } + + fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { + Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted()) + } + + fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted()) + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_move(to_reg, from_reg, ty) + } + + fn gen_extend( + to_reg: Writable, + from_reg: Reg, + signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Inst { + assert!(from_bits < to_bits); + Inst::Extend { + rd: to_reg, + rn: from_reg, + signed, + from_bits, + to_bits, + } + } + + fn get_ext_mode( + _call_conv: isa::CallConv, + specified: ir::ArgumentExtension, + ) -> ir::ArgumentExtension { + specified + } + + fn gen_args(_isa_flags: &crate::isa::zkasm::settings::Flags, args: Vec) -> Inst { + Inst::Args { args } + } + + fn gen_ret( + _setup_frame: bool, + _isa_flags: &Self::F, + _call_conv: isa::CallConv, + rets: Vec, + stack_bytes_to_pop: u32, + ) -> Inst { + Inst::Ret { + rets, + stack_bytes_to_pop, + } + } + + fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg { + spilltmp_reg() + } + + fn gen_add_imm( + _call_conv: isa::CallConv, + into_reg: Writable, + from_reg: Reg, + imm: u32, + ) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: into_reg, + rs: from_reg, + imm12, + }); + } else { + insts.extend(Inst::load_constant_u32( + writable_spilltmp_reg2(), + imm as u64, + &mut |_| writable_spilltmp_reg2(), + )); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: into_reg, + rs1: spilltmp_reg2(), + rs2: from_reg, + }); + } + insts + } + + fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec { + let mut insts = SmallVec::new(); + insts.push(Inst::TrapIfC { + cc: IntCC::UnsignedLessThan, + rs1: stack_reg(), + rs2: limit_reg, + trap_code: ir::TrapCode::StackOverflow, + }); + insts + } + + fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, _ty: Type) -> Inst { + Inst::LoadAddr { + rd: into_reg, + mem: mem.into(), + } + } + + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64, ty); + Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()) + } + + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64, ty); + Inst::gen_store(mem, from_reg, ty, MemFlags::trusted()) + } + + fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec { + let mut insts = SmallVec::new(); + if amount == 0 { + return insts; + } + insts.push(Inst::AdjustSp { + amount: amount as i64, + }); + insts + } + + fn gen_nominal_sp_adj(offset: i32) -> Inst { + Inst::VirtualSPOffsetAdj { + amount: offset as i64, + } + } + + fn gen_prologue_frame_setup(_flags: &settings::Flags) -> SmallInstVec { + // add sp,sp,-16 ;; alloc stack space for fp. + // sd ra,8(sp) ;; save ra. + // sd fp,0(sp) ;; store old fp. + // mv fp,sp ;; set fp to sp. 
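+        // The sequence above documents the original riscv64 prologue; here only the
+        // return address is saved: SP is adjusted by a single slot and `ra` is stored
+        // at offset 0, while the fp save and `mv fp,sp` steps are left commented out.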
+ let mut insts = SmallVec::new(); + insts.push(Inst::AdjustSp { amount: -1 }); + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(0, I64), + link_reg(), + I64, + )); + // insts.push(Self::gen_store_stack( + // StackAMode::SPOffset(0, I64), + // fp_reg(), + // I64, + // )); + // insts.push(Inst::Mov { + // rd: writable_fp_reg(), + // rm: stack_reg(), + // ty: I64, + // }); + insts + } + /// reverse of gen_prologue_frame_setup. + fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec { + let mut insts = SmallVec::new(); + insts.push(Self::gen_load_stack( + StackAMode::SPOffset(0, I64), + writable_link_reg(), + I64, + )); + // insts.push(Self::gen_load_stack( + // StackAMode::SPOffset(0, I64), + // writable_fp_reg(), + // I64, + // )); + insts.push(Inst::AdjustSp { amount: 1 }); + insts + } + + fn gen_probestack(insts: &mut SmallInstVec, frame_size: u32) { + insts.extend(Inst::load_constant_u32( + writable_a0(), + frame_size as u64, + &mut |_| writable_a0(), + )); + insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: ExternalName::LibCall(LibCall::Probestack), + uses: smallvec![CallArgPair { + vreg: a0(), + preg: a0(), + }], + defs: smallvec![], + clobbers: PRegSet::empty(), + opcode: Opcode::Call, + callee_callconv: CallConv::SystemV, + caller_callconv: CallConv::SystemV, + callee_pop_size: 0, + }), + }); + } + // Returns stack bytes used as well as instructions. Does not adjust + // nominal SP offset; abi_impl generic code will do that. + fn gen_clobber_save( + _call_conv: isa::CallConv, + _setup_frame: bool, + _flags: &settings::Flags, + clobbered_callee_saves: &[Writable], + fixed_frame_storage_size: u32, + _outgoing_args_size: u32, + ) -> (u64, SmallVec<[Inst; 16]>) { + let mut insts = SmallVec::new(); + let clobbered_size = compute_clobber_size(&clobbered_callee_saves); + // Adjust the stack pointer downward for clobbers and the function fixed + // frame (spillslots and storage slots). + let stack_size = fixed_frame_storage_size + clobbered_size; + // Each stack slot is 256 bit and can fit 8 u32 values. + let stack_size = stack_size / 8; + // Store each clobbered register in order at offsets from SP, + // placing them above the fixed frame slots. + if stack_size > 0 { + let mut cur_offset = 1; + for reg in clobbered_callee_saves { + let r_reg = reg.to_reg(); + let ty = match r_reg.class() { + RegClass::Int => I64, + RegClass::Float => F64, + RegClass::Vector => unimplemented!("Vector Clobber Saves"), + }; + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(-(cur_offset as i64), ty), + real_reg_to_reg(reg.to_reg()), + ty, + )); + cur_offset += 1 + } + insts.push(Inst::AdjustSp { + amount: -(stack_size as i64), + }); + } + (clobbered_size as u64, insts) + } + + fn gen_clobber_restore( + call_conv: isa::CallConv, + sig: &Signature, + _flags: &settings::Flags, + clobbers: &[Writable], + fixed_frame_storage_size: u32, + _outgoing_args_size: u32, + ) -> SmallVec<[Inst; 16]> { + let mut insts = SmallVec::new(); + let clobbered_callee_saves = + Self::get_clobbered_callee_saves(call_conv, _flags, sig, clobbers); + let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves); + // Each stack slot is 256 bit and can fit 8 u32 values. 
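+        // (the byte-based size computed above is converted into zkASM stack slots by
+        // the division below; e.g. two clobbered registers account for 16 bytes and
+        // become 2 slots)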
+ let stack_size = stack_size / 8; + if stack_size > 0 { + insts.push(Inst::AdjustSp { + amount: stack_size as i64, + }); + } + let mut cur_offset = 1; + for reg in &clobbered_callee_saves { + let rreg = reg.to_reg(); + let ty = match rreg.class() { + RegClass::Int => I64, + RegClass::Float => F64, + RegClass::Vector => unimplemented!("Vector Clobber Restores"), + }; + insts.push(Self::gen_load_stack( + StackAMode::SPOffset(-cur_offset, ty), + Writable::from_reg(real_reg_to_reg(reg.to_reg())), + ty, + )); + cur_offset += 1 + } + insts + } + + fn gen_call( + dest: &CallDest, + uses: CallArgList, + defs: CallRetList, + clobbers: PRegSet, + opcode: ir::Opcode, + _tmp: Writable, + callee_conv: isa::CallConv, + caller_conv: isa::CallConv, + callee_pop_size: u32, + ) -> SmallVec<[Self::I; 2]> { + let mut insts = SmallVec::new(); + match &dest { + &CallDest::ExtName(ref name, _) => insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: name.clone(), + uses, + defs, + clobbers, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + callee_pop_size, + }), + }), + &CallDest::Reg(reg) => insts.push(Inst::CallInd { + info: Box::new(CallIndInfo { + rn: *reg, + uses, + defs, + clobbers, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + callee_pop_size, + }), + }), + } + insts + } + + fn gen_memcpy Writable>( + call_conv: isa::CallConv, + dst: Reg, + src: Reg, + size: usize, + mut alloc_tmp: F, + ) -> SmallVec<[Self::I; 8]> { + let mut insts = SmallVec::new(); + let arg0 = Writable::from_reg(x_reg(10)); + let arg1 = Writable::from_reg(x_reg(11)); + let arg2 = Writable::from_reg(x_reg(12)); + let tmp = alloc_tmp(Self::word_type()); + insts.extend(Inst::load_constant_u64(tmp, size as u64, &mut alloc_tmp).into_iter()); + insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: ExternalName::LibCall(LibCall::Memcpy), + uses: smallvec![ + CallArgPair { + vreg: dst, + preg: arg0.to_reg() + }, + CallArgPair { + vreg: src, + preg: arg1.to_reg() + }, + CallArgPair { + vreg: tmp.to_reg(), + preg: arg2.to_reg() + } + ], + defs: smallvec![], + clobbers: Self::get_regs_clobbered_by_call(call_conv), + opcode: Opcode::Call, + caller_callconv: call_conv, + callee_callconv: call_conv, + callee_pop_size: 0, + }), + }); + insts + } + + fn get_number_of_spillslots_for_value( + rc: RegClass, + _target_vector_bytes: u32, + isa_flags: &RiscvFlags, + ) -> u32 { + // We allocate in terms of 8-byte slots. + match rc { + RegClass::Int => 1, + RegClass::Float => 1, + RegClass::Vector => (isa_flags.min_vec_reg_size() / 8) as u32, + } + } + + /// Get the current virtual-SP offset from an instruction-emission state. + fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 { + s.virtual_sp_offset + } + + /// Get the nominal-SP-to-FP offset from an instruction-emission state. 
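+    /// (The "nominal SP" is the stack-pointer reference point against which
+    /// `AMode::NominalSPOffset` addresses are resolved; this returns its distance
+    /// from the frame pointer as tracked in `EmitState`.)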
+ fn get_nominal_sp_to_fp(s: &EmitState) -> i64 { + s.nominal_sp_to_fp + } + + fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet { + if call_conv_of_callee == isa::CallConv::Tail { + TAIL_CLOBBERS + } else { + DEFAULT_CLOBBERS + } + } + + fn get_clobbered_callee_saves( + call_conv: isa::CallConv, + _flags: &settings::Flags, + _sig: &Signature, + regs: &[Writable], + ) -> Vec> { + let mut regs: Vec> = regs + .iter() + .cloned() + .filter(|r| is_reg_saved_in_prologue(call_conv, r.to_reg())) + .collect(); + + regs.sort(); + regs + } + + fn is_frame_setup_needed( + is_leaf: bool, + stack_args_size: u32, + num_clobbered_callee_saves: usize, + fixed_frame_storage_size: u32, + ) -> bool { + !is_leaf + // The function arguments that are passed on the stack are addressed + // relative to the Frame Pointer. + || stack_args_size > 0 + || num_clobbered_callee_saves > 0 + || fixed_frame_storage_size > 0 + } + + fn gen_inline_probestack( + insts: &mut SmallInstVec, + call_conv: isa::CallConv, + frame_size: u32, + guard_size: u32, + ) { + // Unroll at most n consecutive probes, before falling back to using a loop + const PROBE_MAX_UNROLL: u32 = 3; + // Number of probes that we need to perform + let probe_count = align_to(frame_size, guard_size) / guard_size; + + if probe_count <= PROBE_MAX_UNROLL { + Self::gen_probestack_unroll(insts, guard_size, probe_count) + } else { + Self::gen_probestack_loop(insts, call_conv, guard_size, probe_count) + } + } +} + +impl Riscv64ABICallSite { + pub fn emit_return_call(mut self, ctx: &mut Lower, args: isle::ValueSlice) { + let (new_stack_arg_size, old_stack_arg_size) = + self.emit_temporary_tail_call_frame(ctx, args); + + let dest = self.dest().clone(); + let opcode = self.opcode(); + let uses = self.take_uses(); + let info = Box::new(ReturnCallInfo { + uses, + opcode, + old_stack_arg_size, + new_stack_arg_size, + }); + + match dest { + CallDest::ExtName(name, RelocDistance::Near) => { + ctx.emit(Inst::ReturnCall { + callee: Box::new(name), + info, + }); + } + CallDest::ExtName(name, RelocDistance::Far) => { + let callee = ctx.alloc_tmp(ir::types::I64).only_reg().unwrap(); + ctx.emit(Inst::LoadExtName { + rd: callee, + name: Box::new(name), + offset: 0, + }); + ctx.emit(Inst::ReturnCallInd { + callee: callee.to_reg(), + info, + }); + } + CallDest::Reg(callee) => ctx.emit(Inst::ReturnCallInd { callee, info }), + } + } +} + +// TODO(akashin): Figure out the correct clobbering convention. +const CALLEE_SAVE_X_REG: [bool; 32] = [ + false, false, false, false, false, false, false, false, // 0-7 + false, false, false, false, false, false, false, false, // 8-15 + false, false, false, false, false, false, false, false, // 16-23 + false, false, false, false, false, false, false, false, // 24-31 +]; +const CALLEE_SAVE_F_REG: [bool; 32] = [ + false, false, false, false, false, false, false, false, // 0-7 + true, false, false, false, false, false, false, false, // 8-15 + false, false, true, true, true, true, true, true, // 16-23 + true, true, true, true, false, false, false, false, // 24-31 +]; + +/// This should be the registers that must be saved by callee. +#[inline] +fn is_reg_saved_in_prologue(conv: CallConv, reg: RealReg) -> bool { + if conv == CallConv::Tail { + return false; + } + + match reg.class() { + RegClass::Int => CALLEE_SAVE_X_REG[reg.hw_enc() as usize], + RegClass::Float => CALLEE_SAVE_F_REG[reg.hw_enc() as usize], + // All vector registers are caller saved. 
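+        // (there is no callee-save table for the vector class; returning `false`
+        // means any vector register may be clobbered across a call)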
+ RegClass::Vector => false, + } +} + +fn compute_clobber_size(clobbers: &[Writable]) -> u32 { + let mut clobbered_size = 0; + for reg in clobbers { + match reg.to_reg().class() { + RegClass::Int => { + clobbered_size += 8; + } + RegClass::Float => { + clobbered_size += 8; + } + RegClass::Vector => unimplemented!("Vector Size Clobbered"), + } + } + align_to(clobbered_size, 16) +} + +const fn default_clobbers() -> PRegSet { + PRegSet::empty() + .with(px_reg(1)) + .with(px_reg(5)) + .with(px_reg(6)) + .with(px_reg(7)) + .with(px_reg(10)) + .with(px_reg(11)) + // CTX register is not clobbered. + // .with(px_reg(12)) + .with(px_reg(13)) + .with(px_reg(14)) + .with(px_reg(15)) + .with(px_reg(16)) + .with(px_reg(17)) + .with(px_reg(28)) + .with(px_reg(29)) + .with(px_reg(30)) + .with(px_reg(31)) + // F Regs + // .with(pf_reg(0)) + // .with(pf_reg(1)) + // .with(pf_reg(2)) + // .with(pf_reg(3)) + // .with(pf_reg(4)) + // .with(pf_reg(5)) + // .with(pf_reg(6)) + // .with(pf_reg(7)) + // .with(pf_reg(9)) + // .with(pf_reg(10)) + // .with(pf_reg(11)) + // .with(pf_reg(12)) + // .with(pf_reg(13)) + // .with(pf_reg(14)) + // .with(pf_reg(15)) + // .with(pf_reg(16)) + // .with(pf_reg(17)) + // .with(pf_reg(28)) + // .with(pf_reg(29)) + // .with(pf_reg(30)) + // .with(pf_reg(31)) + // V Regs - All vector regs get clobbered + // .with(pv_reg(0)) + // .with(pv_reg(1)) + // .with(pv_reg(2)) + // .with(pv_reg(3)) + // .with(pv_reg(4)) + // .with(pv_reg(5)) + // .with(pv_reg(6)) + // .with(pv_reg(7)) + // .with(pv_reg(8)) + // .with(pv_reg(9)) + // .with(pv_reg(10)) + // .with(pv_reg(11)) + // .with(pv_reg(12)) + // .with(pv_reg(13)) + // .with(pv_reg(14)) + // .with(pv_reg(15)) + // .with(pv_reg(16)) + // .with(pv_reg(17)) + // .with(pv_reg(18)) + // .with(pv_reg(19)) + // .with(pv_reg(20)) + // .with(pv_reg(21)) + // .with(pv_reg(22)) + // .with(pv_reg(23)) + // .with(pv_reg(24)) + // .with(pv_reg(25)) + // .with(pv_reg(26)) + // .with(pv_reg(27)) + // .with(pv_reg(28)) + // .with(pv_reg(29)) + // .with(pv_reg(30)) + // .with(pv_reg(31)) +} + +const DEFAULT_CLOBBERS: PRegSet = default_clobbers(); + +// All allocatable registers are clobbered by calls using the `tail` calling +// convention. +const fn tail_clobbers() -> PRegSet { + PRegSet::empty() + // `x0` is the zero register, and not allocatable. + .with(px_reg(1)) + // `x2` is the stack pointer, `x3` is the global pointer, and `x4` is + // the thread pointer. None are allocatable. + .with(px_reg(5)) + .with(px_reg(6)) + .with(px_reg(7)) + // `x8` is the frame pointer, and not allocatable. + .with(px_reg(9)) + .with(px_reg(10)) + .with(px_reg(10)) + .with(px_reg(11)) + .with(px_reg(12)) + .with(px_reg(13)) + .with(px_reg(14)) + .with(px_reg(15)) + .with(px_reg(16)) + .with(px_reg(17)) + .with(px_reg(18)) + .with(px_reg(19)) + .with(px_reg(20)) + .with(px_reg(21)) + .with(px_reg(22)) + .with(px_reg(23)) + .with(px_reg(24)) + .with(px_reg(25)) + .with(px_reg(26)) + .with(px_reg(27)) + .with(px_reg(28)) + .with(px_reg(29)) + // `x30` and `x31` are reserved as scratch registers, and are not + // allocatable. 
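+        // As in the riscv64 backend this file is derived from, x0 (zero), x2/x3/x4
+        // (sp/gp/tp), x8 (fp) and x30/x31 (scratch) are never allocatable and so are
+        // omitted from the clobber set.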
+ // + // F Regs + // .with(pf_reg(0)) + // .with(pf_reg(1)) + // .with(pf_reg(2)) + // .with(pf_reg(3)) + // .with(pf_reg(4)) + // .with(pf_reg(5)) + // .with(pf_reg(6)) + // .with(pf_reg(7)) + // .with(pf_reg(9)) + // .with(pf_reg(10)) + // .with(pf_reg(11)) + // .with(pf_reg(12)) + // .with(pf_reg(13)) + // .with(pf_reg(14)) + // .with(pf_reg(15)) + // .with(pf_reg(16)) + // .with(pf_reg(17)) + // .with(pf_reg(18)) + // .with(pf_reg(19)) + // .with(pf_reg(20)) + // .with(pf_reg(21)) + // .with(pf_reg(22)) + // .with(pf_reg(23)) + // .with(pf_reg(24)) + // .with(pf_reg(25)) + // .with(pf_reg(26)) + // .with(pf_reg(27)) + // .with(pf_reg(28)) + // .with(pf_reg(29)) + // .with(pf_reg(30)) + // .with(pf_reg(31)) + // V Regs + // .with(pv_reg(0)) + // .with(pv_reg(1)) + // .with(pv_reg(2)) + // .with(pv_reg(3)) + // .with(pv_reg(4)) + // .with(pv_reg(5)) + // .with(pv_reg(6)) + // .with(pv_reg(7)) + // .with(pv_reg(8)) + // .with(pv_reg(9)) + // .with(pv_reg(10)) + // .with(pv_reg(11)) + // .with(pv_reg(12)) + // .with(pv_reg(13)) + // .with(pv_reg(14)) + // .with(pv_reg(15)) + // .with(pv_reg(16)) + // .with(pv_reg(17)) + // .with(pv_reg(18)) + // .with(pv_reg(19)) + // .with(pv_reg(20)) + // .with(pv_reg(21)) + // .with(pv_reg(22)) + // .with(pv_reg(23)) + // .with(pv_reg(24)) + // .with(pv_reg(25)) + // .with(pv_reg(26)) + // .with(pv_reg(27)) + // .with(pv_reg(28)) + // .with(pv_reg(29)) + // .with(pv_reg(30)) + // .with(pv_reg(31)) +} + +const TAIL_CLOBBERS: PRegSet = tail_clobbers(); + +impl Riscv64MachineDeps { + fn gen_probestack_unroll(insts: &mut SmallInstVec, guard_size: u32, probe_count: u32) { + insts.reserve(probe_count as usize); + for i in 0..probe_count { + let offset = (guard_size * (i + 1)) as i64; + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(-offset, I8), + zero_reg(), + I32, + )); + } + } + + fn gen_probestack_loop( + insts: &mut SmallInstVec, + call_conv: isa::CallConv, + guard_size: u32, + probe_count: u32, + ) { + // Must be a caller-saved register that is not an argument. + let tmp = match call_conv { + isa::CallConv::Tail => Writable::from_reg(x_reg(1)), + _ => Writable::from_reg(x_reg(28)), // t3 + }; + insts.push(Inst::StackProbeLoop { + guard_size, + probe_count, + tmp, + }); + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst.isle b/cranelift/codegen/src/isa/zkasm/inst.isle new file mode 100644 index 000000000000..fb040a036493 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst.isle @@ -0,0 +1,3007 @@ +;; Instruction formats. +(type MInst + (enum + ;; A no-op of zero size. + (Nop0) + (Nop4) + + ;; Label to output at the beginning of a block + (Label + (imm usize)) + + ;; load immediate + (Lui + (rd WritableReg) + (imm Imm20)) + + (LoadConst32 + (rd WritableReg) + (imm u32)) + + (LoadConst64 + (rd WritableReg) + (imm u64)) + + (Auipc + (rd WritableReg) + (imm Imm20)) + + ;; An ALU operation with one register sources and a register destination. + (FpuRR + (alu_op FpuOPRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs Reg)) + + + ;; An ALU operation with two register sources and a register destination. + (AluRRR + (alu_op AluOPRRR) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg)) + + ;; An ALU operation with two register sources and a register destination. + (FpuRRR + (alu_op FpuOPRRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg)) + + ;; An ALU operation with three register sources and a register destination. 
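+    ;; (used for the fused multiply-add family; e.g. FmaddD computes
+    ;; rd ← rs1 × rs2 + rs3)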
+ (FpuRRRR + (alu_op FpuOPRRRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg) + (rs3 Reg)) + + ;; An ALU operation with a register source and an immediate-12 source, and a register + ;; destination. + (AluRRImm12 + (alu_op AluOPRRI) + (rd WritableReg) + (rs Reg) + (imm12 Imm12)) + + ;; An load + (Load + (rd WritableReg) + (op LoadOP) + (flags MemFlags) + (from AMode)) + ;; An Store + (Store + (to AMode) + (op StoreOP) + (flags MemFlags) + (src Reg)) + + ;; A pseudo-instruction that captures register arguments in vregs. + (Args + (args VecArgPair)) + + (Ret (rets VecRetPair) + (stack_bytes_to_pop u32)) + + (Extend + (rd WritableReg) + (rn Reg) + (signed bool) + (from_bits u8) + (to_bits u8)) + + (AdjustSp + (amount i64)) + (Call + (info BoxCallInfo)) + + ;; A machine indirect-call instruction. + (CallInd + (info BoxCallIndInfo)) + + ;; A direct return-call macro instruction. + (ReturnCall + (callee BoxExternalName) + (info BoxReturnCallInfo)) + + ;; An indirect return-call macro instruction. + (ReturnCallInd + (callee Reg) + (info BoxReturnCallInfo)) + + (TrapIf + (test Reg) + (trap_code TrapCode)) + + ;; use a simple compare to decide to cause trap or not. + (TrapIfC + (rs1 Reg) + (rs2 Reg) + (cc IntCC) + (trap_code TrapCode)) + + (Jal + ;; (rd WritableReg) don't use + (dest BranchTarget)) + + (CondBr + (taken BranchTarget) + (not_taken BranchTarget) + (kind IntegerCompare)) + + ;; Load an inline symbol reference. + (LoadExtName + (rd WritableReg) + (name BoxExternalName) + (offset i64)) + + ;; Load address referenced by `mem` into `rd`. + (LoadAddr + (rd WritableReg) + (mem AMode)) + + ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This + ;; controls how AMode::NominalSPOffset args are lowered. + (VirtualSPOffsetAdj + (amount i64)) + + ;; A MOV instruction. These are encoded as OrR's (AluRRR form) but we + ;; keep them separate at the `Inst` level for better pretty-printing + ;; and faster `is_move()` logic. + (Mov + (rd WritableReg) + (rm Reg) + (ty Type)) + + ;; A MOV instruction, but where the source register is a non-allocatable + ;; PReg. It's important that the register be non-allocatable, as regalloc2 + ;; will not see it as used. + (MovFromPReg + (rd WritableReg) + (rm PReg)) + + (Fence + (pred FenceReq) + (succ FenceReq)) + + (FenceI) + + (ECall) + + (EBreak) + + ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at + ;; runtime. + (Udf + (trap_code TrapCode)) + ;; a jump and link register operation + (Jalr + ;;Plain unconditional jumps (assembler pseudo-op J) are encoded as a JAL with rd=x0. + (rd WritableReg) + (base Reg) + (offset Imm12)) + + ;; atomic operations. + (Atomic + (op AtomicOP) + (rd WritableReg) + (addr Reg) + (src Reg) + (amo AMO)) + ;; an atomic store + (AtomicStore + (src Reg) + (ty Type) + (p Reg)) + ;; an atomic load. + (AtomicLoad + (rd WritableReg) + (ty Type) + (p Reg)) + + ;; an atomic nand need using loop to implement. 
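+    ;; (this pseudo-instruction covers RMW ops that have no single AMO encoding,
+    ;; such as nand; in the riscv64 backend it is derived from, it expands to a
+    ;; load-reserved/store-conditional retry loop)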
+ (AtomicRmwLoop + (offset Reg) + (op AtomicRmwOp) + (dst WritableReg) + (ty Type) + (p Reg) + (x Reg) + (t0 WritableReg)) + + ;; select x or y base on condition + (Select + (dst VecWritableReg) + (ty Type) + (condition Reg) + (x ValueRegs) + (y ValueRegs)) + + (BrTable + (index Reg) + (tmp1 WritableReg) + (tmp2 WritableReg) + (targets VecBranchTarget)) + + ;; atomic compare and set operation + (AtomicCas + (offset Reg) + (t0 WritableReg) + (dst WritableReg) + (e Reg) + (addr Reg) + (v Reg) + (ty Type)) + ;; select x or y base on op_code + (IntSelect + (op IntSelectOP) + (dst VecWritableReg) + (x ValueRegs) + (y ValueRegs) + (ty Type)) + ;; an integer compare. + (Icmp + (cc IntCC) + (rd WritableReg) + (a ValueRegs) + (b ValueRegs) + (ty Type)) + ;; select a reg base on condition. + ;; very useful because in lowering stage we can not have condition branch. + (SelectReg + (rd WritableReg) + (rs1 Reg) + (rs2 Reg) + (condition IntegerCompare)) + ;; + (FcvtToInt + (is_sat bool) + (rd WritableReg) + (tmp WritableReg) ;; a float register to load bounds. + (rs Reg) + (is_signed bool) + (in_type Type) + (out_type Type)) + + (RawData (data VecU8)) + + ;; An unwind pseudo-instruction. + (Unwind + (inst UnwindInst)) + + ;; A dummy use, useful to keep a value alive. + (DummyUse + (reg Reg)) + ;;; + (FloatRound + (op FloatRoundOP) + (rd WritableReg) + (int_tmp WritableReg) + (f_tmp WritableReg) + (rs Reg) + (ty Type)) + ;;;; FMax + (FloatSelect + (op FloatSelectOP) + (rd WritableReg) + ;; a integer register + (tmp WritableReg) + (rs1 Reg) + (rs2 Reg) + (ty Type)) + + ;; popcnt if target doesn't support extension B + ;; use iteration to implement. + (Popcnt + (sum WritableReg) + (step WritableReg) + (tmp WritableReg) + (rs Reg) + (ty Type)) + + ;;; counting leading or trailing zeros. + (Cltz + ;; leading or trailing. + (leading bool) + (sum WritableReg) + (step WritableReg) + (tmp WritableReg) + (rs Reg) + (ty Type)) + ;; Byte-reverse register + (Rev8 + (rs Reg) + (step WritableReg) + (tmp WritableReg) + (rd WritableReg)) + ;; + (Brev8 + (rs Reg) + (ty Type) + (step WritableReg) + (tmp WritableReg) + (tmp2 WritableReg) + (rd WritableReg)) + (StackProbeLoop + (guard_size u32) + (probe_count u32) + (tmp WritableReg)) + + (VecAluRRRR + (op VecAluOpRRRR) + (vd WritableReg) + (vd_src Reg) + (vs2 Reg) + (vs1 Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRRImm5 + (op VecAluOpRRRImm5) + (vd WritableReg) + (vd_src Reg) + (vs2 Reg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRR + (op VecAluOpRRR) + (vd WritableReg) + (vs2 Reg) + (vs1 Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRImm5 + (op VecAluOpRRImm5) + (vd WritableReg) + (vs2 Reg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRR + (op VecAluOpRR) + (vd WritableReg) + (vs Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRImm5 + (op VecAluOpRImm5) + (vd WritableReg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecSetState + (rd WritableReg) + (vstate VState)) + + (VecLoad + (eew VecElementWidth) + (to WritableReg) + (from VecAMode) + (flags MemFlags) + (mask VecOpMasking) + (vstate VState)) + + (VecStore + (eew VecElementWidth) + (to VecAMode) + (from Reg) + (flags MemFlags) + (mask VecOpMasking) + (vstate VState)) + + ;; An addition with 2 32-bit immediates. + (AddImm32 + (rd WritableReg) + (src1 Imm32) + (src2 Imm32)) + + ;; A multiplication with 2 32-bit immediates. 
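+    ;; Like AddImm32 above, this is a zkASM-specific pseudo-instruction; the
+    ;; zk_add/zk_mul helpers later in this file construct these two variants.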
+ (MulImm32 + (rd WritableReg) + (src1 Imm32) + (src2 Imm32)) + +)) + + +(type FloatSelectOP (enum + (Max) + (Min) +)) + +(type FloatRoundOP (enum + (Nearest) + (Ceil) + (Floor) + (Trunc) +)) + +(type IntSelectOP (enum + (Smax) + (Umax) + (Smin) + (Umin) +)) + +(type AtomicOP (enum + (LrW) + (ScW) + (AmoswapW) + (AmoaddW) + (AmoxorW) + (AmoandW) + (AmoorW) + (AmominW) + (AmomaxW) + (AmominuW) + (AmomaxuW) + (LrD) + (ScD) + (AmoswapD) + (AmoaddD) + (AmoxorD) + (AmoandD) + (AmoorD) + (AmominD) + (AmomaxD) + (AmominuD) + (AmomaxuD) +)) + +(type FpuOPRRRR (enum + ;; float32 + (FmaddS) + (FmsubS) + (FnmsubS) + (FnmaddS) + ;; float64 + (FmaddD) + (FmsubD) + (FnmsubD) + (FnmaddD) +)) + +(type FClassResult (enum + ;;0 rs1 is −∞. + (NegInfinite) + ;; 1 rs1 is a negative normal number. + (NegNormal) + ;; 2 rs1 is a negative subnormal number. + (NegSubNormal) + ;; 3 rs1 is −0. + (NegZero) + ;; 4 rs1 is +0. + (PosZero) + ;; 5 rs1 is a positive subnormal number. + (PosSubNormal) + ;; 6 rs1 is a positive normal number. + (PosNormal) + ;; 7 rs1 is +∞. + (PosInfinite) + ;; 8 rs1 is a signaling NaN. + (SNaN) + ;; 9 rs1 is a quiet NaN. + (QNaN) +)) + +(type FpuOPRR (enum + ;; RV32F Standard Extension + (FsqrtS) + (FcvtWS) + (FcvtWuS) + (FmvXW) + (FclassS) + (FcvtSw) + (FcvtSwU) + (FmvWX) + + + ;; RV64F Standard Extension (in addition to RV32F) + (FcvtLS) + (FcvtLuS) + (FcvtSL) + (FcvtSLU) + + + ;; RV64D Standard Extension (in addition to RV32D) + (FcvtLD) + (FcvtLuD) + (FmvXD) + (FcvtDL) + (FcvtDLu) + (FmvDX) + + ;; RV32D Standard Extension + (FsqrtD) + (FcvtSD) + (FcvtDS) + (FclassD) + (FcvtWD) + (FcvtWuD) + (FcvtDW) + (FcvtDWU) + ;; bitmapip + +)) + +(type LoadOP (enum + (Lb) + (Lh) + (Lw) + (Lbu) + (Lhu) + (Lwu) + (Ld) + (Flw) + (Fld) +)) + +(type StoreOP (enum + (Sb) + (Sh) + (Sw) + (Sd) + (Fsw) + (Fsd) +)) + +(type AluOPRRR (enum + ;; base set + (Add) + (Sub) + (Sll) + (Slt) + (SltU) + (Sgt) + (Sgtu) + (Xor) + (Srl) + (Sra) + (Or) + (And) + + ;; RV64I Base Instruction Set (in addition to RV32I) + (Addw) + (Subw) + (Sllw) + (Srlw) + (Sraw) + + + ;;RV32M Standard Extension + (Mul) + (Mulh) + (Mulhsu) + (Mulhu) + (Div) + (DivU) + (Rem) + (RemU) + + ;; RV64M Standard Extension (in addition to RV32M) + (Mulw) + (Divw) + (Divuw) + (Remw) + (Remuw) + + ;; Zba: Address Generation Instructions + (Adduw) + (Sh1add) + (Sh1adduw) + (Sh2add) + (Sh2adduw) + (Sh3add) + (Sh3adduw) + + ;; Zbb: Bit Manipulation Instructions + (Andn) + (Orn) + (Xnor) + (Max) + (Maxu) + (Min) + (Minu) + (Rol) + (Rolw) + (Ror) + (Rorw) + + ;; Zbs: Single-bit instructions + (Bclr) + (Bext) + (Binv) + (Bset) + + ;; Zbc: Carry-less multiplication + (Clmul) + (Clmulh) + (Clmulr) + + ;; Zbkb: Bit-manipulation for Cryptography + (Pack) + (Packw) + (Packh) +)) + + +(type FpuOPRRR (enum + ;; RV32F Standard Extension + (FaddS) + (FsubS) + (FmulS) + (FdivS) + + (FsgnjS) + (FsgnjnS) + (FsgnjxS) + (FminS) + (FmaxS) + (FeqS) + (FltS) + (FleS) + + ;; RV32D Standard Extension + (FaddD) + (FsubD) + (FmulD) + (FdivD) + (FsgnjD) + (FsgnjnD) + (FsgnjxD) + (FminD) + (FmaxD) + (FeqD) + (FltD) + (FleD) +)) + + + +(type AluOPRRI (enum + ;; Base ISA + (Addi) + (Slti) + (SltiU) + (Xori) + (Ori) + (Andi) + (Slli) + (Srli) + (Srai) + (Addiw) + (Slliw) + (SrliW) + (Sraiw) + + ;; Zba: Address Generation Instructions + (SlliUw) + + ;; Zbb: Bit Manipulation Instructions + (Clz) + (Clzw) + (Ctz) + (Ctzw) + (Cpop) + (Cpopw) + (Sextb) + (Sexth) + (Zexth) + (Rori) + (Roriw) + (Rev8) + (Brev8) + (Orcb) + + ;; Zbs: Single-bit instructions + (Bclri) + (Bexti) + (Binvi) + 
(Bseti) +)) + + +(type FRM (enum + ;; Round to Nearest, ties to Even + (RNE) + ;; Round towards Zero + (RTZ) + ;; Round Down (towards −∞) + (RDN) + ;; Round Up (towards +∞) + (RUP) + ;; Round to Nearest, ties to Max Magnitude + (RMM) + ;; In instruction’s rm field, selects dynamic rounding mode; + ;;In Rounding Mode register, Invalid. + (Fcsr) +)) + +(type FFlagsException (enum + ;; Invalid Operation + (NV) + ;; Divide by Zero + (DZ) + ;; Overflow + (OF) + ;; Underflow + (UF) + ;; Inexact + (NX) +)) + +;;;; input output read write +;;;; SI SO SR SW +;;;; PI PO PR PW +;;;; lowest four bit are used. +(type FenceReq (primitive u8)) + +(type VecBranchTarget (primitive VecBranchTarget)) +(type BoxCallInfo (primitive BoxCallInfo)) +(type BoxCallIndInfo (primitive BoxCallIndInfo)) +(type BoxReturnCallInfo (primitive BoxReturnCallInfo)) +(type IntegerCompare (primitive IntegerCompare)) +(type AMode (primitive AMode)) +(type OptionReg (primitive OptionReg)) +(type OptionImm12 (primitive OptionImm12)) +(type OptionUimm5 (primitive OptionUimm5)) +(type Imm12 (primitive Imm12)) +(type Imm32 (primitive Imm32)) +(type UImm5 (primitive UImm5)) +(type Imm5 (primitive Imm5)) +(type Imm20 (primitive Imm20)) +(type Imm3 (primitive Imm3)) +(type BranchTarget (primitive BranchTarget)) +(type OptionFloatRoundingMode (primitive OptionFloatRoundingMode)) +(type VecU8 (primitive VecU8)) +(type AMO (primitive AMO)) +(type VecMachLabel extern (enum)) + + +;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type XReg (primitive XReg)) +(type WritableXReg (primitive WritableXReg)) +(type FReg (primitive FReg)) +(type WritableFReg (primitive WritableFReg)) +(type VReg (primitive VReg)) +(type WritableVReg (primitive WritableVReg)) + +;; Construct a new `XReg` from a `Reg`. +;; +;; Asserts that the register has a Integer RegClass. +(decl xreg_new (Reg) XReg) +(extern constructor xreg_new xreg_new) +(convert Reg XReg xreg_new) + +;; Construct a new `WritableXReg` from a `WritableReg`. +;; +;; Asserts that the register has a Integer RegClass. +(decl writable_xreg_new (WritableReg) WritableXReg) +(extern constructor writable_xreg_new writable_xreg_new) +(convert WritableReg WritableXReg writable_xreg_new) + +;; Put a value into a XReg. +;; +;; Asserts that the value goes into a XReg. +(decl put_in_xreg (Value) XReg) +(rule (put_in_xreg val) (xreg_new (put_in_reg val))) +(convert Value XReg put_in_xreg) + +;; Construct an `InstOutput` out of a single XReg register. +(decl output_xreg (XReg) InstOutput) +(rule (output_xreg x) (output_reg x)) +(convert XReg InstOutput output_xreg) + +;; Convert a `WritableXReg` to an `XReg`. +(decl pure writable_xreg_to_xreg (WritableXReg) XReg) +(extern constructor writable_xreg_to_xreg writable_xreg_to_xreg) +(convert WritableXReg XReg writable_xreg_to_xreg) + +;; Convert a `WritableXReg` to an `WritableReg`. +(decl pure writable_xreg_to_writable_reg (WritableXReg) WritableReg) +(extern constructor writable_xreg_to_writable_reg writable_xreg_to_writable_reg) +(convert WritableXReg WritableReg writable_xreg_to_writable_reg) + +;; Convert a `WritableXReg` to an `Reg`. +(decl pure writable_xreg_to_reg (WritableXReg) Reg) +(rule (writable_xreg_to_reg x) (writable_xreg_to_writable_reg x)) +(convert WritableXReg Reg writable_xreg_to_reg) + +;; Convert an `XReg` to a `Reg`. +(decl pure xreg_to_reg (XReg) Reg) +(extern constructor xreg_to_reg xreg_to_reg) +(convert XReg Reg xreg_to_reg) + +;; Convert a `XReg` to a `ValueRegs`. 
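+;; (a `convert` declaration lets ISLE apply the named constructor implicitly
+;; wherever a term of the source type appears where the target type is expected)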
+(decl xreg_to_value_regs (XReg) ValueRegs) +(rule (xreg_to_value_regs x) (value_reg x)) +(convert XReg ValueRegs xreg_to_reg) + +;; Convert a `WritableXReg` to a `ValueRegs`. +(decl writable_xreg_to_value_regs (WritableXReg) ValueRegs) +(rule (writable_xreg_to_value_regs x) (value_reg x)) +(convert WritableXReg ValueRegs writable_xreg_to_value_regs) + +;; Allocates a new `WritableXReg`. +(decl temp_writable_xreg () WritableXReg) +(rule (temp_writable_xreg) (temp_writable_reg $I64)) + + +;; Construct a new `FReg` from a `Reg`. +;; +;; Asserts that the register has a Float RegClass. +(decl freg_new (Reg) FReg) +(extern constructor freg_new freg_new) +(convert Reg FReg freg_new) + +;; Construct a new `WritableFReg` from a `WritableReg`. +;; +;; Asserts that the register has a Float RegClass. +(decl writable_freg_new (WritableReg) WritableFReg) +(extern constructor writable_freg_new writable_freg_new) +(convert WritableReg WritableFReg writable_freg_new) + +;; Put a value into a FReg. +;; +;; Asserts that the value goes into a FReg. +(decl put_in_freg (Value) FReg) +(rule (put_in_freg val) (freg_new (put_in_reg val))) +(convert Value FReg put_in_freg) + +;; Construct an `InstOutput` out of a single FReg register. +(decl output_freg (FReg) InstOutput) +(rule (output_freg x) (output_reg x)) +(convert FReg InstOutput output_freg) + +;; Convert a `WritableFReg` to an `FReg`. +(decl pure writable_freg_to_freg (WritableFReg) FReg) +(extern constructor writable_freg_to_freg writable_freg_to_freg) +(convert WritableFReg FReg writable_freg_to_freg) + +;; Convert a `WritableFReg` to an `WritableReg`. +(decl pure writable_freg_to_writable_reg (WritableFReg) WritableReg) +(extern constructor writable_freg_to_writable_reg writable_freg_to_writable_reg) +(convert WritableFReg WritableReg writable_freg_to_writable_reg) + +;; Convert a `WritableFReg` to an `Reg`. +(decl pure writable_freg_to_reg (WritableFReg) Reg) +(rule (writable_freg_to_reg x) (writable_freg_to_writable_reg x)) +(convert WritableFReg Reg writable_freg_to_reg) + +;; Convert an `FReg` to a `Reg`. +(decl pure freg_to_reg (FReg) Reg) +(extern constructor freg_to_reg freg_to_reg) +(convert FReg Reg freg_to_reg) + +;; Convert a `FReg` to a `ValueRegs`. +(decl freg_to_value_regs (FReg) ValueRegs) +(rule (freg_to_value_regs x) (value_reg x)) +(convert FReg ValueRegs xreg_to_reg) + +;; Convert a `WritableFReg` to a `ValueRegs`. +(decl writable_freg_to_value_regs (WritableFReg) ValueRegs) +(rule (writable_freg_to_value_regs x) (value_reg x)) +(convert WritableFReg ValueRegs writable_freg_to_value_regs) + +;; Allocates a new `WritableFReg`. +(decl temp_writable_freg () WritableFReg) +(rule (temp_writable_freg) (temp_writable_reg $F64)) + + + +;; Construct a new `VReg` from a `Reg`. +;; +;; Asserts that the register has a Vector RegClass. +(decl vreg_new (Reg) VReg) +(extern constructor vreg_new vreg_new) +(convert Reg VReg vreg_new) + +;; Construct a new `WritableVReg` from a `WritableReg`. +;; +;; Asserts that the register has a Vector RegClass. +(decl writable_vreg_new (WritableReg) WritableVReg) +(extern constructor writable_vreg_new writable_vreg_new) +(convert WritableReg WritableVReg writable_vreg_new) + +;; Put a value into a VReg. +;; +;; Asserts that the value goes into a VReg. +(decl put_in_vreg (Value) VReg) +(rule (put_in_vreg val) (vreg_new (put_in_reg val))) +(convert Value VReg put_in_vreg) + +;; Construct an `InstOutput` out of a single VReg register. 
+(decl output_vreg (VReg) InstOutput) +(rule (output_vreg x) (output_reg x)) +(convert VReg InstOutput output_vreg) + +;; Convert a `WritableVReg` to an `VReg`. +(decl pure writable_vreg_to_vreg (WritableVReg) VReg) +(extern constructor writable_vreg_to_vreg writable_vreg_to_vreg) +(convert WritableVReg VReg writable_vreg_to_vreg) + +;; Convert a `WritableVReg` to an `WritableReg`. +(decl pure writable_vreg_to_writable_reg (WritableVReg) WritableReg) +(extern constructor writable_vreg_to_writable_reg writable_vreg_to_writable_reg) +(convert WritableVReg WritableReg writable_vreg_to_writable_reg) + +;; Convert a `WritableVReg` to an `Reg`. +(decl pure writable_vreg_to_reg (WritableVReg) Reg) +(rule (writable_vreg_to_reg x) (writable_vreg_to_writable_reg x)) +(convert WritableVReg Reg writable_vreg_to_reg) + +;; Convert an `VReg` to a `Reg`. +(decl pure vreg_to_reg (VReg) Reg) +(extern constructor vreg_to_reg vreg_to_reg) +(convert VReg Reg vreg_to_reg) + +;; Convert a `VReg` to a `ValueRegs`. +(decl vreg_to_value_regs (VReg) ValueRegs) +(rule (vreg_to_value_regs x) (value_reg x)) +(convert VReg ValueRegs xreg_to_reg) + +;; Convert a `WritableVReg` to a `ValueRegs`. +(decl writable_vreg_to_value_regs (WritableVReg) ValueRegs) +(rule (writable_vreg_to_value_regs x) (value_reg x)) +(convert WritableVReg ValueRegs writable_vreg_to_value_regs) + +;; Allocates a new `WritableVReg`. +(decl temp_writable_vreg () WritableVReg) +(rule (temp_writable_vreg) (temp_writable_reg $I8X16)) + + +;; Converters + +(convert u8 i32 u8_as_i32) +(decl u8_as_i32 (u8) i32) +(extern constructor u8_as_i32 u8_as_i32) + +;; ISA Extension helpers + +(decl pure has_v () bool) +(extern constructor has_v has_v) + +(decl pure has_zbkb () bool) +(extern constructor has_zbkb has_zbkb) + +(decl pure has_zba () bool) +(extern constructor has_zba has_zba) + +(decl pure has_zbb () bool) +(extern constructor has_zbb has_zbb) + +(decl pure has_zbc () bool) +(extern constructor has_zbc has_zbc) + +(decl pure has_zbs () bool) +(extern constructor has_zbs has_zbs) + +(decl gen_float_round (FloatRoundOP Reg Type) Reg) +(rule + (gen_float_round op rs ty) + (let + ((rd WritableReg (temp_writable_reg ty)) + (tmp WritableXReg (temp_writable_xreg)) + (tmp2 WritableFReg (temp_writable_freg)) + (_ Unit (emit (MInst.FloatRound op rd tmp tmp2 rs ty)))) + (writable_reg_to_reg rd))) + +(decl gen_float_select (FloatSelectOP Reg Reg Type) Reg) +(rule + (gen_float_select op x y ty) + (let + ((rd WritableReg (temp_writable_reg ty)) + (tmp WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.FloatSelect op rd tmp x y ty)))) + (writable_reg_to_reg rd))) + + +;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; RV32I Base Integer Instruction Set + +(decl zk_add (Imm32 Imm32) XReg) +(rule (zk_add imm1 imm2) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AddImm32 dst imm1 imm2)))) + dst)) + +(decl zk_mul (Imm32 Imm32) XReg) +(rule (zk_mul imm1 imm2) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.MulImm32 dst imm1 imm2)))) + dst)) + +;; Helper for emitting the `add` instruction. +;; rd ← rs1 + rs2 +(decl rv_add (XReg XReg) XReg) +(rule (rv_add rs1 rs2) + (alu_rrr (AluOPRRR.Add) rs1 rs2)) + +;; Helper for emitting the `addi` ("Add Immediate") instruction. +;; rd ← rs1 + sext(imm) +(decl rv_addi (XReg Imm12) XReg) +(rule (rv_addi rs1 imm) + (alu_rr_imm12 (AluOPRRI.Addi) rs1 imm)) + +;; Helper for emitting the `sub` instruction. 
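+;; (also used by rv_neg below, which is encoded as `sub rd, zero, rs1`)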
+;; rd ← rs1 - rs2 +(decl rv_sub (XReg XReg) XReg) +(rule (rv_sub rs1 rs2) + (alu_rrr (AluOPRRR.Sub) rs1 rs2)) + +;; Helper for emitting the `neg` instruction. +;; This instruction is a mnemonic for `sub rd, zero, rs1`. +(decl rv_neg (XReg) XReg) +(rule (rv_neg rs1) + (alu_rrr (AluOPRRR.Sub) (zero_reg) rs1)) + +;; Helper for emitting the `sll` ("Shift Left Logical") instruction. +;; rd ← rs1 << rs2 +(decl rv_sll (XReg XReg) XReg) +(rule (rv_sll rs1 rs2) + (alu_rrr (AluOPRRR.Sll) rs1 rs2)) + +;; Helper for emitting the `slli` ("Shift Left Logical Immediate") instruction. +;; rd ← rs1 << uext(imm) +(decl rv_slli (XReg Imm12) XReg) +(rule (rv_slli rs1 imm) + (alu_rr_imm12 (AluOPRRI.Slli) rs1 imm)) + +;; Helper for emitting the `srl` ("Shift Right Logical") instruction. +;; rd ← rs1 >> rs2 +(decl rv_srl (XReg XReg) XReg) +(rule (rv_srl rs1 rs2) + (alu_rrr (AluOPRRR.Srl) rs1 rs2)) + +;; Helper for emitting the `srli` ("Shift Right Logical Immediate") instruction. +;; rd ← rs1 >> uext(imm) +(decl rv_srli (XReg Imm12) XReg) +(rule (rv_srli rs1 imm) + (alu_rr_imm12 (AluOPRRI.Srli) rs1 imm)) + +;; Helper for emitting the `sra` ("Shift Right Arithmetic") instruction. +;; rd ← rs1 >> rs2 +(decl rv_sra (XReg XReg) XReg) +(rule (rv_sra rs1 rs2) + (alu_rrr (AluOPRRR.Sra) rs1 rs2)) + +;; Helper for emitting the `srai` ("Shift Right Arithmetic Immediate") instruction. +;; rd ← rs1 >> uext(imm) +(decl rv_srai (XReg Imm12) XReg) +(rule (rv_srai rs1 imm) + (alu_rr_imm12 (AluOPRRI.Srai) rs1 imm)) + +;; Helper for emitting the `or` instruction. +;; rd ← rs1 ∨ rs2 +(decl rv_or (XReg XReg) XReg) +(rule (rv_or rs1 rs2) + (alu_rrr (AluOPRRR.Or) rs1 rs2)) + +;; Helper for emitting the `ori` ("Or Immediate") instruction. +;; rd ← rs1 ∨ uext(imm) +(decl rv_ori (XReg Imm12) XReg) +(rule (rv_ori rs1 imm) + (alu_rr_imm12 (AluOPRRI.Ori) rs1 imm)) + +;; Helper for emitting the `xor` instruction. +;; rd ← rs1 ⊕ rs2 +(decl rv_xor (XReg XReg) XReg) +(rule (rv_xor rs1 rs2) + (alu_rrr (AluOPRRR.Xor) rs1 rs2)) + +;; Helper for emitting the `xori` ("Exlusive Or Immediate") instruction. +;; rd ← rs1 ⊕ uext(imm) +(decl rv_xori (XReg Imm12) XReg) +(rule (rv_xori rs1 imm) + (alu_rr_imm12 (AluOPRRI.Xori) rs1 imm)) + +;; Helper for emitting the `not` instruction. +;; This instruction is a mnemonic for `xori rd, rs1, -1`. +(decl rv_not (XReg) XReg) +(rule (rv_not rs1) + (rv_xori rs1 (imm12_const -1))) + +;; Helper for emitting the `and` instruction. +;; rd ← rs1 ∧ rs2 +(decl rv_and (XReg XReg) XReg) +(rule (rv_and rs1 rs2) + (alu_rrr (AluOPRRR.And) rs1 rs2)) + +;; Helper for emitting the `andi` ("And Immediate") instruction. +;; rd ← rs1 ∧ uext(imm) +(decl rv_andi (XReg Imm12) XReg) +(rule (rv_andi rs1 imm) + (alu_rr_imm12 (AluOPRRI.Andi) rs1 imm)) + +;; Helper for emitting the `sltu` ("Set Less Than Unsigned") instruction. +;; rd ← rs1 < rs2 +(decl rv_sltu (XReg XReg) XReg) +(rule (rv_sltu rs1 rs2) + (alu_rrr (AluOPRRR.SltU) rs1 rs2)) + +;; Helper for emitting the `snez` instruction. +;; This instruction is a mnemonic for `sltu rd, zero, rs`. +(decl rv_snez (XReg) XReg) +(rule (rv_snez rs1) + (rv_sltu (zero_reg) rs1)) + +;; Helper for emiting the `sltiu` ("Set Less Than Immediate Unsigned") instruction. +;; rd ← rs1 < imm +(decl rv_sltiu (XReg Imm12) XReg) +(rule (rv_sltiu rs1 imm) + (alu_rr_imm12 (AluOPRRI.SltiU) rs1 imm)) + +;; Helper for emitting the `seqz` instruction. +;; This instruction is a mnemonic for `sltiu rd, rs, 1`. 
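+;; i.e. rd ← 1 when rs1 = 0, and rd ← 0 otherwise.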
+(decl rv_seqz (XReg) XReg) +(rule (rv_seqz rs1) + (rv_sltiu rs1 (imm12_const 1))) + + +;; RV64I Base Integer Instruction Set +;; Unlike RV32I instructions these are only present in the 64bit ISA + +;; Helper for emitting the `addw` ("Add Word") instruction. +;; rd ← sext32(rs1) + sext32(rs2) +(decl rv_addw (XReg XReg) XReg) +(rule (rv_addw rs1 rs2) + (alu_rrr (AluOPRRR.Addw) rs1 rs2)) + +;; Helper for emitting the `addiw` ("Add Word Immediate") instruction. +;; rd ← sext32(rs1) + imm +(decl rv_addiw (XReg Imm12) XReg) +(rule (rv_addiw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Addiw) rs1 imm)) + +;; Helper for emitting the `sext.w` ("Sign Extend Word") instruction. +;; This instruction is a mnemonic for `addiw rd, rs, zero`. +(decl rv_sextw (XReg) XReg) +(rule (rv_sextw rs1) + (rv_addiw rs1 (imm12_const 0))) + +;; Helper for emitting the `subw` ("Subtract Word") instruction. +;; rd ← sext32(rs1) - sext32(rs2) +(decl rv_subw (XReg XReg) XReg) +(rule (rv_subw rs1 rs2) + (alu_rrr (AluOPRRR.Subw) rs1 rs2)) + +;; Helper for emitting the `sllw` ("Shift Left Logical Word") instruction. +;; rd ← sext32(uext32(rs1) << rs2) +(decl rv_sllw (XReg XReg) XReg) +(rule (rv_sllw rs1 rs2) + (alu_rrr (AluOPRRR.Sllw) rs1 rs2)) + +;; Helper for emitting the `slliw` ("Shift Left Logical Immediate Word") instruction. +;; rd ← sext32(uext32(rs1) << imm) +(decl rv_slliw (XReg Imm12) XReg) +(rule (rv_slliw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Slliw) rs1 imm)) + +;; Helper for emitting the `srlw` ("Shift Right Logical Word") instruction. +;; rd ← sext32(uext32(rs1) >> rs2) +(decl rv_srlw (XReg XReg) XReg) +(rule (rv_srlw rs1 rs2) + (alu_rrr (AluOPRRR.Srlw) rs1 rs2)) + +;; Helper for emitting the `srliw` ("Shift Right Logical Immediate Word") instruction. +;; rd ← sext32(uext32(rs1) >> imm) +(decl rv_srliw (XReg Imm12) XReg) +(rule (rv_srliw rs1 imm) + (alu_rr_imm12 (AluOPRRI.SrliW) rs1 imm)) + +;; Helper for emitting the `sraw` ("Shift Right Arithmetic Word") instruction. +;; rd ← sext32(rs1 >> rs2) +(decl rv_sraw (XReg XReg) XReg) +(rule (rv_sraw rs1 rs2) + (alu_rrr (AluOPRRR.Sraw) rs1 rs2)) + +;; Helper for emitting the `sraiw` ("Shift Right Arithmetic Immediate Word") instruction. +;; rd ← sext32(rs1 >> imm) +(decl rv_sraiw (XReg Imm12) XReg) +(rule (rv_sraiw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Sraiw) rs1 imm)) + + +;; RV32M Extension +;; TODO: Enable these instructions only when we have the M extension + +;; Helper for emitting the `mul` instruction. +;; rd ← rs1 × rs2 +(decl rv_mul (XReg XReg) XReg) +(rule (rv_mul rs1 rs2) + (alu_rrr (AluOPRRR.Mul) rs1 rs2)) + +;; Helper for emitting the `mulh` ("Multiply High Signed Signed") instruction. +;; rd ← (sext(rs1) × sext(rs2)) » xlen +(decl rv_mulh (XReg XReg) XReg) +(rule (rv_mulh rs1 rs2) + (alu_rrr (AluOPRRR.Mulh) rs1 rs2)) + +;; Helper for emitting the `mulhu` ("Multiply High Unsigned Unsigned") instruction. +;; rd ← (uext(rs1) × uext(rs2)) » xlen +(decl rv_mulhu (XReg XReg) XReg) +(rule (rv_mulhu rs1 rs2) + (alu_rrr (AluOPRRR.Mulhu) rs1 rs2)) + +;; Helper for emitting the `div` instruction. +;; rd ← rs1 ÷ rs2 +(decl rv_div (XReg XReg) XReg) +(rule (rv_div rs1 rs2) + (alu_rrr (AluOPRRR.Div) rs1 rs2)) + +;; Helper for emitting the `divu` ("Divide Unsigned") instruction. +;; rd ← rs1 ÷ rs2 +(decl rv_divu (XReg XReg) XReg) +(rule (rv_divu rs1 rs2) + (alu_rrr (AluOPRRR.DivU) rs1 rs2)) + +;; Helper for emitting the `rem` instruction. 
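+;; Following RISC-V `rem` semantics, the result takes the sign of the dividend
+;; (rs1): for example, -7 rem 2 = -1.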
+;; rd ← rs1 mod rs2 +(decl rv_rem (XReg XReg) XReg) +(rule (rv_rem rs1 rs2) + (alu_rrr (AluOPRRR.Rem) rs1 rs2)) + +;; Helper for emitting the `remu` ("Remainder Unsigned") instruction. +;; rd ← rs1 mod rs2 +(decl rv_remu (XReg XReg) XReg) +(rule (rv_remu rs1 rs2) + (alu_rrr (AluOPRRR.RemU) rs1 rs2)) + + + +;; RV64M Extension +;; TODO: Enable these instructions only when we have the M extension + +;; Helper for emitting the `mulw` ("Multiply Word") instruction. +;; rd ← uext32(rs1) × uext32(rs2) +(decl rv_mulw (XReg XReg) XReg) +(rule (rv_mulw rs1 rs2) + (alu_rrr (AluOPRRR.Mulw) rs1 rs2)) + +;; Helper for emitting the `divw` ("Divide Word") instruction. +;; rd ← sext32(rs1) ÷ sext32(rs2) +(decl rv_divw (XReg XReg) XReg) +(rule (rv_divw rs1 rs2) + (alu_rrr (AluOPRRR.Divw) rs1 rs2)) + +;; Helper for emitting the `divuw` ("Divide Unsigned Word") instruction. +;; rd ← uext32(rs1) ÷ uext32(rs2) +(decl rv_divuw (XReg XReg) XReg) +(rule (rv_divuw rs1 rs2) + (alu_rrr (AluOPRRR.Divuw) rs1 rs2)) + +;; Helper for emitting the `remw` ("Remainder Word") instruction. +;; rd ← sext32(rs1) mod sext32(rs2) +(decl rv_remw (XReg XReg) XReg) +(rule (rv_remw rs1 rs2) + (alu_rrr (AluOPRRR.Remw) rs1 rs2)) + +;; Helper for emitting the `remuw` ("Remainder Unsigned Word") instruction. +;; rd ← uext32(rs1) mod uext32(rs2) +(decl rv_remuw (XReg XReg) XReg) +(rule (rv_remuw rs1 rs2) + (alu_rrr (AluOPRRR.Remuw) rs1 rs2)) + + +;; F and D Extensions +;; TODO: Enable these instructions only when we have the F or D extensions + +;; Helper for emitting the `fadd` instruction. +(decl rv_fadd (Type FReg FReg) FReg) +(rule (rv_fadd $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FaddS) $F32 rs1 rs2)) +(rule (rv_fadd $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FaddD) $F64 rs1 rs2)) + +;; Helper for emitting the `fsub` instruction. +(decl rv_fsub (Type FReg FReg) FReg) +(rule (rv_fsub $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsubS) $F32 rs1 rs2)) +(rule (rv_fsub $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsubD) $F64 rs1 rs2)) + +;; Helper for emitting the `fmul` instruction. +(decl rv_fmul (Type FReg FReg) FReg) +(rule (rv_fmul $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FmulS) $F32 rs1 rs2)) +(rule (rv_fmul $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FmulD) $F64 rs1 rs2)) + +;; Helper for emitting the `fdiv` instruction. +(decl rv_fdiv (Type FReg FReg) FReg) +(rule (rv_fdiv $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FdivS) $F32 rs1 rs2)) +(rule (rv_fdiv $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FdivD) $F64 rs1 rs2)) + +;; Helper for emitting the `fsqrt` instruction. +(decl rv_fsqrt (Type FReg) FReg) +(rule (rv_fsqrt $F32 rs1) (fpu_rr (FpuOPRR.FsqrtS) $F32 rs1)) +(rule (rv_fsqrt $F64 rs1) (fpu_rr (FpuOPRR.FsqrtD) $F64 rs1)) + +;; Helper for emitting the `fmadd` instruction. +(decl rv_fmadd (Type FReg FReg FReg) FReg) +(rule (rv_fmadd $F32 rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddS) $F32 rs1 rs2 rs3)) +(rule (rv_fmadd $F64 rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddD) $F64 rs1 rs2 rs3)) + +;; Helper for emitting the `fmv.x.w` instruction. +(decl rv_fmvxw (FReg) XReg) +(rule (rv_fmvxw r) (fpu_rr (FpuOPRR.FmvXW) $I32 r)) + +;; Helper for emitting the `fmv.x.d` instruction. +(decl rv_fmvxd (FReg) XReg) +(rule (rv_fmvxd r) (fpu_rr (FpuOPRR.FmvXD) $I64 r)) + +;; Helper for emitting the `fmv.w.x` instruction. +(decl rv_fmvwx (XReg) FReg) +(rule (rv_fmvwx r) (fpu_rr (FpuOPRR.FmvWX) $F32 r)) + +;; Helper for emitting the `fmv.d.x` instruction. +(decl rv_fmvdx (XReg) FReg) +(rule (rv_fmvdx r) (fpu_rr (FpuOPRR.FmvDX) $F64 r)) + +;; Helper for emitting the `fcvt.d.s` ("Float Convert Double to Single") instruction. 
+(decl rv_fcvtds (FReg) FReg) +(rule (rv_fcvtds rs1) (fpu_rr (FpuOPRR.FcvtDS) $F32 rs1)) + +;; Helper for emitting the `fcvt.s.d` ("Float Convert Single to Double") instruction. +(decl rv_fcvtsd (FReg) FReg) +(rule (rv_fcvtsd rs1) (fpu_rr (FpuOPRR.FcvtSD) $F64 rs1)) + +;; Helper for emitting the `fsgnj` ("Floating Point Sign Injection") instruction. +;; The output of this instruction is `rs1` with the sign bit from `rs2` +;; This implements the `copysign` operation +(decl rv_fsgnj (Type FReg FReg) FReg) +(rule (rv_fsgnj $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjS) $F32 rs1 rs2)) +(rule (rv_fsgnj $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjD) $F64 rs1 rs2)) + +;; Helper for emitting the `fsgnjn` ("Floating Point Sign Injection Negated") instruction. +;; The output of this instruction is `rs1` with the negated sign bit from `rs2` +;; When `rs1 == rs2` this implements the `neg` operation +(decl rv_fsgnjn (Type FReg FReg) FReg) +(rule (rv_fsgnjn $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjnS) $F32 rs1 rs2)) +(rule (rv_fsgnjn $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjnD) $F64 rs1 rs2)) + +;; Helper for emitting the `fneg` ("Floating Point Negate") instruction. +;; This instruction is a mnemonic for `fsgnjn rd, rs1, rs1` +(decl rv_fneg (Type FReg) FReg) +(rule (rv_fneg ty rs1) (rv_fsgnjn ty rs1 rs1)) + +;; Helper for emitting the `fsgnjx` ("Floating Point Sign Injection Exclusive") instruction. +;; The output of this instruction is `rs1` with the XOR of the sign bits from `rs1` and `rs2`. +;; When `rs1 == rs2` this implements `fabs` +(decl rv_fsgnjx (Type FReg FReg) FReg) +(rule (rv_fsgnjx $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjxS) $F32 rs1 rs2)) +(rule (rv_fsgnjx $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjxD) $F64 rs1 rs2)) + +;; Helper for emitting the `fabs` ("Floating Point Absolute") instruction. +;; This instruction is a mnemonic for `fsgnjx rd, rs1, rs1` +(decl rv_fabs (Type FReg) FReg) +(rule (rv_fabs ty rs1) (rv_fsgnjx ty rs1 rs1)) + +;; Helper for emitting the `feq` ("Float Equal") instruction. +(decl rv_feq (Type FReg FReg) XReg) +(rule (rv_feq $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FeqS) $I64 rs1 rs2)) +(rule (rv_feq $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FeqD) $I64 rs1 rs2)) + +;; Helper for emitting the `flt` ("Float Less Than") instruction. +(decl rv_flt (Type FReg FReg) XReg) +(rule (rv_flt $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FltS) $I64 rs1 rs2)) +(rule (rv_flt $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FltD) $I64 rs1 rs2)) + +;; Helper for emitting the `fle` ("Float Less Than or Equal") instruction. +(decl rv_fle (Type FReg FReg) XReg) +(rule (rv_fle $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FleS) $I64 rs1 rs2)) +(rule (rv_fle $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FleD) $I64 rs1 rs2)) + +;; Helper for emitting the `fgt` ("Float Greater Than") instruction. +;; Note: The arguments are reversed +(decl rv_fgt (Type FReg FReg) XReg) +(rule (rv_fgt ty rs1 rs2) (rv_flt ty rs2 rs1)) + +;; Helper for emitting the `fge` ("Float Greater Than or Equal") instruction. +;; Note: The arguments are reversed +(decl rv_fge (Type FReg FReg) XReg) +(rule (rv_fge ty rs1 rs2) (rv_fle ty rs2 rs1)) + + +;; `Zba` Extension Instructions + +;; Helper for emitting the `adduw` ("Add Unsigned Word") instruction. +;; rd ← uext32(rs1) + uext32(rs2) +(decl rv_adduw (XReg XReg) XReg) +(rule (rv_adduw rs1 rs2) + (alu_rrr (AluOPRRR.Adduw) rs1 rs2)) + +;; Helper for emitting the `zext.w` ("Zero Extend Word") instruction. +;; This instruction is a mnemonic for `adduw rd, rs1, zero`. 
+;; rd ← uext32(rs1) +(decl rv_zextw (XReg) XReg) +(rule (rv_zextw rs1) + (rv_adduw rs1 (zero_reg))) + +;; Helper for emitting the `slli.uw` ("Shift Left Logical Immediate Unsigned Word") instruction. +;; rd ← uext32(rs1) << imm +(decl rv_slliuw (XReg Imm12) XReg) +(rule (rv_slliuw rs1 imm) + (alu_rr_imm12 (AluOPRRI.SlliUw) rs1 imm)) + + +;; `Zbb` Extension Instructions + +;; Helper for emitting the `andn` ("And Negated") instruction. +;; rd ← rs1 ∧ ~(rs2) +(decl rv_andn (XReg XReg) XReg) +(rule (rv_andn rs1 rs2) + (alu_rrr (AluOPRRR.Andn) rs1 rs2)) + +;; Helper for emitting the `orn` ("Or Negated") instruction. +;; rd ← rs1 ∨ ~(rs2) +(decl rv_orn (XReg XReg) XReg) +(rule (rv_orn rs1 rs2) + (alu_rrr (AluOPRRR.Orn) rs1 rs2)) + +;; Helper for emitting the `clz` ("Count Leading Zero Bits") instruction. +(decl rv_clz (XReg) XReg) +(rule (rv_clz rs1) + (alu_rr_funct12 (AluOPRRI.Clz) rs1)) + +;; Helper for emitting the `clzw` ("Count Leading Zero Bits in Word") instruction. +(decl rv_clzw (XReg) XReg) +(rule (rv_clzw rs1) + (alu_rr_funct12 (AluOPRRI.Clzw) rs1)) + +;; Helper for emitting the `ctz` ("Count Trailing Zero Bits") instruction. +(decl rv_ctz (XReg) XReg) +(rule (rv_ctz rs1) + (alu_rr_funct12 (AluOPRRI.Ctz) rs1)) + +;; Helper for emitting the `ctzw` ("Count Trailing Zero Bits in Word") instruction. +(decl rv_ctzw (XReg) XReg) +(rule (rv_ctzw rs1) + (alu_rr_funct12 (AluOPRRI.Ctzw) rs1)) + +;; Helper for emitting the `cpop` ("Count Population") instruction. +(decl rv_cpop (XReg) XReg) +(rule (rv_cpop rs1) + (alu_rr_funct12 (AluOPRRI.Cpop) rs1)) + +;; Helper for emitting the `max` instruction. +(decl rv_max (XReg XReg) XReg) +(rule (rv_max rs1 rs2) + (alu_rrr (AluOPRRR.Max) rs1 rs2)) + +;; Helper for emitting the `sext.b` instruction. +(decl rv_sextb (XReg) XReg) +(rule (rv_sextb rs1) + (alu_rr_imm12 (AluOPRRI.Sextb) rs1 (imm12_const 0))) + +;; Helper for emitting the `sext.h` instruction. +(decl rv_sexth (XReg) XReg) +(rule (rv_sexth rs1) + (alu_rr_imm12 (AluOPRRI.Sexth) rs1 (imm12_const 0))) + +;; Helper for emitting the `zext.h` instruction. +(decl rv_zexth (XReg) XReg) +(rule (rv_zexth rs1) + (alu_rr_imm12 (AluOPRRI.Zexth) rs1 (imm12_const 0))) + +;; Helper for emitting the `rol` ("Rotate Left") instruction. +(decl rv_rol (XReg XReg) XReg) +(rule (rv_rol rs1 rs2) + (alu_rrr (AluOPRRR.Rol) rs1 rs2)) + +;; Helper for emitting the `rolw` ("Rotate Left Word") instruction. +(decl rv_rolw (XReg XReg) XReg) +(rule (rv_rolw rs1 rs2) + (alu_rrr (AluOPRRR.Rolw) rs1 rs2)) + +;; Helper for emitting the `ror` ("Rotate Right") instruction. +(decl rv_ror (XReg XReg) XReg) +(rule (rv_ror rs1 rs2) + (alu_rrr (AluOPRRR.Ror) rs1 rs2)) + +;; Helper for emitting the `rorw` ("Rotate Right Word") instruction. +(decl rv_rorw (XReg XReg) XReg) +(rule (rv_rorw rs1 rs2) + (alu_rrr (AluOPRRR.Rorw) rs1 rs2)) + +;; Helper for emitting the `rev8` ("Byte Reverse") instruction. +(decl rv_rev8 (XReg) XReg) +(rule (rv_rev8 rs1) + (alu_rr_funct12 (AluOPRRI.Rev8) rs1)) + +;; Helper for emitting the `brev8` ("Bit Reverse Inside Bytes") instruction. +;; TODO: This instruction is mentioned in some older versions of the +;; spec, but has since disappeared, we should follow up on this. +;; It probably was renamed to `rev.b` which seems to be the closest match. +(decl rv_brev8 (XReg) XReg) +(rule (rv_brev8 rs1) + (alu_rr_funct12 (AluOPRRI.Brev8) rs1)) + +;; Helper for emitting the `bseti` ("Single-Bit Set Immediate") instruction. 
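+;; rd ← rs1 | (1 << imm)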
+(decl rv_bseti (XReg Imm12) XReg) +(rule (rv_bseti rs1 imm) + (alu_rr_imm12 (AluOPRRI.Bseti) rs1 imm)) + + +;; `Zbkb` Extension Instructions + +;; Helper for emitting the `pack` ("Pack low halves of registers") instruction. +(decl rv_pack (XReg XReg) XReg) +(rule (rv_pack rs1 rs2) + (alu_rrr (AluOPRRR.Pack) rs1 rs2)) + +;; Helper for emitting the `packw` ("Pack low 16-bits of registers") instruction. +(decl rv_packw (XReg XReg) XReg) +(rule (rv_packw rs1 rs2) + (alu_rrr (AluOPRRR.Packw) rs1 rs2)) + + + + +;; Generate a mask for the bit-width of the given type +(decl pure shift_mask (Type) u64) +(rule (shift_mask ty) (u64_sub (ty_bits (lane_type ty)) 1)) + +;; for load immediate +(decl imm (Type u64) Reg) +(extern constructor imm imm) + +;; Imm12 Rules + +(decl pure imm12_zero () Imm12) +(rule + (imm12_zero) + (imm12_const 0)) + +(decl pure imm12_const (i32) Imm12) +(extern constructor imm12_const imm12_const) + +(decl load_imm12 (i32) Reg) +(rule + (load_imm12 x) + (rv_addi (zero_reg) (imm12_const x))) + +;; for load immediate +(decl imm_from_bits (u64) Imm12) +(extern constructor imm_from_bits imm_from_bits) + +(decl imm_from_neg_bits (i64) Imm12) +(extern constructor imm_from_neg_bits imm_from_neg_bits) + +(decl imm12_const_add (i32 i32) Imm12) +(extern constructor imm12_const_add imm12_const_add) + +(decl imm12_and (Imm12 u64) Imm12) +(extern constructor imm12_and imm12_and) + +;; Helper for get negative of Imm12 +(decl neg_imm12 (Imm12) Imm12) +(extern constructor neg_imm12 neg_imm12) + +;; Imm12 Extractors + +;; Helper to go directly from a `Value`, when it's an `iconst`, to an `Imm12`. +(decl imm12_from_value (Imm12) Value) +(extractor + (imm12_from_value n) + (def_inst (iconst (u64_from_imm64 (imm12_from_u64 n))))) + +(decl imm32_from_value (Imm32) Value) +(extractor + (imm32_from_value n) + (def_inst (iconst (u64_from_imm64 (imm32_from_u64 n))))) + +(decl imm12_from_u64 (Imm12) u64) +(extern extractor imm12_from_u64 imm12_from_u64) + +(decl imm32_from_u64 (Imm32) u64) +(extern extractor imm32_from_u64 imm32_from_u64) + +(decl pure partial u64_to_imm12 (u64) Imm12) +(rule (u64_to_imm12 (imm12_from_u64 n)) n) + + +;; Imm5 Extractors + +(decl imm5_from_u64 (Imm5) u64) +(extern extractor imm5_from_u64 imm5_from_u64) + +;; Construct a Imm5 from an i8 +(decl pure partial imm5_from_i8 (i8) Imm5) +(extern constructor imm5_from_i8 imm5_from_i8) + +;; Extractor that matches a `Value` equivalent to a replicated Imm5 on all lanes. +;; TODO(#6527): Try matching vconst here as well +(decl replicated_imm5 (Imm5) Value) +(extractor (replicated_imm5 n) + (def_inst (splat (iconst (u64_from_imm64 (imm5_from_u64 n)))))) + +;; UImm5 Helpers + +;; Extractor that matches a `Value` equivalent to a replicated UImm5 on all lanes. +;; TODO(#6527): Try matching vconst here as well +(decl replicated_uimm5 (UImm5) Value) +(extractor (replicated_uimm5 n) + (def_inst (splat (uimm5_from_value n)))) + +;; Helper to go directly from a `Value`, when it's an `iconst`, to an `UImm5`. +(decl uimm5_from_value (UImm5) Value) +(extractor (uimm5_from_value n) + (iconst (u64_from_imm64 (uimm5_from_u64 n)))) + +;; Extract a `UImm5` from an `u8`. +(decl pure partial uimm5_from_u8 (UImm5) u8) +(extern extractor uimm5_from_u8 uimm5_from_u8) + +;; Extract a `UImm5` from an `u64`. 
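+;; The extractor only matches when the value fits in 5 bits (i.e. is in 0..=31).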
+(decl pure partial uimm5_from_u64 (UImm5) u64) +(extern extractor uimm5_from_u64 uimm5_from_u64) + +;; Convert a `u64` into an `UImm5` +(decl pure partial u64_to_uimm5 (u64) UImm5) +(rule (u64_to_uimm5 (uimm5_from_u64 n)) n) + +(decl uimm5_bitcast_to_imm5 (UImm5) Imm5) +(extern constructor uimm5_bitcast_to_imm5 uimm5_bitcast_to_imm5) + +;; Float Helpers + +;; Returns the bitpattern of the Canonical NaN for the given type. +(decl pure canonical_nan_u64 (Type) u64) +(rule (canonical_nan_u64 $F32) 0x7fc00000) +(rule (canonical_nan_u64 $F64) 0x7ff8000000000000) + +(decl gen_default_frm () OptionFloatRoundingMode) +(extern constructor gen_default_frm gen_default_frm) + +;; Helper for emitting `MInst.FpuRR` instructions. +(decl fpu_rr (FpuOPRR Type Reg) Reg) +(rule (fpu_rr op ty src) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.FpuRR op (gen_default_frm) dst src)))) + dst)) + +;; Helper for emitting `MInst.AluRRR` instructions. +(decl alu_rrr (AluOPRRR Reg Reg) Reg) +(rule (alu_rrr op src1 src2) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRR op dst src1 src2)))) + dst)) + + +(decl pack_float_rounding_mode (FRM) OptionFloatRoundingMode) +(extern constructor pack_float_rounding_mode pack_float_rounding_mode) + +;; Helper for emitting `MInst.AluRRR` instructions. +(decl fpu_rrr (FpuOPRRR Type Reg Reg) Reg) +(rule (fpu_rrr op ty src1 src2) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.FpuRRR op (gen_default_frm) dst src1 src2)))) + dst)) + + +;; Helper for emitting `MInst.FpuRRRR` instructions. +(decl fpu_rrrr (FpuOPRRRR Type Reg Reg Reg) Reg) +(rule (fpu_rrrr op ty src1 src2 src3) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.FpuRRRR op (gen_default_frm) dst src1 src2 src3)))) + dst)) + + +;; Helper for emitting `MInst.AluRRImm12` instructions. +(decl alu_rr_imm12 (AluOPRRI Reg Imm12) Reg) +(rule (alu_rr_imm12 op src imm) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRImm12 op dst src imm)))) + dst)) + +;; some instruction use imm12 as funct12. +;; so we don't need the imm12 paramter. 
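+;; (These are unary operations such as `clz`, `ctz` and `rev8`: the function
+;; code is implied by the opcode, so the helper passes a zero immediate.)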
+(decl alu_rr_funct12 (AluOPRRI Reg) Reg) +(rule (alu_rr_funct12 op src) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero))))) + dst)) + +(decl select_addi (Type) AluOPRRI) +(rule 1 (select_addi (fits_in_32 ty)) (AluOPRRI.Addiw)) +(rule (select_addi (fits_in_64 ty)) (AluOPRRI.Addi)) + + +(decl gen_bnot (Type ValueRegs) ValueRegs) +(rule 2 (gen_bnot (ty_scalar_float ty) x) + (let ((val FReg (value_regs_get x 0)) + (x_val XReg (move_f_to_x val ty)) + (inverted XReg (rv_not x_val)) + (res FReg (move_x_to_f inverted (float_int_of_same_size ty)))) + (value_reg res))) + +(rule 1 (gen_bnot $I128 x) + (let ((lo XReg (rv_not (value_regs_get x 0))) + (hi XReg (rv_not (value_regs_get x 1)))) + (value_regs lo hi))) + +(rule 0 (gen_bnot (ty_int_ref_scalar_64 _) x) + (rv_not (value_regs_get x 0))) + + +(decl gen_and (Type ValueRegs ValueRegs) ValueRegs) +(rule 1 (gen_and $I128 x y) + (value_regs + (rv_and (value_regs_get x 0) (value_regs_get y 0)) + (rv_and (value_regs_get x 1) (value_regs_get y 1)))) + +(rule 0 (gen_and (fits_in_64 _) x y) + (rv_and (value_regs_get x 0) (value_regs_get y 0))) + + +(decl gen_andi (XReg u64) XReg) +(rule 1 (gen_andi x (imm12_from_u64 y)) + (rv_andi x y)) + +(rule 0 (gen_andi x y) + (rv_and x (imm $I64 y))) + + +(decl gen_or (Type ValueRegs ValueRegs) ValueRegs) +(rule 1 (gen_or $I128 x y) + (value_regs + (rv_or (value_regs_get x 0) (value_regs_get y 0)) + (rv_or (value_regs_get x 1) (value_regs_get y 1)))) + +(rule 0 (gen_or (fits_in_64 _) x y) + (rv_or (value_regs_get x 0) (value_regs_get y 0))) + + + +(decl gen_bswap (Type XReg) XReg) + +;; This is only here to make the rule below work. bswap.i8 isn't valid +(rule 0 (gen_bswap $I8 x) x) + +(rule 1 (gen_bswap (ty_int_ref_16_to_64 ty) x) + (if-let half_ty (ty_half_width ty)) + (if-let half_size (u64_to_imm12 (ty_bits half_ty))) + (let (;; This swaps the top bytes and zeroes the bottom bytes, so that + ;; we can or it with the bottom bytes later. + (swap_top XReg (gen_bswap half_ty x)) + (top XReg (rv_slli swap_top half_size)) + + ;; Get the top half, swap it, and zero extend it so we can `or` it + ;; with the bottom half. 
+ (shifted XReg (rv_srli x half_size)) + (swap_bot XReg (gen_bswap half_ty shifted)) + (bot XReg (zext swap_bot half_ty $I64))) + (rv_or top bot))) + +;; With `zbb` we can use `rev8` and shift the result +(rule 2 (gen_bswap (int_fits_in_32 ty) x) + (if-let $true (has_zbb)) + (if-let shift_amt (u64_to_imm12 (u64_sub 64 (ty_bits ty)))) + (rv_srli (rv_rev8 x) shift_amt)) + +;; With `zbb` we can use `rev8` that does this +(rule 3 (gen_bswap $I64 x) + (if-let $true (has_zbb)) + (rv_rev8 x)) + + + +(decl lower_bit_reverse (Reg Type) Reg) + +(rule + (lower_bit_reverse r $I8) + (gen_brev8 r $I8)) + +(rule + (lower_bit_reverse r $I16) + (let + ((tmp XReg (gen_brev8 r $I16)) + (tmp2 XReg (gen_rev8 tmp)) + (result XReg (rv_srli tmp2 (imm12_const 48)))) + result)) + +(rule + (lower_bit_reverse r $I32) + (let + ((tmp XReg (gen_brev8 r $I32)) + (tmp2 XReg (gen_rev8 tmp)) + (result XReg (rv_srli tmp2 (imm12_const 32)))) + result)) + +(rule + (lower_bit_reverse r $I64) + (let + ((tmp XReg (gen_rev8 r))) + (gen_brev8 tmp $I64))) + + +(decl lower_ctz (Type Reg) Reg) +(rule (lower_ctz ty x) + (gen_cltz $false x ty)) + +(rule 1 (lower_ctz (fits_in_16 ty) x) + (if-let $true (has_zbb)) + (let ((tmp Reg (gen_bseti x (ty_bits ty)))) + (rv_ctzw tmp))) + +(rule 2 (lower_ctz $I32 x) + (if-let $true (has_zbb)) + (rv_ctzw x)) + +(rule 2 (lower_ctz $I64 x) + (if-let $true (has_zbb)) + (rv_ctz x)) + +;; Count trailing zeros from a i128 bit value. +;; We count both halves separately and conditionally add them if it makes sense. +(decl lower_ctz_128 (ValueRegs) ValueRegs) +(rule (lower_ctz_128 x) + (let ((x_lo XReg (value_regs_get x 0)) + (x_hi XReg (value_regs_get x 1)) + ;; Count both halves + (high XReg (lower_ctz $I64 x_hi)) + (low XReg (lower_ctz $I64 x_lo)) + ;; Only add the top half if the bottom is zero + (high XReg (gen_select_reg (IntCC.Equal) x_lo (zero_reg) high (zero_reg))) + (result XReg (rv_add low high))) + (extend result (ExtendOp.Zero) $I64 $I128))) + +(decl lower_clz (Type XReg) XReg) +(rule (lower_clz ty rs) + (gen_cltz $true rs ty)) + +(rule 1 (lower_clz (fits_in_16 ty) r) + (if-let $true (has_zbb)) + (let ((tmp XReg (zext r ty $I64)) + (count XReg (rv_clz tmp)) + ;; We always do the operation on the full 64-bit register, so subtract 64 from the result. + (result XReg (rv_addi count (imm12_const_add (ty_bits ty) -64)))) + result)) + +(rule 2 (lower_clz $I32 r) + (if-let $true (has_zbb)) + (rv_clzw r)) + +(rule 2 (lower_clz $I64 r) + (if-let $true (has_zbb)) + (rv_clz r)) + + +;; Count leading zeros from a i128 bit value. +;; We count both halves separately and conditionally add them if it makes sense. +(decl lower_clz_i128 (ValueRegs) ValueRegs) +(rule (lower_clz_i128 x) + (let ((x_lo XReg (value_regs_get x 0)) + (x_hi XReg (value_regs_get x 1)) + ;; Count both halves + (high XReg (lower_clz $I64 x_hi)) + (low XReg (lower_clz $I64 x_lo)) + ;; Only add the bottom zeros if the top half is zero + (low XReg (gen_select_reg (IntCC.Equal) x_hi (zero_reg) low (zero_reg))) + (result XReg (rv_add high low))) + (extend result (ExtendOp.Zero) $I64 $I128))) + + +(decl lower_cls (Type XReg) XReg) +(rule (lower_cls ty r) + (let ((tmp XReg (sext r ty $I64)) + (tmp2 XReg (gen_select_reg (IntCC.SignedLessThan) tmp (zero_reg) (rv_not tmp) tmp)) + (tmp3 XReg (lower_clz ty tmp2))) + (rv_addi tmp3 (imm12_const -1)))) + +;; If the sign bit is set, we count the leading zeros of the inverted value. +;; Otherwise we can just count the leading zeros of the original value. +;; Subtract 1 since the sign bit does not count. 
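+;; For example, for $I32 the value 0xFFFF_FFF0 (-16) has 28 leading bits equal
+;; to the sign bit, so cls returns 27 once the sign bit itself is excluded.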
+(decl lower_cls_i128 (ValueRegs) ValueRegs) +(rule (lower_cls_i128 x) + (let ((low XReg (value_regs_get x 0)) + (high XReg (value_regs_get x 1)) + (low XReg (gen_select_reg (IntCC.SignedLessThan) high (zero_reg) (rv_not low) low)) + (high XReg (gen_select_reg (IntCC.SignedLessThan) high (zero_reg) (rv_not high) high)) + (tmp ValueRegs (lower_clz_i128 (value_regs low high))) + (count XReg (value_regs_get tmp 0)) + (result XReg (rv_addi count (imm12_const -1)))) + (extend result (ExtendOp.Zero) $I64 $I128))) + + +(decl gen_cltz (bool XReg Type) XReg) +(rule (gen_cltz leading rs ty) + (let ((tmp WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (sum WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Cltz leading sum step tmp rs ty)))) + sum)) + + +;; Extends an integer if it is smaller than 64 bits. +(decl ext_int_if_need (bool ValueRegs Type) ValueRegs) +;;; For values smaller than 64 bits, we need to extend them to 64 bits +(rule 0 (ext_int_if_need $true val (fits_in_32 (ty_int ty))) + (extend val (ExtendOp.Signed) ty $I64)) +(rule 0 (ext_int_if_need $false val (fits_in_32 (ty_int ty))) + (extend val (ExtendOp.Zero) ty $I64)) +;; If the value is larger than one machine register, we don't need to do anything +(rule 1 (ext_int_if_need _ r $I64) r) +(rule 2 (ext_int_if_need _ r $I128) r) + + +;; Performs a zero extension of the given value +(decl zext (XReg Type Type) XReg) +(rule (zext val from_ty (fits_in_64 to_ty)) (value_regs_get (extend val (ExtendOp.Zero) from_ty to_ty) 0)) + +;; Performs a signed extension of the given value +(decl sext (XReg Type Type) XReg) +(rule (sext val from_ty (fits_in_64 to_ty)) (value_regs_get (extend val (ExtendOp.Signed) from_ty to_ty) 0)) + +(type ExtendOp + (enum + (Zero) + (Signed))) + +;; Performs either a sign or zero extension of the given value +(decl extend (ValueRegs ExtendOp Type Type) ValueRegs) + +;;; Generic Rules Extending to I64 +(decl pure extend_shift_op (ExtendOp) AluOPRRI) +(rule (extend_shift_op (ExtendOp.Zero)) (AluOPRRI.Srli)) +(rule (extend_shift_op (ExtendOp.Signed)) (AluOPRRI.Srai)) + +;; In the most generic case, we shift left and then shift right. +;; The type of right shift is determined by the extend op. +; (rule 0 (extend val extend_op (fits_in_32 from_ty) (fits_in_64 to_ty)) +; (let ((val XReg (value_regs_get val 0)) +; (shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits from_ty)))) +; (left XReg (rv_slli val shift)) +; (shift_op AluOPRRI (extend_shift_op extend_op)) +; (right XReg (alu_rr_imm12 shift_op left shift))) +; right)) + +;; Hacky no-op version. +(rule 0 (extend val extend_op (fits_in_32 from_ty) (fits_in_64 to_ty)) + (let ((right XReg (value_regs_get val 0))) + right)) + +;; If we are zero extending a U8 we can use a `andi` instruction. +(rule 1 (extend val (ExtendOp.Zero) $I8 (fits_in_64 to_ty)) + (let ((val XReg (value_regs_get val 0))) + (rv_andi val (imm12_const 255)))) + +;; When signed extending from 32 to 64 bits we can use a +;; `addiw val 0`. Also known as a `sext.w` +(rule 1 (extend val (ExtendOp.Signed) $I32 $I64) + (let ((val XReg (value_regs_get val 0))) + (rv_sextw val))) + + +;; No point in trying to use `packh` here to zero extend 8 bit values +;; since we can just use `andi` instead which is part of the base ISA. 
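+
+;; For illustration: with `zbkb` a 16-bit zero extension becomes a single
+;; `packw rd, rs, zero`, and with `zbb` it becomes `zext.h rd, rs`; the rules
+;; that follow pick between these based on which extensions are enabled.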
+ +;; If we have the `zbkb` extension `packw` can be used to zero extend 16 bit values +(rule 1 (extend val (ExtendOp.Zero) $I16 (fits_in_64 _)) + (if-let $true (has_zbkb)) + (let ((val XReg (value_regs_get val 0))) + (rv_packw val (zero_reg)))) + +;; If we have the `zbkb` extension `pack` can be used to zero extend 32 bit registers +(rule 1 (extend val (ExtendOp.Zero) $I32 $I64) + (if-let $true (has_zbkb)) + (let ((val XReg (value_regs_get val 0))) + (rv_pack val (zero_reg)))) + + +;; If we have the `zbb` extension we can use the dedicated `sext.b` instruction. +(rule 1 (extend val (ExtendOp.Signed) $I8 (fits_in_64 _)) + (if-let $true (has_zbb)) + (let ((val XReg (value_regs_get val 0))) + (rv_sextb val))) + +;; If we have the `zbb` extension we can use the dedicated `sext.h` instruction. +(rule 1 (extend val (ExtendOp.Signed) $I16 (fits_in_64 _)) + (if-let $true (has_zbb)) + (let ((val XReg (value_regs_get val 0))) + (rv_sexth val))) + +;; If we have the `zbb` extension we can use the dedicated `zext.h` instruction. +(rule 2 (extend val (ExtendOp.Zero) $I16 (fits_in_64 _)) + (if-let $true (has_zbb)) + (let ((val XReg (value_regs_get val 0))) + (rv_zexth val))) + +;; With `zba` we have a `zext.w` instruction +(rule 2 (extend val (ExtendOp.Zero) $I32 $I64) + (if-let $true (has_zba)) + (let ((val XReg (value_regs_get val 0))) + (rv_zextw val))) + +;;; Signed rules extending to I128 +;; Extend the bottom part, and extract the sign bit from the bottom as the top +(rule 3 (extend val (ExtendOp.Signed) (fits_in_64 from_ty) $I128) + (let ((val XReg (value_regs_get val 0)) + (low XReg (sext val from_ty $I64)) + (high XReg (rv_srai low (imm12_const 63)))) + (value_regs low high))) + +;;; Unsigned rules extending to I128 +;; Extend the bottom register to I64 and then just zero out the top half. +(rule 3 (extend val (ExtendOp.Zero) (fits_in_64 from_ty) $I128) + (let ((val XReg (value_regs_get val 0)) + (low XReg (zext val from_ty $I64)) + (high XReg (load_u64_constant 0))) + (value_regs low high))) + +;; Catch all rule for ignoring extensions of the same type. +(rule 4 (extend val _ ty ty) val) + + + +(decl lower_b128_binary (AluOPRRR ValueRegs ValueRegs) ValueRegs) +(rule + (lower_b128_binary op a b) + (let + ( ;; low part. + (low XReg (alu_rrr op (value_regs_get a 0) (value_regs_get b 0))) + ;; high part. + (high XReg (alu_rrr op (value_regs_get a 1) (value_regs_get b 1)))) + (value_regs low high))) + +(decl lower_umlhi (Type XReg XReg) XReg) +(rule 1 (lower_umlhi $I64 rs1 rs2) + (rv_mulhu rs1 rs2)) + +(rule (lower_umlhi ty rs1 rs2) + (let + ((tmp XReg (rv_mul (zext rs1 ty $I64) (zext rs2 ty $I64)))) + (rv_srli tmp (imm12_const (ty_bits ty))))) + +(decl lower_smlhi (Type XReg XReg) XReg) +(rule 1 + (lower_smlhi $I64 rs1 rs2) + (rv_mulh rs1 rs2)) + +(rule + (lower_smlhi ty rs1 rs2) + (let + ((tmp XReg (rv_mul rs1 rs2))) + (rv_srli tmp (imm12_const (ty_bits ty))))) + + +(decl lower_rotl (Type XReg XReg) XReg) + +(rule 1 + (lower_rotl $I64 rs amount) + (if-let $true (has_zbb)) + (rv_rol rs amount)) + +(rule + (lower_rotl $I64 rs amount) + (if-let $false (has_zbb)) + (lower_rotl_shift $I64 rs amount)) + +(rule 1 + (lower_rotl $I32 rs amount) + (if-let $true (has_zbb)) + (rv_rolw rs amount)) + +(rule + (lower_rotl $I32 rs amount) + (if-let $false (has_zbb)) + (lower_rotl_shift $I32 rs amount)) + +(rule -1 + (lower_rotl ty rs amount) + (lower_rotl_shift ty rs amount)) + +;;; using shift to implement rotl. +(decl lower_rotl_shift (Type XReg XReg) XReg) + +;;; for I8 and I16 ... 
+(rule + (lower_rotl_shift ty rs amount) + (let + ((x ValueRegs (gen_shamt ty amount)) + (shamt Reg (value_regs_get x 0)) + (len_sub_shamt Reg (value_regs_get x 1)) + ;; + (part1 Reg (rv_sll rs shamt)) + ;; + (part2 Reg (rv_srl rs len_sub_shamt)) + (part3 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) part2))) + (rv_or part1 part3))) + + +;;;; construct shift amount.rotl on i128 will use shift to implement. So can call this function. +;;;; this will return shift amount and (ty_bits - "shift amount") +;;;; if ty_bits is greater than 64 like i128, then shmat will fallback to 64.because We are 64 bit platform. +(decl gen_shamt (Type XReg) ValueRegs) +(extern constructor gen_shamt gen_shamt) + +(decl lower_rotr (Type XReg XReg) XReg) + +(rule 1 + (lower_rotr $I64 rs amount) + (if-let $true (has_zbb)) + (rv_ror rs amount)) +(rule + (lower_rotr $I64 rs amount) + (if-let $false (has_zbb)) + (lower_rotr_shift $I64 rs amount)) + +(rule 1 + (lower_rotr $I32 rs amount) + (if-let $true (has_zbb)) + (rv_rorw rs amount)) + +(rule + (lower_rotr $I32 rs amount) + (if-let $false (has_zbb)) + (lower_rotr_shift $I32 rs amount)) + +(rule -1 + (lower_rotr ty rs amount) + (lower_rotr_shift ty rs amount)) + +(decl lower_rotr_shift (Type XReg XReg) XReg) + +;;; +(rule + (lower_rotr_shift ty rs amount) + (let + ((x ValueRegs (gen_shamt ty amount)) + (shamt XReg (value_regs_get x 0)) + (len_sub_shamt XReg (value_regs_get x 1)) + ;; + (part1 XReg (rv_srl rs shamt)) + ;; + (part2 XReg (rv_sll rs len_sub_shamt)) + ;; + (part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) part2))) + (rv_or part1 part3))) + + + +;; bseti: Set a single bit in a register, indexed by a constant. +(decl gen_bseti (Reg u64) Reg) +(rule (gen_bseti val bit) + (if-let $false (has_zbs)) + (if-let $false (u64_le bit 12)) + (let ((const XReg (load_u64_constant (u64_shl 1 bit)))) + (rv_or val const))) + +(rule (gen_bseti val bit) + (if-let $false (has_zbs)) + (if-let $true (u64_le bit 12)) + (rv_ori val (imm12_const (u64_as_i32 (u64_shl 1 bit))))) + +(rule (gen_bseti val bit) + (if-let $true (has_zbs)) + (rv_bseti val (imm12_const (u64_as_i32 bit)))) + + +(decl gen_popcnt (Reg Type) Reg) +(rule + (gen_popcnt rs ty) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (sum WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Popcnt sum step tmp rs ty)))) + (writable_reg_to_reg sum))) + +(decl lower_popcnt (XReg Type) XReg) +(rule 1 (lower_popcnt rs ty) + (if-let $true (has_zbb)) + (rv_cpop (zext rs ty $I64))) + +(rule (lower_popcnt rs ty) + (if-let $false (has_zbb)) + (gen_popcnt rs ty)) + +(decl lower_popcnt_i128 (ValueRegs) ValueRegs) +(rule + (lower_popcnt_i128 a) + (let + ( ;; low part. + (low XReg (lower_popcnt (value_regs_get a 0) $I64)) + ;; high part. + (high XReg (lower_popcnt (value_regs_get a 1) $I64)) + ;; add toghter. + (result XReg (rv_add low high))) + (value_regs result (load_u64_constant 0)))) + +(decl lower_i128_rotl (ValueRegs ValueRegs) ValueRegs) +(rule + (lower_i128_rotl x y) + (let + ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; + (low_part1 XReg (rv_sll (value_regs_get x 0) shamt)) + (low_part2 XReg (rv_srl (value_regs_get x 1) len_sub_shamt)) + ;;; if shamt == 0 low_part2 will overflow we should zero instead. 
+ (low_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part2)) + (low XReg (rv_or low_part1 low_part3)) + ;; + (high_part1 XReg (rv_sll (value_regs_get x 1) shamt)) + (high_part2 XReg (rv_srl (value_regs_get x 0) len_sub_shamt)) + (high_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part2)) + (high XReg (rv_or high_part1 high_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + ;; right now we only rotate less than 64 bits. + ;; if shamt is greater than or equal 64 , we should switch low and high. + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high) + ))) + + +(decl lower_i128_rotr (ValueRegs ValueRegs) ValueRegs) +(rule + (lower_i128_rotr x y) + (let + ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; + (low_part1 XReg (rv_srl (value_regs_get x 0) shamt)) + (low_part2 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + ;;; if shamt == 0 low_part2 will overflow we should zero instead. + (low_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part2)) + (low XReg (rv_or low_part1 low_part3)) + ;; + (high_part1 XReg (rv_srl (value_regs_get x 1) shamt)) + (high_part2 XReg (rv_sll (value_regs_get x 0) len_sub_shamt)) + (high_part3 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part2)) + (high XReg (rv_or high_part1 high_part3)) + + ;; + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + ;; right now we only rotate less than 64 bits. + ;; if shamt is greater than or equal 64 , we should switch low and high. + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high) + ))) + +(decl gen_amode (Reg Offset32 Type) AMode) +(extern constructor gen_amode gen_amode) + +;; Generates a AMode that points to a constant in the constant pool. +(decl gen_const_amode (VCodeConstant) AMode) +(extern constructor gen_const_amode gen_const_amode) + +(decl offset32_imm (i32) Offset32) +(extern constructor offset32_imm offset32_imm) + +;; helper function to load from memory. +(decl gen_load (Reg Offset32 LoadOP MemFlags Type) Reg) +(rule + (gen_load p offset op flags ty) + (let + ((tmp WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.Load tmp op flags (gen_amode p offset $I64))))) + tmp)) + +(decl gen_load_128 (Reg Offset32 MemFlags) ValueRegs) +(rule + (gen_load_128 p offset flags) + (let + ((low Reg (gen_load p offset (LoadOP.Ld) flags $I64)) + (high Reg (gen_load p (offset32_add offset 8) (LoadOP.Ld) flags $I64))) + (value_regs low high))) + +(decl default_memflags () MemFlags) +(extern constructor default_memflags default_memflags) + +(decl offset32_add (Offset32 i64) Offset32) +(extern constructor offset32_add offset32_add) + +;; helper function to store to memory. 
+(decl gen_store (Reg Offset32 StoreOP MemFlags Reg) InstOutput) +(rule + (gen_store base offset op flags src) + (side_effect (SideEffectNoResult.Inst (MInst.Store (gen_amode base offset $I64) op flags src))) +) + +(decl gen_store_128 (Reg Offset32 MemFlags ValueRegs) InstOutput) +(rule + (gen_store_128 p offset flags src) + (side_effect + (SideEffectNoResult.Inst2 + (MInst.Store (gen_amode p offset $I64) (StoreOP.Sd) flags (value_regs_get src 0)) + (MInst.Store (gen_amode p (offset32_add offset 8) $I64) (StoreOP.Sd) flags (value_regs_get src 1))))) + +(decl valid_atomic_transaction (Type) Type) +(extern extractor valid_atomic_transaction valid_atomic_transaction) + +;;helper function. +;;construct an atomic instruction. +(decl gen_atomic (AtomicOP Reg Reg AMO) Reg) +(rule + (gen_atomic op addr src amo) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Atomic op tmp addr src amo)))) + tmp)) + +;; helper function +(decl get_atomic_rmw_op (Type AtomicRmwOp) AtomicOP) +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Add)) + (AtomicOP.AmoaddW)) +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Add)) + (AtomicOP.AmoaddD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.And)) + (AtomicOP.AmoandW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.And)) + (AtomicOP.AmoandD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Or)) + (AtomicOP.AmoorW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Or)) + (AtomicOP.AmoorD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Smax)) + (AtomicOP.AmomaxW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Smax)) + (AtomicOP.AmomaxD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Smin)) + (AtomicOP.AmominW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Smin)) + (AtomicOP.AmominD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Umax)) + (AtomicOP.AmomaxuW) +) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Umax)) + (AtomicOP.AmomaxuD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Umin)) + (AtomicOP.AmominuW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Umin)) + (AtomicOP.AmominuD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Xchg)) + (AtomicOP.AmoswapW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Xchg)) + (AtomicOP.AmoswapD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Xor)) + (AtomicOP.AmoxorW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Xor)) + (AtomicOP.AmoxorD)) + +(decl atomic_amo () AMO) +(extern constructor atomic_amo atomic_amo) + + +(decl gen_atomic_load (Reg Type) Reg) +(rule + (gen_atomic_load p ty) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AtomicLoad tmp ty p)))) + (writable_reg_to_reg tmp))) + +;;; +(decl gen_atomic_store (Reg Type Reg) InstOutput) +(rule + (gen_atomic_store p ty src) + (side_effect (SideEffectNoResult.Inst (MInst.AtomicStore src ty p))) +) + + +(decl gen_stack_addr (StackSlot Offset32) Reg) +(extern constructor gen_stack_addr gen_stack_addr) + +;; +(decl gen_select (Type Reg ValueRegs ValueRegs) ValueRegs) +(rule + (gen_select ty c x y) + (let + ((dst VecWritableReg (alloc_vec_writable ty)) + ;; + (reuslt VecWritableReg (vec_writable_clone dst)) + (_ Unit (emit (MInst.Select dst ty c x y)))) + (vec_writable_to_regs reuslt))) + +;; Parameters are "intcc compare_a compare_b rs1 rs2". +(decl gen_select_reg (IntCC XReg XReg Reg Reg) Reg) +(extern constructor gen_select_reg gen_select_reg) + +;; load a constant into reg. 
+(decl load_u64_constant (u64) Reg) +(extern constructor load_u64_constant load_u64_constant) + +;;; clone WritableReg +;;; if not rust compiler will complain about use moved value. +(decl vec_writable_clone (VecWritableReg) VecWritableReg) +(extern constructor vec_writable_clone vec_writable_clone) + +(decl vec_writable_to_regs (VecWritableReg) ValueRegs) +(extern constructor vec_writable_to_regs vec_writable_to_regs) + +(decl alloc_vec_writable (Type) VecWritableReg) +(extern constructor alloc_vec_writable alloc_vec_writable) + +(decl gen_int_select (Type IntSelectOP ValueRegs ValueRegs) ValueRegs) +(rule + (gen_int_select ty op x y) + (let + ( ;;; + (dst VecWritableReg (alloc_vec_writable ty)) + ;;; + (_ Unit (emit (MInst.IntSelect op (vec_writable_clone dst) x y ty)))) + (vec_writable_to_regs dst))) + +(decl udf (TrapCode) InstOutput) +(rule + (udf code) + (side_effect (SideEffectNoResult.Inst (MInst.Udf code)))) + +(decl load_op (Type) LoadOP) +(extern constructor load_op load_op) + +(decl store_op (Type) StoreOP) +(extern constructor store_op store_op) + +;; bool is "is_signed" +(decl int_load_op (bool u8) LoadOP) +(rule + (int_load_op $false 8) + (LoadOP.Lbu)) + +(rule + (int_load_op $true 8) + (LoadOP.Lb)) + +(rule + (int_load_op $false 16) + (LoadOP.Lhu)) +(rule + (int_load_op $true 16) + (LoadOP.Lh)) +(rule + (int_load_op $false 32) + (LoadOP.Lwu)) +(rule + (int_load_op $true 32) + (LoadOP.Lw)) + +(rule + (int_load_op _ 64) + (LoadOP.Ld)) + +;;;; load extern name +(decl load_ext_name (ExternalName i64) Reg) +(extern constructor load_ext_name load_ext_name) + +(decl int_convert_2_float_op (Type bool Type) FpuOPRR) +(extern constructor int_convert_2_float_op int_convert_2_float_op) + +;;;; +(decl gen_fcvt_int (bool FReg bool Type Type) XReg) +(rule + (gen_fcvt_int is_sat rs is_signed in_type out_type) + (let + ((result WritableReg (temp_writable_reg out_type)) + (tmp WritableFReg (temp_writable_freg)) + (_ Unit (emit (MInst.FcvtToInt is_sat result tmp rs is_signed in_type out_type)))) + (writable_reg_to_reg result))) + +;;; some float binary operation +;;; 1. need move into x reister. +;;; 2. do the operation. +;;; 3. move back. +(decl lower_float_binary (AluOPRRR FReg FReg Type) FReg) +(rule + (lower_float_binary op rs1 rs2 ty) + (let ((x_rs1 XReg (move_f_to_x rs1 ty)) + (x_rs2 XReg (move_f_to_x rs2 ty)) + (tmp XReg (alu_rrr op x_rs1 x_rs2))) + (move_x_to_f tmp (float_int_of_same_size ty)))) + + +;;; lower icmp +(decl lower_icmp (IntCC ValueRegs ValueRegs Type) Reg) +(rule 1 (lower_icmp cc x y ty) + (if (signed_cond_code cc)) + (gen_icmp cc (ext_int_if_need $true x ty) (ext_int_if_need $true y ty) ty)) +(rule (lower_icmp cc x y ty) + (gen_icmp cc (ext_int_if_need $false x ty) (ext_int_if_need $false y ty) ty)) + + +(decl i128_sub (ValueRegs ValueRegs) ValueRegs) +(rule + (i128_sub x y ) + (let + (;; low part. + (low XReg (rv_sub (value_regs_get x 0) (value_regs_get y 0))) + ;; compute borrow. + (borrow XReg (rv_sltu (value_regs_get x 0) low)) + ;; + (high_tmp XReg (rv_sub (value_regs_get x 1) (value_regs_get y 1))) + ;; + (high XReg (rv_sub high_tmp borrow))) + (value_regs low high))) + + +;;; Returns the sum in the first register, and the overflow test in the second. 
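+;; For a type narrower than 64 bits this works by zero-extending both operands
+;; to 64 bits, adding them, and shifting the carry down: e.g. for $I32 the
+;; overflow flag is simply bit 32 of the 64-bit sum.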
+(decl lower_uadd_overflow (XReg XReg Type) ValueRegs) + +(rule 1 + (lower_uadd_overflow x y $I64) + (let ((tmp XReg (rv_add x y)) + (test XReg (gen_icmp (IntCC.UnsignedLessThan) tmp x $I64))) + (value_regs tmp test))) + +(rule + (lower_uadd_overflow x y (fits_in_32 ty)) + (let ((tmp_x XReg (zext x ty $I64)) + (tmp_y XReg (zext y ty $I64)) + (sum XReg (rv_add tmp_x tmp_y)) + (test XReg (rv_srli sum (imm12_const (ty_bits ty))))) + (value_regs sum test))) + +(decl label_to_br_target (MachLabel) BranchTarget) +(extern constructor label_to_br_target label_to_br_target) + +(decl gen_jump (MachLabel) MInst) +(rule + (gen_jump v) + (MInst.Jal (label_to_br_target v))) + +(decl vec_label_get (VecMachLabel u8) MachLabel ) +(extern constructor vec_label_get vec_label_get) + +(decl partial lower_branch (Inst VecMachLabel) Unit) +(rule (lower_branch (jump _) targets ) + (emit_side_effect (SideEffectNoResult.Inst (gen_jump (vec_label_get targets 0))))) + +;;; cc a b targets Type +(decl lower_br_icmp (IntCC ValueRegs ValueRegs VecMachLabel Type) Unit) +(extern constructor lower_br_icmp lower_br_icmp) + +;; int scalar zero regs. +(decl int_zero_reg (Type) ValueRegs) +(extern constructor int_zero_reg int_zero_reg) + +(decl lower_cond_br (IntCC ValueRegs VecMachLabel Type) Unit) +(extern constructor lower_cond_br lower_cond_br) + +(decl intcc_to_extend_op (IntCC) ExtendOp) +(extern constructor intcc_to_extend_op intcc_to_extend_op) + +;; Normalize a value for comparision. +;; +;; This ensures that types smaller than a register don't accidentally +;; pass undefined high bits when being compared as a full register. +(decl normalize_cmp_value (Type ValueRegs ExtendOp) ValueRegs) + +(rule 1 (normalize_cmp_value (fits_in_32 ity) r op) + (extend r op ity $I64)) + +(rule (normalize_cmp_value $I64 r _) r) +(rule (normalize_cmp_value $I128 r _) r) + +(decl normalize_fcvt_from_int (XReg Type ExtendOp) XReg) +(rule 2 (normalize_fcvt_from_int r (fits_in_16 ty) op) + (value_regs_get (extend r op ty $I64) 0)) +(rule 1 (normalize_fcvt_from_int r _ _) + r) + +;; Convert a truthy value, possibly of more than one register (an +;; I128), to one register. If narrower than 64 bits, must have already +;; been masked (e.g. by `normalize_cmp_value`). +(decl truthy_to_reg (Type ValueRegs) XReg) +(rule 1 (truthy_to_reg (fits_in_64 _) regs) + (value_regs_get regs 0)) +(rule 0 (truthy_to_reg $I128 regs) + (let ((lo XReg (value_regs_get regs 0)) + (hi XReg (value_regs_get regs 1))) + (rv_or lo hi))) + +;; Default behavior for branching based on an input value. +(rule + (lower_branch (brif v @ (value_type ty) _ _) targets) + (lower_cond_br (IntCC.NotEqual) (normalize_cmp_value ty v (ExtendOp.Zero)) targets ty)) + +;; Special case for SI128 to reify the comparison value and branch on it. 
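+;; `gen_icmp` collapses the two 128-bit halves into a single truthy XReg, which
+;; is then branched on as a 64-bit value.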
+(rule 2 + (lower_branch (brif v @ (value_type $I128) _ _) targets) + (let ((zero ValueRegs (value_regs (zero_reg) (zero_reg))) + (cmp XReg (gen_icmp (IntCC.NotEqual) v zero $I128))) + (lower_cond_br (IntCC.NotEqual) cmp targets $I64))) + +;; Branching on the result of an icmp +(rule 1 + (lower_branch (brif (maybe_uextend (icmp cc a @ (value_type ty) b)) _ _) targets) + (lower_br_icmp cc a b targets ty)) + +;; Branching on the result of an fcmp +(rule 1 + (lower_branch (brif (maybe_uextend (fcmp cc a @ (value_type ty) b)) _ _) targets) + (if-let $true (floatcc_unordered cc)) + (let ((then BranchTarget (label_to_br_target (vec_label_get targets 0))) + (else BranchTarget (label_to_br_target (vec_label_get targets 1)))) + (emit_side_effect (cond_br (emit_fcmp (floatcc_complement cc) ty a b) else then)))) + +(rule 1 + (lower_branch (brif (maybe_uextend (fcmp cc a @ (value_type ty) b)) _ _) targets) + (if-let $false (floatcc_unordered cc)) + (let ((then BranchTarget (label_to_br_target (vec_label_get targets 0))) + (else BranchTarget (label_to_br_target (vec_label_get targets 1)))) + (emit_side_effect (cond_br (emit_fcmp cc ty a b) then else)))) + +;;; +(decl lower_br_table (Reg VecMachLabel) Unit) +(extern constructor lower_br_table lower_br_table) + +(rule + (lower_branch (br_table index _) targets) + (lower_br_table index targets)) + +(decl load_ra () Reg) +(extern constructor load_ra load_ra) + + +;; Generates a bitcast instruction. +;; Args are: src, src_ty, dst_ty +(decl gen_bitcast (Reg Type Type) Reg) +(rule 1 (gen_bitcast r $F32 $I32) (rv_fmvxw r)) +(rule 1 (gen_bitcast r $F64 $I64) (rv_fmvxd r)) +(rule 1 (gen_bitcast r $I32 $F32) (rv_fmvwx r)) +(rule 1 (gen_bitcast r $I64 $F64) (rv_fmvdx r)) +(rule (gen_bitcast r _ _) r) + +(decl move_f_to_x (FReg Type) XReg) +(rule (move_f_to_x r $F32) (gen_bitcast r $F32 $I32)) +(rule (move_f_to_x r $F64) (gen_bitcast r $F64 $I64)) + +(decl move_x_to_f (XReg Type) FReg) +(rule (move_x_to_f r $I32) (gen_bitcast r $I32 $F32)) +(rule (move_x_to_f r $I64) (gen_bitcast r $I64 $F64)) + +(decl float_int_of_same_size (Type) Type) +(rule (float_int_of_same_size $F32) $I32) +(rule (float_int_of_same_size $F64) $I64) + + +(decl gen_rev8 (XReg) XReg) +(rule 1 + (gen_rev8 rs) + (if-let $true (has_zbb)) + (rv_rev8 rs)) + +(rule + (gen_rev8 rs) + (if-let $false (has_zbb)) + (let + ((rd WritableXReg (temp_writable_xreg)) + (tmp WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Rev8 rs step tmp rd)))) + (writable_reg_to_reg rd))) + + +(decl gen_brev8 (Reg Type) Reg) +(rule 1 + (gen_brev8 rs _) + (if-let $true (has_zbkb)) + (rv_brev8 rs)) +(rule + (gen_brev8 rs ty) + (if-let $false (has_zbkb)) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (tmp2 WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (rd WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Brev8 rs ty step tmp tmp2 rd)))) + (writable_reg_to_reg rd))) + +;; Negates x +;; Equivalent to 0 - x +(decl neg (Type ValueRegs) ValueRegs) +(rule 1 (neg (fits_in_64 (ty_int ty)) val) + (value_reg + (rv_neg (value_regs_get val 0)))) + +(rule 2 (neg $I128 val) + (i128_sub (value_regs_zero) val)) + + +;; Selects the greatest of two registers as signed values. 
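+;; With `zbb` this is a single `max` instruction; without it we fall back to a
+;; conditional select on `SignedGreaterThan`.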
+(decl max (Type XReg XReg) XReg) +(rule (max (fits_in_64 (ty_int ty)) x y) + (if-let $true (has_zbb)) + (rv_max x y)) + +(rule (max (fits_in_64 (ty_int ty)) x y) + (if-let $false (has_zbb)) + (gen_select_reg (IntCC.SignedGreaterThan) x y x y)) + + +(decl gen_trapif (XReg TrapCode) InstOutput) +(rule + (gen_trapif test trap_code) + (side_effect (SideEffectNoResult.Inst (MInst.TrapIf test trap_code)))) + +(decl gen_trapifc (IntCC XReg XReg TrapCode) InstOutput) +(rule + (gen_trapifc cc a b trap_code) + (side_effect (SideEffectNoResult.Inst (MInst.TrapIfC a b cc trap_code)))) + +(decl shift_int_to_most_significant (XReg Type) XReg) +(extern constructor shift_int_to_most_significant shift_int_to_most_significant) + +;;; generate div overflow. +(decl gen_div_overflow (XReg XReg Type) InstOutput) +(rule + (gen_div_overflow rs1 rs2 ty) + (let + ((r_const_neg_1 XReg (load_imm12 -1)) + (r_const_min XReg (rv_slli (load_imm12 1) (imm12_const 63))) + (tmp_rs1 XReg (shift_int_to_most_significant rs1 ty)) + (t1 XReg (gen_icmp (IntCC.Equal) r_const_neg_1 rs2 ty)) + (t2 XReg (gen_icmp (IntCC.Equal) r_const_min tmp_rs1 ty)) + (test XReg (rv_and t1 t2))) + (gen_trapif test (TrapCode.IntegerOverflow)))) + +(decl gen_div_by_zero (XReg) InstOutput) +(rule + (gen_div_by_zero r) + (gen_trapifc (IntCC.Equal) (zero_reg) r (TrapCode.IntegerDivisionByZero))) + +;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput) +(extern constructor gen_call gen_call) + +(decl gen_call_indirect (SigRef Value ValueSlice) InstOutput) +(extern constructor gen_call_indirect gen_call_indirect) + +;;; this is trying to imitate aarch64 `madd` instruction. +(decl madd (XReg XReg XReg) XReg) +(rule + (madd n m a) + (let + ((t XReg (rv_mul n m))) + (rv_add t a))) + +;;;; Helpers for bmask ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl lower_bmask (Type Type ValueRegs) ValueRegs) + +;; Produces -1 if the 64-bit value is non-zero, and 0 otherwise. +;; If the type is smaller than 64 bits, we need to mask off the +;; high bits. +(rule + 0 + (lower_bmask (fits_in_64 _) (fits_in_64 in_ty) val) + (let ((input XReg (truthy_to_reg in_ty (normalize_cmp_value in_ty val (ExtendOp.Zero)))) + (non_zero XReg (rv_snez input))) + (value_reg (rv_neg non_zero)))) + +;; Bitwise-or the two registers that make up the 128-bit value, then recurse as +;; though it was a 64-bit value. +(rule + 1 + (lower_bmask (fits_in_64 ty) $I128 val) + (let ((lo XReg (value_regs_get val 0)) + (hi XReg (value_regs_get val 1)) + (combined XReg (rv_or lo hi))) + (lower_bmask ty $I64 (value_reg combined)))) + +;; Conversion of one 64-bit value to a 128-bit one. Duplicate the result of the +;; bmask of the 64-bit value into both result registers of the i128. +(rule + 2 + (lower_bmask $I128 (fits_in_64 in_ty) val) + (let ((res ValueRegs (lower_bmask $I64 in_ty val))) + (value_regs (value_regs_get res 0) (value_regs_get res 0)))) + +;; Conversion of one 64-bit value to a 128-bit one. Duplicate the result of +;; bmasking the 128-bit value to a 64-bit value into both registers of the +;; 128-bit result. 
+(rule + 3 + (lower_bmask $I128 $I128 val) + (let ((res ValueRegs (lower_bmask $I64 $I128 val))) + (value_regs (value_regs_get res 0) (value_regs_get res 0)))) + + +;;;; Helpers for physical registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_mov_from_preg (PReg) Reg) + +(rule + (gen_mov_from_preg rm) + (let ((rd WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.MovFromPReg rd rm)))) + rd)) + +(decl fp_reg () PReg) +(extern constructor fp_reg fp_reg) + +(decl sp_reg () PReg) +(extern constructor sp_reg sp_reg) + +;; Helper for creating the zero register. +(decl zero_reg () Reg) +(extern constructor zero_reg zero_reg) + +(decl value_regs_zero () ValueRegs) +(rule (value_regs_zero) + (value_regs (imm $I64 0) (imm $I64 0))) + +(decl writable_zero_reg () WritableReg) +(extern constructor writable_zero_reg writable_zero_reg) + + +;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl not (XReg) XReg) +(rule (not x) (rv_xori x (imm_from_bits 1))) + +(decl is_not_nan (Type FReg) XReg) +(rule (is_not_nan ty a) (rv_feq ty a a)) + +(decl ordered (Type FReg FReg) XReg) +(rule (ordered ty a b) (rv_and (is_not_nan ty a) (is_not_nan ty b))) + +(type CmpResult (enum + (Result + (result XReg) + (invert bool)))) + +;; Wrapper for the common case when constructing comparison results. It assumes +;; that the result isn't negated. +(decl cmp_result (XReg) CmpResult) +(rule (cmp_result result) (CmpResult.Result result $false)) + +;; Wrapper for the case where it's more convenient to construct the negated +;; version of the comparison. +(decl cmp_result_invert (XReg) CmpResult) +(rule (cmp_result_invert result) (CmpResult.Result result $true)) + +;; Consume a CmpResult, producing a branch on its result. +(decl cond_br (CmpResult BranchTarget BranchTarget) SideEffectNoResult) +(rule (cond_br cmp then else) + (SideEffectNoResult.Inst + (MInst.CondBr then else (cmp_integer_compare cmp)))) + +;; Construct an IntegerCompare value. +(decl int_compare (IntCC XReg XReg) IntegerCompare) +(extern constructor int_compare int_compare) + +;; Convert a comparison into a branch test. +(decl cmp_integer_compare (CmpResult) IntegerCompare) + +(rule + (cmp_integer_compare (CmpResult.Result res $false)) + (int_compare (IntCC.NotEqual) res (zero_reg))) + +(rule + (cmp_integer_compare (CmpResult.Result res $true)) + (int_compare (IntCC.Equal) res (zero_reg))) + +;; Convert a comparison into a boolean value. +(decl cmp_value (CmpResult) XReg) +(rule (cmp_value (CmpResult.Result res $false)) res) +(rule (cmp_value (CmpResult.Result res $true)) (not res)) + +;; Compare two floating point numbers and return a zero/non-zero result. 
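+;; Conditions that are cheaper to compute in negated form (e.g. `Unordered` as
+;; the complement of `Ordered`) are built with `cmp_result_invert`; the
+;; inversion is resolved later by `cmp_integer_compare` / `cmp_value`.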
+(decl emit_fcmp (FloatCC Type FReg FReg) CmpResult) + +;; a is not nan && b is not nan +(rule + (emit_fcmp (FloatCC.Ordered) ty a b) + (cmp_result (ordered ty a b))) + +;; a is nan || b is nan +;; == !(a is not nan && b is not nan) +(rule + (emit_fcmp (FloatCC.Unordered) ty a b) + (cmp_result_invert (ordered ty a b))) + +;; a == b +(rule + (emit_fcmp (FloatCC.Equal) ty a b) + (cmp_result (rv_feq ty a b))) + +;; a != b +;; == !(a == b) +(rule + (emit_fcmp (FloatCC.NotEqual) ty a b) + (cmp_result_invert (rv_feq ty a b))) + +;; a < b || a > b +(rule + (emit_fcmp (FloatCC.OrderedNotEqual) ty a b) + (cmp_result (rv_or (rv_flt ty a b) (rv_fgt ty a b)))) + +;; !(ordered a b) || a == b +(rule + (emit_fcmp (FloatCC.UnorderedOrEqual) ty a b) + (cmp_result (rv_or (not (ordered ty a b)) (rv_feq ty a b)))) + +;; a < b +(rule + (emit_fcmp (FloatCC.LessThan) ty a b) + (cmp_result (rv_flt ty a b))) + +;; a <= b +(rule + (emit_fcmp (FloatCC.LessThanOrEqual) ty a b) + (cmp_result (rv_fle ty a b))) + +;; a > b +(rule + (emit_fcmp (FloatCC.GreaterThan) ty a b) + (cmp_result (rv_fgt ty a b))) + +;; a >= b +(rule + (emit_fcmp (FloatCC.GreaterThanOrEqual) ty a b) + (cmp_result (rv_fge ty a b))) + +;; !(ordered a b) || a < b +;; == !(ordered a b && a >= b) +(rule + (emit_fcmp (FloatCC.UnorderedOrLessThan) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_fge ty a b)))) + +;; !(ordered a b) || a <= b +;; == !(ordered a b && a > b) +(rule + (emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_fgt ty a b)))) + +;; !(ordered a b) || a > b +;; == !(ordered a b && a <= b) +(rule + (emit_fcmp (FloatCC.UnorderedOrGreaterThan) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_fle ty a b)))) + +;; !(ordered a b) || a >= b +;; == !(ordered a b && a < b) +(rule + (emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) ty a b) + (cmp_result_invert (rv_and (ordered ty a b) (rv_flt ty a b)))) diff --git a/cranelift/codegen/src/isa/zkasm/inst/args.rs b/cranelift/codegen/src/isa/zkasm/inst/args.rs new file mode 100644 index 000000000000..d71cef8a32c4 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/args.rs @@ -0,0 +1,1812 @@ +//! Riscv64 ISA definitions: instruction arguments. + +// Some variants are never constructed, but we still want them as options in the future. +#![allow(dead_code)] +use super::*; +use crate::ir::condcodes::CondCode; + +use crate::isa::zkasm::inst::{reg_name, reg_to_gpr_num}; +use crate::machinst::isle::WritableReg; + +use std::fmt::{Display, Formatter, Result}; + +/// A macro for defining a newtype of `Reg` that enforces some invariant about +/// the wrapped `Reg` (such as that it is of a particular register class). +macro_rules! newtype_of_reg { + ( + $newtype_reg:ident, + $newtype_writable_reg:ident, + |$check_reg:ident| $check:expr + ) => { + /// A newtype wrapper around `Reg`. + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct $newtype_reg(Reg); + + impl PartialEq for $newtype_reg { + fn eq(&self, other: &Reg) -> bool { + self.0 == *other + } + } + + impl From<$newtype_reg> for Reg { + fn from(r: $newtype_reg) -> Self { + r.0 + } + } + + impl $newtype_reg { + /// Create this newtype from the given register, or return `None` if the register + /// is not a valid instance of this newtype. + pub fn new($check_reg: Reg) -> Option { + if $check { + Some(Self($check_reg)) + } else { + None + } + } + + /// Get this newtype's underlying `Reg`. 
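+            ///
+            /// A rough round-trip sketch, using the `XReg` instantiation
+            /// below and assuming `reg` is some integer-class `Reg`:
+            ///
+            /// ```ignore
+            /// let x = XReg::new(reg).expect("not an integer register");
+            /// let raw: Reg = x.to_reg();
+            /// ```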
+ pub fn to_reg(self) -> Reg { + self.0 + } + } + + // Convenience impl so that people working with this newtype can use it + // "just like" a plain `Reg`. + // + // NB: We cannot implement `DerefMut` because that would let people do + // nasty stuff like `*my_xreg.deref_mut() = some_freg`, breaking the + // invariants that `XReg` provides. + impl std::ops::Deref for $newtype_reg { + type Target = Reg; + + fn deref(&self) -> &Reg { + &self.0 + } + } + + /// Writable Reg. + pub type $newtype_writable_reg = Writable<$newtype_reg>; + }; +} + +// Newtypes for registers classes. +newtype_of_reg!(XReg, WritableXReg, |reg| reg.class() == RegClass::Int); +newtype_of_reg!(FReg, WritableFReg, |reg| reg.class() == RegClass::Float); +newtype_of_reg!(VReg, WritableVReg, |reg| reg.class() == RegClass::Vector); + +/// An addressing mode specified for a load/store operation. +#[derive(Clone, Debug, Copy)] +pub enum AMode { + /// Arbitrary offset from a register. Converted to generation of large + /// offsets with multiple instructions as necessary during code emission. + RegOffset(Reg, i64, Type), + /// Offset from the stack pointer. + SPOffset(i64, Type), + + /// Offset from the frame pointer. + FPOffset(i64, Type), + + /// Offset from the "nominal stack pointer", which is where the real SP is + /// just after stack and spill slots are allocated in the function prologue. + /// At emission time, this is converted to `SPOffset` with a fixup added to + /// the offset constant. The fixup is a running value that is tracked as + /// emission iterates through instructions in linear order, and can be + /// adjusted up and down with [Inst::VirtualSPOffsetAdj]. + /// + /// The standard ABI is in charge of handling this (by emitting the + /// adjustment meta-instructions). It maintains the invariant that "nominal + /// SP" is where the actual SP is after the function prologue and before + /// clobber pushes. See the diagram in the documentation for + /// [crate::isa::zkasm::abi](the ABI module) for more details. + NominalSPOffset(i64, Type), + + /// A reference to a constant which is placed outside of the function's + /// body, typically at the end. + Const(VCodeConstant), + + /// A reference to a label. + Label(MachLabel), +} + +impl AMode { + pub(crate) fn with_allocs(self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + AMode::RegOffset(reg, offset, ty) => AMode::RegOffset(allocs.next(reg), offset, ty), + AMode::SPOffset(..) + | AMode::FPOffset(..) + | AMode::NominalSPOffset(..) + | AMode::Const(..) + | AMode::Label(..) => self, + } + } + + /// Returns the registers that known to the register allocator. + /// Keep this in sync with `with_allocs`. + pub(crate) fn get_allocatable_register(&self) -> Option { + match self { + AMode::RegOffset(reg, ..) => Some(*reg), + AMode::SPOffset(..) + | AMode::FPOffset(..) + | AMode::NominalSPOffset(..) + | AMode::Const(..) + | AMode::Label(..) => None, + } + } + + pub(crate) fn get_base_register(&self) -> Option { + match self { + &AMode::RegOffset(reg, ..) => Some(reg), + &AMode::SPOffset(..) => Some(stack_reg()), + &AMode::FPOffset(..) => Some(fp_reg()), + &AMode::NominalSPOffset(..) => Some(stack_reg()), + &AMode::Const(..) | AMode::Label(..) => None, + } + } + + pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 { + match self { + &AMode::NominalSPOffset(offset, _) => offset + state.virtual_sp_offset, + _ => self.get_offset(), + } + } + + fn get_offset(&self) -> i64 { + match self { + &AMode::RegOffset(_, offset, ..) 
=> offset, + &AMode::SPOffset(offset, _) => offset, + &AMode::FPOffset(offset, _) => offset, + &AMode::NominalSPOffset(offset, _) => offset, + &AMode::Const(_) | &AMode::Label(_) => 0, + } + } + + pub(crate) fn to_string_with_alloc(&self, allocs: &mut AllocationConsumer<'_>) -> String { + format!("{}", self.clone().with_allocs(allocs)) + } +} + +impl Display for AMode { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + match self { + &AMode::RegOffset(r, offset, ..) => { + write!(f, "{}({})", offset, reg_name(r)) + } + &AMode::SPOffset(offset, ..) => { + write!(f, "{}(sp)", offset) + } + &AMode::NominalSPOffset(offset, ..) => { + write!(f, "{}(nominal_sp)", offset) + } + &AMode::FPOffset(offset, ..) => { + write!(f, "{}(fp)", offset) + } + &AMode::Const(addr, ..) => { + write!(f, "[const({})]", addr.as_u32()) + } + &AMode::Label(label) => { + write!(f, "[label{}]", label.as_u32()) + } + } + } +} + +impl Into for StackAMode { + fn into(self) -> AMode { + match self { + StackAMode::FPOffset(offset, ty) => AMode::FPOffset(offset, ty), + StackAMode::SPOffset(offset, ty) => AMode::SPOffset(offset, ty), + StackAMode::NominalSPOffset(offset, ty) => AMode::NominalSPOffset(offset, ty), + } + } +} + +/// risc-v always take two register to compare +#[derive(Clone, Copy, Debug)] +pub struct IntegerCompare { + pub(crate) kind: IntCC, + pub(crate) rs1: Reg, + pub(crate) rs2: Reg, +} + +pub(crate) enum BranchFunct3 { + // == + Eq, + // != + Ne, + // signed < + Lt, + // signed >= + Ge, + // unsigned < + Ltu, + // unsigned >= + Geu, +} + +impl BranchFunct3 { + pub(crate) fn funct3(self) -> u32 { + match self { + BranchFunct3::Eq => 0b000, + BranchFunct3::Ne => 0b001, + BranchFunct3::Lt => 0b100, + BranchFunct3::Ge => 0b101, + BranchFunct3::Ltu => 0b110, + BranchFunct3::Geu => 0b111, + } + } + pub(crate) fn op_name(self) -> &'static str { + match self { + BranchFunct3::Eq => "eq", + BranchFunct3::Ne => "ne", + BranchFunct3::Lt => "lt", + BranchFunct3::Ge => "ge", + BranchFunct3::Ltu => "ltu", + BranchFunct3::Geu => "geu", + } + } +} +impl IntegerCompare { + pub(crate) fn op_code(self) -> u32 { + 0b1100011 + } + + // funct3 and if need inverse the register + pub(crate) fn funct3(&self) -> (BranchFunct3, bool) { + match self.kind { + IntCC::Equal => (BranchFunct3::Eq, false), + IntCC::NotEqual => (BranchFunct3::Ne, false), + IntCC::SignedLessThan => (BranchFunct3::Lt, false), + IntCC::SignedGreaterThanOrEqual => (BranchFunct3::Ge, false), + + IntCC::SignedGreaterThan => (BranchFunct3::Lt, true), + IntCC::SignedLessThanOrEqual => (BranchFunct3::Ge, true), + + IntCC::UnsignedLessThan => (BranchFunct3::Ltu, false), + IntCC::UnsignedGreaterThanOrEqual => (BranchFunct3::Geu, false), + + IntCC::UnsignedGreaterThan => (BranchFunct3::Ltu, true), + IntCC::UnsignedLessThanOrEqual => (BranchFunct3::Geu, true), + } + } + + #[inline] + pub(crate) fn op_name(&self) -> &'static str { + match self.kind { + IntCC::Equal => "EQ", + IntCC::NotEqual => "bne", + IntCC::SignedLessThan => "SLT", + IntCC::SignedGreaterThanOrEqual => "bge", + IntCC::SignedGreaterThan => "bgt", + IntCC::SignedLessThanOrEqual => "ble", + IntCC::UnsignedLessThan => "LT", + IntCC::UnsignedGreaterThanOrEqual => "bgeu", + IntCC::UnsignedGreaterThan => "bgtu", + IntCC::UnsignedLessThanOrEqual => "bleu", + } + } + + pub(crate) fn emit(self) -> u32 { + let (funct3, reverse) = self.funct3(); + let (rs1, rs2) = if reverse { + (self.rs2, self.rs1) + } else { + (self.rs1, self.rs2) + }; + + self.op_code() + | funct3.funct3() << 12 + | reg_to_gpr_num(rs1) << 15 
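+            // rs1 goes in bits 15..20 and rs2 in bits 20..25, matching the
+            // standard RISC-V B-type layout; the branch offset itself is not
+            // packed in here.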
+ | reg_to_gpr_num(rs2) << 20 + } + + pub(crate) fn inverse(self) -> Self { + Self { + kind: self.kind.complement(), + ..self + } + } +} + +impl FpuOPRRRR { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::FmaddS => "fmadd.s", + Self::FmsubS => "fmsub.s", + Self::FnmsubS => "fnmsub.s", + Self::FnmaddS => "fnmadd.s", + Self::FmaddD => "fmadd.d", + Self::FmsubD => "fmsub.d", + Self::FnmsubD => "fnmsub.d", + Self::FnmaddD => "fnmadd.d", + } + } + + pub(crate) fn funct2(self) -> u32 { + match self { + FpuOPRRRR::FmaddS | FpuOPRRRR::FmsubS | FpuOPRRRR::FnmsubS | FpuOPRRRR::FnmaddS => 0, + FpuOPRRRR::FmaddD | FpuOPRRRR::FmsubD | FpuOPRRRR::FnmsubD | FpuOPRRRR::FnmaddD => 1, + } + } + + pub(crate) fn funct3(self, rounding_mode: Option) -> u32 { + rounding_mode.unwrap_or_default().as_u32() + } + + pub(crate) fn op_code(self) -> u32 { + match self { + FpuOPRRRR::FmaddS => 0b1000011, + FpuOPRRRR::FmsubS => 0b1000111, + FpuOPRRRR::FnmsubS => 0b1001011, + FpuOPRRRR::FnmaddS => 0b1001111, + FpuOPRRRR::FmaddD => 0b1000011, + FpuOPRRRR::FmsubD => 0b1000111, + FpuOPRRRR::FnmsubD => 0b1001011, + FpuOPRRRR::FnmaddD => 0b1001111, + } + } +} + +impl FpuOPRR { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::FsqrtS => "fsqrt.s", + Self::FcvtWS => "fcvt.w.s", + Self::FcvtWuS => "fcvt.wu.s", + Self::FmvXW => "fmv.x.w", + Self::FclassS => "fclass.s", + Self::FcvtSw => "fcvt.s.w", + Self::FcvtSwU => "fcvt.s.wu", + Self::FmvWX => "fmv.w.x", + Self::FcvtLS => "fcvt.l.s", + Self::FcvtLuS => "fcvt.lu.s", + Self::FcvtSL => "fcvt.s.l", + Self::FcvtSLU => "fcvt.s.lu", + Self::FcvtLD => "fcvt.l.d", + Self::FcvtLuD => "fcvt.lu.d", + Self::FmvXD => "fmv.x.d", + Self::FcvtDL => "fcvt.d.l", + Self::FcvtDLu => "fcvt.d.lu", + Self::FmvDX => "fmv.d.x", + Self::FsqrtD => "fsqrt.d", + Self::FcvtSD => "fcvt.s.d", + Self::FcvtDS => "fcvt.d.s", + Self::FclassD => "fclass.d", + Self::FcvtWD => "fcvt.w.d", + Self::FcvtWuD => "fcvt.wu.d", + Self::FcvtDW => "fcvt.d.w", + Self::FcvtDWU => "fcvt.d.wu", + } + } + + pub(crate) fn is_convert_to_int(self) -> bool { + match self { + Self::FcvtWS + | Self::FcvtWuS + | Self::FcvtLS + | Self::FcvtLuS + | Self::FcvtWD + | Self::FcvtWuD + | Self::FcvtLD + | Self::FcvtLuD => true, + _ => false, + } + } + // move from x register to float register. + pub(crate) fn move_x_to_f_op(ty: Type) -> Self { + match ty { + F32 => Self::FmvWX, + F64 => Self::FmvDX, + _ => unreachable!("ty:{:?}", ty), + } + } + + // move from f register to x register. 
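+    // For example, `move_x_to_f_op(F64)` above is `fmv.d.x` and, symmetrically,
+    // `move_f_to_x_op(F64)` below is `fmv.x.d`; both panic on non-float types.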
+ pub(crate) fn move_f_to_x_op(ty: Type) -> Self { + match ty { + F32 => Self::FmvXW, + F64 => Self::FmvXD, + _ => unreachable!("ty:{:?}", ty), + } + } + + pub(crate) fn float_convert_2_int_op(from: Type, is_type_signed: bool, to: Type) -> Self { + let type_32 = to.bits() <= 32; + match from { + F32 => { + if is_type_signed { + if type_32 { + Self::FcvtWS + } else { + Self::FcvtLS + } + } else { + if type_32 { + Self::FcvtWuS + } else { + Self::FcvtLuS + } + } + } + F64 => { + if is_type_signed { + if type_32 { + Self::FcvtWD + } else { + Self::FcvtLD + } + } else { + if type_32 { + Self::FcvtWuD + } else { + Self::FcvtLuD + } + } + } + _ => unreachable!("from type:{}", from), + } + } + + pub(crate) fn int_convert_2_float_op(from: Type, is_type_signed: bool, to: Type) -> Self { + let type_32 = from.bits() == 32; + match to { + F32 => { + if is_type_signed { + if type_32 { + Self::FcvtSw + } else { + Self::FcvtSL + } + } else { + if type_32 { + Self::FcvtSwU + } else { + Self::FcvtSLU + } + } + } + F64 => { + if is_type_signed { + if type_32 { + Self::FcvtDW + } else { + Self::FcvtDL + } + } else { + if type_32 { + Self::FcvtDWU + } else { + Self::FcvtDLu + } + } + } + _ => unreachable!("to type:{}", to), + } + } + + pub(crate) fn op_code(self) -> u32 { + match self { + FpuOPRR::FsqrtS + | FpuOPRR::FcvtWS + | FpuOPRR::FcvtWuS + | FpuOPRR::FmvXW + | FpuOPRR::FclassS + | FpuOPRR::FcvtSw + | FpuOPRR::FcvtSwU + | FpuOPRR::FmvWX => 0b1010011, + + FpuOPRR::FcvtLS | FpuOPRR::FcvtLuS | FpuOPRR::FcvtSL | FpuOPRR::FcvtSLU => 0b1010011, + + FpuOPRR::FcvtLD + | FpuOPRR::FcvtLuD + | FpuOPRR::FmvXD + | FpuOPRR::FcvtDL + | FpuOPRR::FcvtDLu + | FpuOPRR::FmvDX => 0b1010011, + + FpuOPRR::FsqrtD + | FpuOPRR::FcvtSD + | FpuOPRR::FcvtDS + | FpuOPRR::FclassD + | FpuOPRR::FcvtWD + | FpuOPRR::FcvtWuD + | FpuOPRR::FcvtDW + | FpuOPRR::FcvtDWU => 0b1010011, + } + } + + pub(crate) fn rs2_funct5(self) -> u32 { + match self { + FpuOPRR::FsqrtS => 0b00000, + FpuOPRR::FcvtWS => 0b00000, + FpuOPRR::FcvtWuS => 0b00001, + FpuOPRR::FmvXW => 0b00000, + FpuOPRR::FclassS => 0b00000, + FpuOPRR::FcvtSw => 0b00000, + FpuOPRR::FcvtSwU => 0b00001, + FpuOPRR::FmvWX => 0b00000, + FpuOPRR::FcvtLS => 0b00010, + FpuOPRR::FcvtLuS => 0b00011, + FpuOPRR::FcvtSL => 0b00010, + FpuOPRR::FcvtSLU => 0b00011, + FpuOPRR::FcvtLD => 0b00010, + FpuOPRR::FcvtLuD => 0b00011, + FpuOPRR::FmvXD => 0b00000, + FpuOPRR::FcvtDL => 0b00010, + FpuOPRR::FcvtDLu => 0b00011, + FpuOPRR::FmvDX => 0b00000, + FpuOPRR::FcvtSD => 0b00001, + FpuOPRR::FcvtDS => 0b00000, + FpuOPRR::FclassD => 0b00000, + FpuOPRR::FcvtWD => 0b00000, + FpuOPRR::FcvtWuD => 0b00001, + FpuOPRR::FcvtDW => 0b00000, + FpuOPRR::FcvtDWU => 0b00001, + FpuOPRR::FsqrtD => 0b00000, + } + } + pub(crate) fn funct7(self) -> u32 { + match self { + FpuOPRR::FsqrtS => 0b0101100, + FpuOPRR::FcvtWS => 0b1100000, + FpuOPRR::FcvtWuS => 0b1100000, + FpuOPRR::FmvXW => 0b1110000, + FpuOPRR::FclassS => 0b1110000, + FpuOPRR::FcvtSw => 0b1101000, + FpuOPRR::FcvtSwU => 0b1101000, + FpuOPRR::FmvWX => 0b1111000, + FpuOPRR::FcvtLS => 0b1100000, + FpuOPRR::FcvtLuS => 0b1100000, + FpuOPRR::FcvtSL => 0b1101000, + FpuOPRR::FcvtSLU => 0b1101000, + FpuOPRR::FcvtLD => 0b1100001, + FpuOPRR::FcvtLuD => 0b1100001, + FpuOPRR::FmvXD => 0b1110001, + FpuOPRR::FcvtDL => 0b1101001, + FpuOPRR::FcvtDLu => 0b1101001, + FpuOPRR::FmvDX => 0b1111001, + FpuOPRR::FcvtSD => 0b0100000, + FpuOPRR::FcvtDS => 0b0100001, + FpuOPRR::FclassD => 0b1110001, + FpuOPRR::FcvtWD => 0b1100001, + FpuOPRR::FcvtWuD => 0b1100001, + FpuOPRR::FcvtDW => 0b1101001, + 
FpuOPRR::FcvtDWU => 0b1101001, + FpuOPRR::FsqrtD => 0b0101101, + } + } + + pub(crate) fn funct3(self, rounding_mode: Option) -> u32 { + let rounding_mode = rounding_mode.unwrap_or_default().as_u32(); + match self { + FpuOPRR::FsqrtS => rounding_mode, + FpuOPRR::FcvtWS => rounding_mode, + FpuOPRR::FcvtWuS => rounding_mode, + FpuOPRR::FmvXW => 0b000, + FpuOPRR::FclassS => 0b001, + FpuOPRR::FcvtSw => rounding_mode, + FpuOPRR::FcvtSwU => rounding_mode, + FpuOPRR::FmvWX => 0b000, + FpuOPRR::FcvtLS => rounding_mode, + FpuOPRR::FcvtLuS => rounding_mode, + FpuOPRR::FcvtSL => rounding_mode, + FpuOPRR::FcvtSLU => rounding_mode, + FpuOPRR::FcvtLD => rounding_mode, + FpuOPRR::FcvtLuD => rounding_mode, + FpuOPRR::FmvXD => 0b000, + FpuOPRR::FcvtDL => rounding_mode, + FpuOPRR::FcvtDLu => rounding_mode, + FpuOPRR::FmvDX => 0b000, + FpuOPRR::FcvtSD => rounding_mode, + FpuOPRR::FcvtDS => rounding_mode, + FpuOPRR::FclassD => 0b001, + FpuOPRR::FcvtWD => rounding_mode, + FpuOPRR::FcvtWuD => rounding_mode, + FpuOPRR::FcvtDW => rounding_mode, + FpuOPRR::FcvtDWU => 0b000, + FpuOPRR::FsqrtD => rounding_mode, + } + } +} + +impl FpuOPRRR { + pub(crate) const fn op_name(self) -> &'static str { + match self { + Self::FaddS => "fadd.s", + Self::FsubS => "fsub.s", + Self::FmulS => "fmul.s", + Self::FdivS => "fdiv.s", + Self::FsgnjS => "fsgnj.s", + Self::FsgnjnS => "fsgnjn.s", + Self::FsgnjxS => "fsgnjx.s", + Self::FminS => "fmin.s", + Self::FmaxS => "fmax.s", + Self::FeqS => "feq.s", + Self::FltS => "flt.s", + Self::FleS => "fle.s", + Self::FaddD => "fadd.d", + Self::FsubD => "fsub.d", + Self::FmulD => "fmul.d", + Self::FdivD => "fdiv.d", + Self::FsgnjD => "fsgnj.d", + Self::FsgnjnD => "fsgnjn.d", + Self::FsgnjxD => "fsgnjx.d", + Self::FminD => "fmin.d", + Self::FmaxD => "fmax.d", + Self::FeqD => "feq.d", + Self::FltD => "flt.d", + Self::FleD => "fle.d", + } + } + + pub fn funct3(self, rounding_mode: Option) -> u32 { + let rounding_mode = rounding_mode.unwrap_or_default(); + let rounding_mode = rounding_mode.as_u32(); + match self { + Self::FaddS => rounding_mode, + Self::FsubS => rounding_mode, + Self::FmulS => rounding_mode, + Self::FdivS => rounding_mode, + + Self::FsgnjS => 0b000, + Self::FsgnjnS => 0b001, + Self::FsgnjxS => 0b010, + Self::FminS => 0b000, + Self::FmaxS => 0b001, + + Self::FeqS => 0b010, + Self::FltS => 0b001, + Self::FleS => 0b000, + + Self::FaddD => rounding_mode, + Self::FsubD => rounding_mode, + Self::FmulD => rounding_mode, + Self::FdivD => rounding_mode, + + Self::FsgnjD => 0b000, + Self::FsgnjnD => 0b001, + Self::FsgnjxD => 0b010, + Self::FminD => 0b000, + Self::FmaxD => 0b001, + Self::FeqD => 0b010, + Self::FltD => 0b001, + Self::FleD => 0b000, + } + } + + pub fn op_code(self) -> u32 { + match self { + Self::FaddS + | Self::FsubS + | Self::FmulS + | Self::FdivS + | Self::FsgnjS + | Self::FsgnjnS + | Self::FsgnjxS + | Self::FminS + | Self::FmaxS + | Self::FeqS + | Self::FltS + | Self::FleS => 0b1010011, + + Self::FaddD + | Self::FsubD + | Self::FmulD + | Self::FdivD + | Self::FsgnjD + | Self::FsgnjnD + | Self::FsgnjxD + | Self::FminD + | Self::FmaxD + | Self::FeqD + | Self::FltD + | Self::FleD => 0b1010011, + } + } + + pub const fn funct7(self) -> u32 { + match self { + Self::FaddS => 0b0000000, + Self::FsubS => 0b0000100, + Self::FmulS => 0b0001000, + Self::FdivS => 0b0001100, + + Self::FsgnjS => 0b0010000, + Self::FsgnjnS => 0b0010000, + Self::FsgnjxS => 0b0010000, + Self::FminS => 0b0010100, + Self::FmaxS => 0b0010100, + Self::FeqS => 0b1010000, + Self::FltS => 0b1010000, + Self::FleS => 
0b1010000, + + Self::FaddD => 0b0000001, + Self::FsubD => 0b0000101, + Self::FmulD => 0b0001001, + Self::FdivD => 0b0001101, + Self::FsgnjD => 0b0010001, + Self::FsgnjnD => 0b0010001, + Self::FsgnjxD => 0b0010001, + Self::FminD => 0b0010101, + Self::FmaxD => 0b0010101, + Self::FeqD => 0b1010001, + Self::FltD => 0b1010001, + Self::FleD => 0b1010001, + } + } + pub fn is_32(self) -> bool { + match self { + Self::FaddS + | Self::FsubS + | Self::FmulS + | Self::FdivS + | Self::FsgnjS + | Self::FsgnjnS + | Self::FsgnjxS + | Self::FminS + | Self::FmaxS + | Self::FeqS + | Self::FltS + | Self::FleS => true, + _ => false, + } + } + + pub fn is_copy_sign(self) -> bool { + match self { + Self::FsgnjD | Self::FsgnjS => true, + _ => false, + } + } + + pub fn is_copy_neg_sign(self) -> bool { + match self { + Self::FsgnjnD | Self::FsgnjnS => true, + _ => false, + } + } + pub fn is_copy_xor_sign(self) -> bool { + match self { + Self::FsgnjxS | Self::FsgnjxD => true, + _ => false, + } + } +} +impl AluOPRRR { + pub(crate) const fn op_name(self) -> &'static str { + match self { + Self::Add => "ADD", + Self::Sub => "SUB", + Self::Sll => "sll", + Self::Slt => "slt", + Self::Sgt => "sgt", + Self::SltU => "sltu", + Self::Sgtu => "sgtu", + Self::Xor => "xor", + Self::Srl => "srl", + Self::Sra => "sra", + Self::Or => "or", + Self::And => "and", + Self::Addw => "ADD", + Self::Subw => "SUB", + Self::Sllw => "sllw", + Self::Srlw => "srlw", + Self::Sraw => "sraw", + Self::Mul => "mul", + Self::Mulh => "mulh", + Self::Mulhsu => "mulhsu", + Self::Mulhu => "mulhu", + Self::Div => "div", + Self::DivU => "divu", + Self::Rem => "rem", + Self::RemU => "remu", + Self::Mulw => "mulw", + Self::Divw => "divw", + Self::Divuw => "divuw", + Self::Remw => "remw", + Self::Remuw => "remuw", + Self::Adduw => "add.uw", + Self::Andn => "andn", + Self::Bclr => "bclr", + Self::Bext => "bext", + Self::Binv => "binv", + Self::Bset => "bset", + Self::Clmul => "clmul", + Self::Clmulh => "clmulh", + Self::Clmulr => "clmulr", + Self::Max => "max", + Self::Maxu => "maxu", + Self::Min => "min", + Self::Minu => "minu", + Self::Orn => "orn", + Self::Rol => "rol", + Self::Rolw => "rolw", + Self::Ror => "ror", + Self::Rorw => "rorw", + Self::Sh1add => "sh1add", + Self::Sh1adduw => "sh1add.uw", + Self::Sh2add => "sh2add", + Self::Sh2adduw => "sh2add.uw", + Self::Sh3add => "sh3add", + Self::Sh3adduw => "sh3add.uw", + Self::Xnor => "xnor", + Self::Pack => "pack", + Self::Packw => "packw", + Self::Packh => "packh", + } + } + + pub fn funct3(self) -> u32 { + match self { + AluOPRRR::Add => 0b000, + AluOPRRR::Sll => 0b001, + AluOPRRR::Slt => 0b010, + AluOPRRR::Sgt => 0b010, + AluOPRRR::SltU => 0b011, + AluOPRRR::Sgtu => 0b011, + AluOPRRR::Xor => 0b100, + AluOPRRR::Srl => 0b101, + AluOPRRR::Sra => 0b101, + AluOPRRR::Or => 0b110, + AluOPRRR::And => 0b111, + AluOPRRR::Sub => 0b000, + + AluOPRRR::Addw => 0b000, + AluOPRRR::Subw => 0b000, + AluOPRRR::Sllw => 0b001, + AluOPRRR::Srlw => 0b101, + AluOPRRR::Sraw => 0b101, + + AluOPRRR::Mul => 0b000, + AluOPRRR::Mulh => 0b001, + AluOPRRR::Mulhsu => 0b010, + AluOPRRR::Mulhu => 0b011, + AluOPRRR::Div => 0b100, + AluOPRRR::DivU => 0b101, + AluOPRRR::Rem => 0b110, + AluOPRRR::RemU => 0b111, + + AluOPRRR::Mulw => 0b000, + AluOPRRR::Divw => 0b100, + AluOPRRR::Divuw => 0b101, + AluOPRRR::Remw => 0b110, + AluOPRRR::Remuw => 0b111, + + // Zbb + AluOPRRR::Adduw => 0b000, + AluOPRRR::Andn => 0b111, + AluOPRRR::Bclr => 0b001, + AluOPRRR::Bext => 0b101, + AluOPRRR::Binv => 0b001, + AluOPRRR::Bset => 0b001, + AluOPRRR::Clmul => 
0b001, + AluOPRRR::Clmulh => 0b011, + AluOPRRR::Clmulr => 0b010, + AluOPRRR::Max => 0b110, + AluOPRRR::Maxu => 0b111, + AluOPRRR::Min => 0b100, + AluOPRRR::Minu => 0b101, + AluOPRRR::Orn => 0b110, + AluOPRRR::Rol => 0b001, + AluOPRRR::Rolw => 0b001, + AluOPRRR::Ror => 0b101, + AluOPRRR::Rorw => 0b101, + AluOPRRR::Sh1add => 0b010, + AluOPRRR::Sh1adduw => 0b010, + AluOPRRR::Sh2add => 0b100, + AluOPRRR::Sh2adduw => 0b100, + AluOPRRR::Sh3add => 0b110, + AluOPRRR::Sh3adduw => 0b110, + AluOPRRR::Xnor => 0b100, + + // Zbkb + AluOPRRR::Pack => 0b100, + AluOPRRR::Packw => 0b100, + AluOPRRR::Packh => 0b111, + } + } + + pub fn op_code(self) -> u32 { + match self { + AluOPRRR::Add + | AluOPRRR::Sub + | AluOPRRR::Sll + | AluOPRRR::Slt + | AluOPRRR::Sgt + | AluOPRRR::SltU + | AluOPRRR::Sgtu + | AluOPRRR::Xor + | AluOPRRR::Srl + | AluOPRRR::Sra + | AluOPRRR::Or + | AluOPRRR::And + | AluOPRRR::Pack + | AluOPRRR::Packh => 0b0110011, + + AluOPRRR::Addw + | AluOPRRR::Subw + | AluOPRRR::Sllw + | AluOPRRR::Srlw + | AluOPRRR::Sraw + | AluOPRRR::Packw => 0b0111011, + + AluOPRRR::Mul + | AluOPRRR::Mulh + | AluOPRRR::Mulhsu + | AluOPRRR::Mulhu + | AluOPRRR::Div + | AluOPRRR::DivU + | AluOPRRR::Rem + | AluOPRRR::RemU => 0b0110011, + + AluOPRRR::Mulw + | AluOPRRR::Divw + | AluOPRRR::Divuw + | AluOPRRR::Remw + | AluOPRRR::Remuw => 0b0111011, + + AluOPRRR::Adduw => 0b0111011, + AluOPRRR::Andn + | AluOPRRR::Bclr + | AluOPRRR::Bext + | AluOPRRR::Binv + | AluOPRRR::Bset + | AluOPRRR::Clmul + | AluOPRRR::Clmulh + | AluOPRRR::Clmulr + | AluOPRRR::Max + | AluOPRRR::Maxu + | AluOPRRR::Min + | AluOPRRR::Minu + | AluOPRRR::Orn + | AluOPRRR::Rol + | AluOPRRR::Ror + | AluOPRRR::Sh1add + | AluOPRRR::Sh2add + | AluOPRRR::Sh3add + | AluOPRRR::Xnor => 0b0110011, + + AluOPRRR::Rolw + | AluOPRRR::Rorw + | AluOPRRR::Sh2adduw + | AluOPRRR::Sh3adduw + | AluOPRRR::Sh1adduw => 0b0111011, + } + } + + pub const fn funct7(self) -> u32 { + match self { + AluOPRRR::Add => 0b0000000, + AluOPRRR::Sub => 0b0100000, + AluOPRRR::Sll => 0b0000000, + AluOPRRR::Slt => 0b0000000, + AluOPRRR::Sgt => 0b0000000, + AluOPRRR::SltU => 0b0000000, + AluOPRRR::Sgtu => 0b0000000, + + AluOPRRR::Xor => 0b0000000, + AluOPRRR::Srl => 0b0000000, + AluOPRRR::Sra => 0b0100000, + AluOPRRR::Or => 0b0000000, + AluOPRRR::And => 0b0000000, + + AluOPRRR::Addw => 0b0000000, + AluOPRRR::Subw => 0b0100000, + AluOPRRR::Sllw => 0b0000000, + AluOPRRR::Srlw => 0b0000000, + AluOPRRR::Sraw => 0b0100000, + + AluOPRRR::Mul => 0b0000001, + AluOPRRR::Mulh => 0b0000001, + AluOPRRR::Mulhsu => 0b0000001, + AluOPRRR::Mulhu => 0b0000001, + AluOPRRR::Div => 0b0000001, + AluOPRRR::DivU => 0b0000001, + AluOPRRR::Rem => 0b0000001, + AluOPRRR::RemU => 0b0000001, + + AluOPRRR::Mulw => 0b0000001, + AluOPRRR::Divw => 0b0000001, + AluOPRRR::Divuw => 0b0000001, + AluOPRRR::Remw => 0b0000001, + AluOPRRR::Remuw => 0b0000001, + AluOPRRR::Adduw => 0b0000100, + AluOPRRR::Andn => 0b0100000, + AluOPRRR::Bclr => 0b0100100, + AluOPRRR::Bext => 0b0100100, + AluOPRRR::Binv => 0b0110100, + AluOPRRR::Bset => 0b0010100, + AluOPRRR::Clmul => 0b0000101, + AluOPRRR::Clmulh => 0b0000101, + AluOPRRR::Clmulr => 0b0000101, + AluOPRRR::Max => 0b0000101, + AluOPRRR::Maxu => 0b0000101, + AluOPRRR::Min => 0b0000101, + AluOPRRR::Minu => 0b0000101, + AluOPRRR::Orn => 0b0100000, + AluOPRRR::Rol => 0b0110000, + AluOPRRR::Rolw => 0b0110000, + AluOPRRR::Ror => 0b0110000, + AluOPRRR::Rorw => 0b0110000, + AluOPRRR::Sh1add => 0b0010000, + AluOPRRR::Sh1adduw => 0b0010000, + AluOPRRR::Sh2add => 0b0010000, + AluOPRRR::Sh2adduw => 
0b0010000, + AluOPRRR::Sh3add => 0b0010000, + AluOPRRR::Sh3adduw => 0b0010000, + AluOPRRR::Xnor => 0b0100000, + + // Zbkb + AluOPRRR::Pack => 0b0000100, + AluOPRRR::Packw => 0b0000100, + AluOPRRR::Packh => 0b0000100, + } + } + + pub(crate) fn reverse_rs(self) -> bool { + // special case. + // sgt and sgtu is not defined in isa. + // emit should reverse rs1 and rs2. + self == AluOPRRR::Sgt || self == AluOPRRR::Sgtu + } +} + +impl AluOPRRI { + pub(crate) fn option_funct6(self) -> Option { + let x: Option = match self { + Self::Slli => Some(0b00_0000), + Self::Srli => Some(0b00_0000), + Self::Srai => Some(0b01_0000), + Self::Bclri => Some(0b010010), + Self::Bexti => Some(0b010010), + Self::Binvi => Some(0b011010), + Self::Bseti => Some(0b001010), + Self::Rori => Some(0b011000), + Self::SlliUw => Some(0b000010), + _ => None, + }; + x + } + + pub(crate) fn option_funct7(self) -> Option { + let x = match self { + Self::Slliw => Some(0b000_0000), + Self::SrliW => Some(0b000_0000), + Self::Sraiw => Some(0b010_0000), + Self::Roriw => Some(0b0110000), + _ => None, + }; + x + } + + pub(crate) fn imm12(self, imm12: Imm12) -> u32 { + let x = imm12.as_u32(); + if let Some(func) = self.option_funct6() { + func << 6 | (x & 0b11_1111) + } else if let Some(func) = self.option_funct7() { + func << 5 | (x & 0b1_1111) + } else if let Some(func) = self.option_funct12() { + func + } else { + x + } + } + + pub(crate) fn option_funct12(self) -> Option { + match self { + Self::Clz => Some(0b011000000000), + Self::Clzw => Some(0b011000000000), + Self::Cpop => Some(0b011000000010), + Self::Cpopw => Some(0b011000000010), + Self::Ctz => Some(0b011000000001), + Self::Ctzw => Some(0b011000000001), + Self::Rev8 => Some(0b011010111000), + Self::Sextb => Some(0b011000000100), + Self::Sexth => Some(0b011000000101), + Self::Zexth => Some(0b000010000000), + Self::Orcb => Some(0b001010000111), + Self::Brev8 => Some(0b0110_1000_0111), + _ => None, + } + } + + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Addi => "ADD", + Self::Slti => "slti", + Self::SltiU => "sltiu", + Self::Xori => "xori", + Self::Ori => "ori", + Self::Andi => "andi", + Self::Slli => "slli", + Self::Srli => "srli", + Self::Srai => "srai", + Self::Addiw => "addiw", + Self::Slliw => "slliw", + Self::SrliW => "srliw", + Self::Sraiw => "sraiw", + Self::Bclri => "bclri", + Self::Bexti => "bexti", + Self::Binvi => "binvi", + Self::Bseti => "bseti", + Self::Rori => "rori", + Self::Roriw => "roriw", + Self::SlliUw => "slli.uw", + Self::Clz => "clz", + Self::Clzw => "clzw", + Self::Cpop => "cpop", + Self::Cpopw => "cpopw", + Self::Ctz => "ctz", + Self::Ctzw => "ctzw", + Self::Rev8 => "rev8", + Self::Sextb => "sext.b", + Self::Sexth => "sext.h", + Self::Zexth => "zext.h", + Self::Orcb => "orc.b", + Self::Brev8 => "brev8", + } + } + + pub fn funct3(self) -> u32 { + match self { + AluOPRRI::Addi => 0b000, + AluOPRRI::Slti => 0b010, + AluOPRRI::SltiU => 0b011, + AluOPRRI::Xori => 0b100, + AluOPRRI::Ori => 0b110, + AluOPRRI::Andi => 0b111, + AluOPRRI::Slli => 0b001, + AluOPRRI::Srli => 0b101, + AluOPRRI::Srai => 0b101, + AluOPRRI::Addiw => 0b000, + AluOPRRI::Slliw => 0b001, + AluOPRRI::SrliW => 0b101, + AluOPRRI::Sraiw => 0b101, + AluOPRRI::Bclri => 0b001, + AluOPRRI::Bexti => 0b101, + AluOPRRI::Binvi => 0b001, + AluOPRRI::Bseti => 0b001, + AluOPRRI::Rori => 0b101, + AluOPRRI::Roriw => 0b101, + AluOPRRI::SlliUw => 0b001, + AluOPRRI::Clz => 0b001, + AluOPRRI::Clzw => 0b001, + AluOPRRI::Cpop => 0b001, + AluOPRRI::Cpopw => 0b001, + AluOPRRI::Ctz => 
0b001, + AluOPRRI::Ctzw => 0b001, + AluOPRRI::Rev8 => 0b101, + AluOPRRI::Sextb => 0b001, + AluOPRRI::Sexth => 0b001, + AluOPRRI::Zexth => 0b100, + AluOPRRI::Orcb => 0b101, + AluOPRRI::Brev8 => 0b101, + } + } + + pub fn op_code(self) -> u32 { + match self { + AluOPRRI::Addi + | AluOPRRI::Slti + | AluOPRRI::SltiU + | AluOPRRI::Xori + | AluOPRRI::Ori + | AluOPRRI::Andi + | AluOPRRI::Slli + | AluOPRRI::Srli + | AluOPRRI::Srai + | AluOPRRI::Bclri + | AluOPRRI::Bexti + | AluOPRRI::Binvi + | AluOPRRI::Bseti + | AluOPRRI::Rori + | AluOPRRI::Clz + | AluOPRRI::Cpop + | AluOPRRI::Ctz + | AluOPRRI::Rev8 + | AluOPRRI::Sextb + | AluOPRRI::Sexth + | AluOPRRI::Orcb + | AluOPRRI::Brev8 => 0b0010011, + + AluOPRRI::Addiw + | AluOPRRI::Slliw + | AluOPRRI::SrliW + | AluOPRRI::Sraiw + | AluOPRRI::Roriw + | AluOPRRI::SlliUw + | AluOPRRI::Clzw + | AluOPRRI::Cpopw + | AluOPRRI::Ctzw => 0b0011011, + AluOPRRI::Zexth => 0b0111011, + } + } +} + +impl Default for FRM { + fn default() -> Self { + Self::Fcsr + } +} + +/// float rounding mode. +impl FRM { + pub(crate) fn to_static_str(self) -> &'static str { + match self { + FRM::RNE => "rne", + FRM::RTZ => "rtz", + FRM::RDN => "rdn", + FRM::RUP => "rup", + FRM::RMM => "rmm", + FRM::Fcsr => "fcsr", + } + } + + #[inline] + pub(crate) fn bits(self) -> u8 { + match self { + FRM::RNE => 0b000, + FRM::RTZ => 0b001, + FRM::RDN => 0b010, + FRM::RUP => 0b011, + FRM::RMM => 0b100, + FRM::Fcsr => 0b111, + } + } + pub(crate) fn as_u32(self) -> u32 { + self.bits() as u32 + } +} + +impl FFlagsException { + #[inline] + pub(crate) fn mask(self) -> u32 { + match self { + FFlagsException::NV => 1 << 4, + FFlagsException::DZ => 1 << 3, + FFlagsException::OF => 1 << 2, + FFlagsException::UF => 1 << 1, + FFlagsException::NX => 1 << 0, + } + } +} + +impl LoadOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Lb => "lb", + Self::Lh => "lh", + Self::Lw => "lw", + Self::Lbu => "lbu", + Self::Lhu => "lhu", + Self::Lwu => "lwu", + Self::Ld => "ld", + Self::Flw => "flw", + Self::Fld => "fld", + } + } + + pub(crate) fn from_type(t: Type) -> Self { + if t.is_float() { + return if t == F32 { Self::Flw } else { Self::Fld }; + } + match t { + R32 => Self::Lwu, + R64 | I64 => Self::Ld, + + I8 => Self::Lb, + I16 => Self::Lh, + I32 => Self::Lw, + _ => unreachable!(), + } + } + + pub(crate) fn op_code(self) -> u32 { + match self { + Self::Lb | Self::Lh | Self::Lw | Self::Lbu | Self::Lhu | Self::Lwu | Self::Ld => { + 0b0000011 + } + Self::Flw | Self::Fld => 0b0000111, + } + } + pub(crate) fn funct3(self) -> u32 { + match self { + Self::Lb => 0b000, + Self::Lh => 0b001, + Self::Lw => 0b010, + Self::Lwu => 0b110, + Self::Lbu => 0b100, + Self::Lhu => 0b101, + Self::Ld => 0b011, + Self::Flw => 0b010, + Self::Fld => 0b011, + } + } +} + +impl StoreOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Sb => "sb", + Self::Sh => "sh", + Self::Sw => "sw", + Self::Sd => "sd", + Self::Fsw => "fsw", + Self::Fsd => "fsd", + } + } + pub(crate) fn from_type(t: Type) -> Self { + if t.is_float() { + return if t == F32 { Self::Fsw } else { Self::Fsd }; + } + match t.bits() { + 1 | 8 => Self::Sb, + 16 => Self::Sh, + 32 => Self::Sw, + 64 => Self::Sd, + _ => unreachable!(), + } + } + pub(crate) fn op_code(self) -> u32 { + match self { + Self::Sb | Self::Sh | Self::Sw | Self::Sd => 0b0100011, + Self::Fsw | Self::Fsd => 0b0100111, + } + } + pub(crate) fn funct3(self) -> u32 { + match self { + Self::Sb => 0b000, + Self::Sh => 0b001, + Self::Sw => 0b010, + Self::Sd => 0b011, + Self::Fsw 
=> 0b010, + Self::Fsd => 0b011, + } + } +} + +impl FClassResult { + pub(crate) const fn bit(self) -> u32 { + match self { + FClassResult::NegInfinite => 1 << 0, + FClassResult::NegNormal => 1 << 1, + FClassResult::NegSubNormal => 1 << 2, + FClassResult::NegZero => 1 << 3, + FClassResult::PosZero => 1 << 4, + FClassResult::PosSubNormal => 1 << 5, + FClassResult::PosNormal => 1 << 6, + FClassResult::PosInfinite => 1 << 7, + FClassResult::SNaN => 1 << 8, + FClassResult::QNaN => 1 << 9, + } + } + + #[inline] + pub(crate) const fn is_nan_bits() -> u32 { + Self::SNaN.bit() | Self::QNaN.bit() + } + #[inline] + pub(crate) fn is_zero_bits() -> u32 { + Self::NegZero.bit() | Self::PosZero.bit() + } + + #[inline] + pub(crate) fn is_infinite_bits() -> u32 { + Self::PosInfinite.bit() | Self::NegInfinite.bit() + } +} + +impl AtomicOP { + #[inline] + pub(crate) fn is_load(self) -> bool { + match self { + Self::LrW | Self::LrD => true, + _ => false, + } + } + + #[inline] + pub(crate) fn op_name(self, amo: AMO) -> String { + let s = match self { + Self::LrW => "lr.w", + Self::ScW => "sc.w", + + Self::AmoswapW => "amoswap.w", + Self::AmoaddW => "amoadd.w", + Self::AmoxorW => "amoxor.w", + Self::AmoandW => "amoand.w", + Self::AmoorW => "amoor.w", + Self::AmominW => "amomin.w", + Self::AmomaxW => "amomax.w", + Self::AmominuW => "amominu.w", + Self::AmomaxuW => "amomaxu.w", + Self::LrD => "lr.d", + Self::ScD => "sc.d", + Self::AmoswapD => "amoswap.d", + Self::AmoaddD => "amoadd.d", + Self::AmoxorD => "amoxor.d", + Self::AmoandD => "amoand.d", + Self::AmoorD => "amoor.d", + Self::AmominD => "amomin.d", + Self::AmomaxD => "amomax.d", + Self::AmominuD => "amominu.d", + Self::AmomaxuD => "amomaxu.d", + }; + format!("{}{}", s, amo.to_static_str()) + } + #[inline] + pub(crate) fn op_code(self) -> u32 { + 0b0101111 + } + + #[inline] + pub(crate) fn funct7(self, amo: AMO) -> u32 { + self.funct5() << 2 | amo.as_u32() & 0b11 + } + + pub(crate) fn funct3(self) -> u32 { + match self { + AtomicOP::LrW + | AtomicOP::ScW + | AtomicOP::AmoswapW + | AtomicOP::AmoaddW + | AtomicOP::AmoxorW + | AtomicOP::AmoandW + | AtomicOP::AmoorW + | AtomicOP::AmominW + | AtomicOP::AmomaxW + | AtomicOP::AmominuW + | AtomicOP::AmomaxuW => 0b010, + AtomicOP::LrD + | AtomicOP::ScD + | AtomicOP::AmoswapD + | AtomicOP::AmoaddD + | AtomicOP::AmoxorD + | AtomicOP::AmoandD + | AtomicOP::AmoorD + | AtomicOP::AmominD + | AtomicOP::AmomaxD + | AtomicOP::AmominuD + | AtomicOP::AmomaxuD => 0b011, + } + } + pub(crate) fn funct5(self) -> u32 { + match self { + AtomicOP::LrW => 0b00010, + AtomicOP::ScW => 0b00011, + AtomicOP::AmoswapW => 0b00001, + AtomicOP::AmoaddW => 0b00000, + AtomicOP::AmoxorW => 0b00100, + AtomicOP::AmoandW => 0b01100, + AtomicOP::AmoorW => 0b01000, + AtomicOP::AmominW => 0b10000, + AtomicOP::AmomaxW => 0b10100, + AtomicOP::AmominuW => 0b11000, + AtomicOP::AmomaxuW => 0b11100, + AtomicOP::LrD => 0b00010, + AtomicOP::ScD => 0b00011, + AtomicOP::AmoswapD => 0b00001, + AtomicOP::AmoaddD => 0b00000, + AtomicOP::AmoxorD => 0b00100, + AtomicOP::AmoandD => 0b01100, + AtomicOP::AmoorD => 0b01000, + AtomicOP::AmominD => 0b10000, + AtomicOP::AmomaxD => 0b10100, + AtomicOP::AmominuD => 0b11000, + AtomicOP::AmomaxuD => 0b11100, + } + } + + pub(crate) fn load_op(t: Type) -> Self { + if t.bits() <= 32 { + Self::LrW + } else { + Self::LrD + } + } + pub(crate) fn store_op(t: Type) -> Self { + if t.bits() <= 32 { + Self::ScW + } else { + Self::ScD + } + } + + /// extract + pub(crate) fn extract(rd: WritableReg, offset: Reg, rs: Reg, ty: Type) -> 
SmallInstVec { + let mut insts = SmallInstVec::new(); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: rd, + rs1: rs, + rs2: offset, + }); + // + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts + } + + /// like extract but sign extend the value. + /// suitable for smax,etc. + pub(crate) fn extract_sext( + rd: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: rd, + rs1: rs, + rs2: offset, + }); + // + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: true, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts + } + + pub(crate) fn unset( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + ty: Type, + ) -> SmallInstVec { + assert!(rd != tmp); + let mut insts = SmallInstVec::new(); + insts.extend(Inst::load_int_mask(tmp, ty)); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: tmp, + rs1: tmp.to_reg(), + rs2: offset, + }); + insts.push(Inst::construct_bit_not(tmp, tmp.to_reg())); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp.to_reg(), + }); + insts + } + + pub(crate) fn set( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + assert!(rd != tmp); + let mut insts = SmallInstVec::new(); + // make rs into tmp. + insts.push(Inst::Extend { + rd: tmp, + rn: rs, + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: tmp, + rs1: tmp.to_reg(), + rs2: offset, + }); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp.to_reg(), + }); + insts + } + + /// Merge reset part of rs into rd. + /// Call this function must make sure that other part of value is already in rd. + pub(crate) fn merge( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + let mut insts = Self::unset(rd, tmp, offset, ty); + insts.extend(Self::set(rd, tmp, offset, rs, ty)); + insts + } +} + +impl IntSelectOP { + #[inline] + pub(crate) fn from_ir_op(op: crate::ir::Opcode) -> Self { + match op { + crate::ir::Opcode::Smax => Self::Smax, + crate::ir::Opcode::Umax => Self::Umax, + crate::ir::Opcode::Smin => Self::Smin, + crate::ir::Opcode::Umin => Self::Umin, + _ => unreachable!(), + } + } + #[inline] + pub(crate) fn op_name(self) -> &'static str { + match self { + IntSelectOP::Smax => "smax", + IntSelectOP::Umax => "umax", + IntSelectOP::Smin => "smin", + IntSelectOP::Umin => "umin", + } + } + #[inline] + pub(crate) fn to_int_cc(self) -> IntCC { + match self { + IntSelectOP::Smax => IntCC::SignedGreaterThan, + IntSelectOP::Umax => IntCC::UnsignedGreaterThan, + IntSelectOP::Smin => IntCC::SignedLessThan, + IntSelectOP::Umin => IntCC::UnsignedLessThan, + } + } +} + +///Atomic Memory ordering. +#[derive(Copy, Clone, Debug)] +pub enum AMO { + Relax = 0b00, + Release = 0b01, + Aquire = 0b10, + SeqCst = 0b11, +} + +impl AMO { + pub(crate) fn to_static_str(self) -> &'static str { + match self { + AMO::Relax => "", + AMO::Release => ".rl", + AMO::Aquire => ".aq", + AMO::SeqCst => ".aqrl", + } + } + pub(crate) fn as_u32(self) -> u32 { + self as u32 + } +} + +impl Inst { + /// fence request bits. 
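+    /// These follow the RISC-V `fence` encoding: the predecessor and
+    /// successor sets are each a 4-bit i/o/r/w mask, so e.g. `fence rw, rw`
+    /// uses `FENCE_REQ_R | FENCE_REQ_W` (0b0011) for both halves.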
+ pub(crate) const FENCE_REQ_I: u8 = 1 << 3; + pub(crate) const FENCE_REQ_O: u8 = 1 << 2; + pub(crate) const FENCE_REQ_R: u8 = 1 << 1; + pub(crate) const FENCE_REQ_W: u8 = 1 << 0; + pub(crate) fn fence_req_to_string(x: u8) -> String { + let mut s = String::default(); + if x & Self::FENCE_REQ_I != 0 { + s.push_str("i"); + } + if x & Self::FENCE_REQ_O != 0 { + s.push_str("o"); + } + if x & Self::FENCE_REQ_R != 0 { + s.push_str("r"); + } + if x & Self::FENCE_REQ_W != 0 { + s.push_str("w"); + } + s + } +} + +impl FloatRoundOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + FloatRoundOP::Nearest => "nearest", + FloatRoundOP::Ceil => "ceil", + FloatRoundOP::Floor => "floor", + FloatRoundOP::Trunc => "trunc", + } + } + + pub(crate) fn to_frm(self) -> FRM { + match self { + FloatRoundOP::Nearest => FRM::RNE, + FloatRoundOP::Ceil => FRM::RUP, + FloatRoundOP::Floor => FRM::RDN, + FloatRoundOP::Trunc => FRM::RTZ, + } + } +} + +impl FloatSelectOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + FloatSelectOP::Max => "max", + FloatSelectOP::Min => "min", + } + } + + pub(crate) fn to_fpuoprrr(self, ty: Type) -> FpuOPRRR { + match self { + FloatSelectOP::Max => { + if ty == F32 { + FpuOPRRR::FmaxS + } else { + FpuOPRRR::FmaxD + } + } + FloatSelectOP::Min => { + if ty == F32 { + FpuOPRRR::FminS + } else { + FpuOPRRR::FminD + } + } + } + } + // move qnan bits into int register. + // pub(crate) fn snan_bits(self, rd: Writable, ty: Type) -> SmallInstVec { + // let mut insts = SmallInstVec::new(); + // insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1))); + // let x = if ty == F32 { 22 } else { 51 }; + // insts.push(Inst::AluRRImm12 { + // alu_op: AluOPRRI::Srli, + // rd: rd, + // rs: rd.to_reg(), + // imm12: Imm12::from_bits(x), + // }); + // insts.push(Inst::AluRRImm12 { + // alu_op: AluOPRRI::Slli, + // rd: rd, + // rs: rd.to_reg(), + // imm12: Imm12::from_bits(x), + // }); + // insts + // } +} + +pub(crate) fn f32_bits(f: f32) -> u32 { + u32::from_le_bytes(f.to_le_bytes()) +} +pub(crate) fn f64_bits(f: f64) -> u64 { + u64::from_le_bytes(f.to_le_bytes()) +} + +/// +pub(crate) fn f32_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f32, f32) { + match (signed, out_bits) { + (true, 8) => (i8::min_value() as f32 - 1., i8::max_value() as f32 + 1.), + (true, 16) => (i16::min_value() as f32 - 1., i16::max_value() as f32 + 1.), + (true, 32) => (-2147483904.0, 2147483648.0), + (true, 64) => (-9223373136366403584.0, 9223372036854775808.0), + (false, 8) => (-1., u8::max_value() as f32 + 1.), + (false, 16) => (-1., u16::max_value() as f32 + 1.), + (false, 32) => (-1., 4294967296.0), + (false, 64) => (-1., 18446744073709551616.0), + _ => unreachable!(), + } +} + +pub(crate) fn f64_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f64, f64) { + match (signed, out_bits) { + (true, 8) => (i8::min_value() as f64 - 1., i8::max_value() as f64 + 1.), + (true, 16) => (i16::min_value() as f64 - 1., i16::max_value() as f64 + 1.), + (true, 32) => (-2147483649.0, 2147483648.0), + (true, 64) => (-9223372036854777856.0, 9223372036854775808.0), + (false, 8) => (-1., u8::max_value() as f64 + 1.), + (false, 16) => (-1., u16::max_value() as f64 + 1.), + (false, 32) => (-1., 4294967296.0), + (false, 64) => (-1., 18446744073709551616.0), + _ => unreachable!(), + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit.rs b/cranelift/codegen/src/isa/zkasm/inst/emit.rs new file mode 100644 index 000000000000..122e7bc97c1c --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/emit.rs @@ -0,0 
+1,3426 @@ +//! Riscv64 ISA: binary code emission. + +use crate::binemit::StackMap; +use crate::ir::{self, RelSourceLoc, TrapCode}; +use crate::isa::zkasm::inst::*; +use crate::machinst::{AllocationConsumer, Reg, Writable}; +use crate::trace; +use cranelift_control::ControlPlane; +use cranelift_entity::EntityRef; +use regalloc2::Allocation; + +pub struct EmitInfo { + shared_flag: settings::Flags, + isa_flags: super::super::riscv_settings::Flags, +} + +impl EmitInfo { + pub(crate) fn new( + shared_flag: settings::Flags, + isa_flags: super::super::riscv_settings::Flags, + ) -> Self { + Self { + shared_flag, + isa_flags, + } + } +} + +/// load constant by put the constant in the code stream. +/// calculate the pc and using load instruction. +/// This is only allow used in the emit stage. +/// Because of those instruction must execute together. +/// see https://github.com/bytecodealliance/wasmtime/pull/5612 +#[derive(Clone, Copy)] +pub(crate) enum LoadConstant { + U32(u32), + U64(u64), +} + +#[allow(unused)] +impl LoadConstant { + fn to_le_bytes(self) -> Vec { + match self { + LoadConstant::U32(x) => Vec::from_iter(x.to_le_bytes().into_iter()), + LoadConstant::U64(x) => Vec::from_iter(x.to_le_bytes().into_iter()), + } + } + fn load_op(self) -> LoadOP { + match self { + LoadConstant::U32(_) => LoadOP::Lwu, + LoadConstant::U64(_) => LoadOP::Ld, + } + } + fn load_ty(self) -> Type { + match self { + LoadConstant::U32(_) => R32, + LoadConstant::U64(_) => R64, + } + } + + pub(crate) fn load_constant Writable>( + self, + rd: Writable, + alloc_tmp: &mut F, + ) -> SmallInstVec { + todo!() + /* + let mut insts = SmallInstVec::new(); + // get current pc. + let pc = alloc_tmp(I64); + insts.push(Inst::Auipc { + rd: pc, + imm: Imm20 { bits: 0 }, + }); + // load + insts.push(Inst::Load { + rd, + op: self.load_op(), + flags: MemFlags::new(), + from: AMode::RegOffset(pc.to_reg(), 12, self.load_ty()), + }); + let data = self.to_le_bytes(); + // jump over. + insts.push(Inst::Jal { + dest: BranchTarget::ResolvedOffset(Inst::INSTRUCTION_SIZE + data.len() as i32), + }); + insts.push(Inst::RawData { data }); + insts + */ + } + + // load and perform an extra add. + pub(crate) fn load_constant_and_add(self, rd: Writable, rs: Reg) -> SmallInstVec { + todo!() + /* + let mut insts = self.load_constant(rd, &mut |_| rd); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd, + rs1: rd.to_reg(), + rs2: rs, + }); + insts + */ + } +} + +pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 { + u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() +} + +#[derive(Clone, Debug, PartialEq, Default)] +pub enum EmitVState { + #[default] + Unknown, + Known(VState), +} + +/// State carried between emissions of a sequence of instructions. +#[derive(Default, Clone, Debug)] +pub struct EmitState { + pub(crate) virtual_sp_offset: i64, + pub(crate) nominal_sp_to_fp: i64, + /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + stack_map: Option, + /// Current source-code location corresponding to instruction to be emitted. + cur_srcloc: RelSourceLoc, + /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and + /// optimized away at compiletime. See [cranelift_control]. + ctrl_plane: ControlPlane, + /// Vector State + /// Controls the current state of the vector unit at the emission point. 
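+    /// It starts out as `Unknown` and is reset to `Unknown` at each block
+    /// boundary (see `on_new_block`), which forces a fresh `VecSetState` to
+    /// be emitted before the next instruction that needs a known `VState`.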
+ vstate: EmitVState, +} + +impl EmitState { + fn take_stack_map(&mut self) -> Option { + self.stack_map.take() + } + + fn clear_post_insn(&mut self) { + self.stack_map = None; + } + + fn cur_srcloc(&self) -> RelSourceLoc { + self.cur_srcloc + } +} + +impl MachInstEmitState for EmitState { + fn new( + abi: &Callee, + ctrl_plane: ControlPlane, + ) -> Self { + EmitState { + virtual_sp_offset: 0, + nominal_sp_to_fp: abi.frame_size() as i64, + stack_map: None, + cur_srcloc: RelSourceLoc::default(), + ctrl_plane, + vstate: EmitVState::Unknown, + } + } + + fn pre_safepoint(&mut self, stack_map: StackMap) { + self.stack_map = Some(stack_map); + } + + fn pre_sourceloc(&mut self, srcloc: RelSourceLoc) { + self.cur_srcloc = srcloc; + } + + fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { + &mut self.ctrl_plane + } + + fn take_ctrl_plane(self) -> ControlPlane { + self.ctrl_plane + } + + fn on_new_block(&mut self) { + // Reset the vector state. + self.vstate = EmitVState::Unknown; + } +} + +#[allow(unused)] +impl Inst { + /// construct a "imm - rs". + pub(crate) fn construct_imm_sub_rs(rd: Writable, imm: u64, rs: Reg) -> SmallInstVec { + todo!() + /* let mut insts = Inst::load_constant_u64(rd, imm, &mut |_| rd); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd, + rs1: rd.to_reg(), + rs2: rs, + }); + insts */ + } + + /// Load int mask. + /// If ty is int then 0xff in rd. + pub(crate) fn load_int_mask(rd: Writable, ty: Type) -> SmallInstVec { + todo!() + /* let mut insts = SmallInstVec::new(); + assert!(ty.is_int() && ty.bits() <= 64); + match ty { + I64 => { + insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1))); + } + I32 | I16 => { + insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1))); + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + } + I8 => { + insts.push(Inst::load_imm12(rd, Imm12::from_bits(255))); + } + _ => unreachable!("ty:{:?}", ty), + } + insts */ + } + /// inverse all bit + pub(crate) fn construct_bit_not(rd: Writable, rs: Reg) -> Inst { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Xori, + rd, + rs, + imm12: Imm12::from_bits(-1), + } + } + + // emit a float is not a nan. + pub(crate) fn emit_not_nan(rd: Writable, rs: Reg, ty: Type) -> Inst { + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FeqS + } else { + FpuOPRRR::FeqD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + } + + pub(crate) fn emit_fabs(rd: Writable, rs: Reg, ty: Type) -> Inst { + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjxS + } else { + FpuOPRRR::FsgnjxD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + } + /// If a float is zero. 
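+    /// Branches to `taken` when `rs` is *not* zero: the (currently
+    /// commented-out) lowering classifies `rs` with `fclass`, masks the
+    /// +0/-0 bits, and takes the branch when that mask comes back zero.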
+ pub(crate) fn emit_if_float_not_zero( + tmp: Writable, + rs: Reg, + ty: Type, + taken: BranchTarget, + not_taken: BranchTarget, + ) -> SmallInstVec { + todo!() + /* let mut insts = SmallInstVec::new(); + let class_op = if ty == F32 { + FpuOPRR::FclassS + } else { + FpuOPRR::FclassD + }; + insts.push(Inst::FpuRR { + alu_op: class_op, + frm: None, + rd: tmp, + rs: rs, + }); + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(FClassResult::is_zero_bits() as i16), + }); + insts.push(Inst::CondBr { + taken, + not_taken, + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + }); + insts */ + } + pub(crate) fn emit_fneg(rd: Writable, rs: Reg, ty: Type) -> Inst { + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjnS + } else { + FpuOPRRR::FsgnjnD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + } + + pub(crate) fn lower_br_icmp( + cc: IntCC, + a: ValueRegs, + b: ValueRegs, + taken: BranchTarget, + not_taken: BranchTarget, + ty: Type, + ) -> SmallInstVec { + todo!() + /* let mut insts = SmallInstVec::new(); + if ty.bits() <= 64 { + let rs1 = a.only_reg().unwrap(); + let rs2 = b.only_reg().unwrap(); + let inst = Inst::CondBr { + taken, + not_taken, + kind: IntegerCompare { kind: cc, rs1, rs2 }, + }; + insts.push(inst); + return insts; + } + // compare i128 + let low = |cc: IntCC| -> IntegerCompare { + IntegerCompare { + rs1: a.regs()[0], + rs2: b.regs()[0], + kind: cc, + } + }; + let high = |cc: IntCC| -> IntegerCompare { + IntegerCompare { + rs1: a.regs()[1], + rs2: b.regs()[1], + kind: cc, + } + }; + match cc { + IntCC::Equal => { + // if high part not equal, + // then we can go to not_taken otherwise fallthrough. + insts.push(Inst::CondBr { + taken: not_taken, + not_taken: BranchTarget::zero(), + kind: high(IntCC::NotEqual), + }); + // the rest part. + insts.push(Inst::CondBr { + taken, + not_taken, + kind: low(IntCC::Equal), + }); + } + + IntCC::NotEqual => { + // if the high part not equal , + // we know the whole must be not equal, + // we can goto the taken part , otherwise fallthrought. + insts.push(Inst::CondBr { + taken, + not_taken: BranchTarget::zero(), // no branch + kind: high(IntCC::NotEqual), + }); + + insts.push(Inst::CondBr { + taken, + not_taken, + kind: low(IntCC::NotEqual), + }); + } + IntCC::SignedGreaterThanOrEqual + | IntCC::SignedLessThanOrEqual + | IntCC::UnsignedGreaterThanOrEqual + | IntCC::UnsignedLessThanOrEqual + | IntCC::SignedGreaterThan + | IntCC::SignedLessThan + | IntCC::UnsignedLessThan + | IntCC::UnsignedGreaterThan => { + // + insts.push(Inst::CondBr { + taken, + not_taken: BranchTarget::zero(), + kind: high(cc.without_equal()), + }); + // + insts.push(Inst::CondBr { + taken: not_taken, + not_taken: BranchTarget::zero(), + kind: high(IntCC::NotEqual), + }); + insts.push(Inst::CondBr { + taken, + not_taken, + kind: low(cc.unsigned()), + }); + } + } + insts */ + } + + /// Returns Some(VState) if this insturction is expecting a specific vector state + /// before emission. + fn expected_vstate(&self) -> Option<&VState> { + match self { + Inst::Nop0 + | Inst::Nop4 + | Inst::Label { .. } + | Inst::BrTable { .. } + | Inst::Auipc { .. } + | Inst::Lui { .. } + | Inst::LoadConst32 { .. } + | Inst::LoadConst64 { .. } + | Inst::AluRRR { .. } + | Inst::AddImm32 { .. } + | Inst::MulImm32 { .. } + | Inst::FpuRRR { .. } + | Inst::AluRRImm12 { .. } + | Inst::Load { .. } + | Inst::Store { .. } + | Inst::Args { .. } + | Inst::Ret { .. } + | Inst::Extend { .. 
} + | Inst::AdjustSp { .. } + | Inst::Call { .. } + | Inst::CallInd { .. } + | Inst::ReturnCall { .. } + | Inst::ReturnCallInd { .. } + | Inst::TrapIf { .. } + | Inst::Jal { .. } + | Inst::CondBr { .. } + | Inst::LoadExtName { .. } + | Inst::LoadAddr { .. } + | Inst::VirtualSPOffsetAdj { .. } + | Inst::Mov { .. } + | Inst::MovFromPReg { .. } + | Inst::Fence { .. } + | Inst::FenceI + | Inst::ECall + | Inst::EBreak + | Inst::Udf { .. } + | Inst::FpuRR { .. } + | Inst::FpuRRRR { .. } + | Inst::Jalr { .. } + | Inst::Atomic { .. } + | Inst::Select { .. } + | Inst::AtomicCas { .. } + | Inst::IntSelect { .. } + | Inst::Icmp { .. } + | Inst::SelectReg { .. } + | Inst::FcvtToInt { .. } + | Inst::RawData { .. } + | Inst::AtomicStore { .. } + | Inst::AtomicLoad { .. } + | Inst::AtomicRmwLoop { .. } + | Inst::TrapIfC { .. } + | Inst::Unwind { .. } + | Inst::DummyUse { .. } + | Inst::FloatRound { .. } + | Inst::FloatSelect { .. } + | Inst::Popcnt { .. } + | Inst::Rev8 { .. } + | Inst::Cltz { .. } + | Inst::Brev8 { .. } + | Inst::StackProbeLoop { .. } => None, + + // VecSetState does not expect any vstate, rather it updates it. + Inst::VecSetState { .. } => None, + + // `vmv` instructions copy a set of registers and ignore vstate. + Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None, + + Inst::VecAluRR { vstate, .. } | + Inst::VecAluRRR { vstate, .. } | + Inst::VecAluRRRR { vstate, .. } | + Inst::VecAluRImm5 { vstate, .. } | + Inst::VecAluRRImm5 { vstate, .. } | + Inst::VecAluRRRImm5 { vstate, .. } | + // TODO: Unit-stride loads and stores only need the AVL to be correct, not + // the full vtype. A future optimization could be to decouple these two when + // updating vstate. This would allow us to avoid emitting a VecSetState in + // some cases. + Inst::VecLoad { vstate, .. } + | Inst::VecStore { vstate, .. } => Some(vstate), + } + } +} + +fn put_string(s: &str, sink: &mut MachBuffer) { + sink.put_data(" ".as_bytes()); + sink.put_data(s.as_bytes()); +} + +fn access_reg_with_offset(reg: Reg, offset: i16) -> String { + let name = reg_name(reg); + match offset.cmp(&0) { + core::cmp::Ordering::Less => format!("{name} - {}", -offset), + core::cmp::Ordering::Equal => name, + core::cmp::Ordering::Greater => format!("{name} + {}", offset), + } +} + +#[allow(unused)] +impl MachInstEmit for Inst { + type State = EmitState; + type Info = EmitInfo; + + fn emit( + &self, + allocs: &[Allocation], + sink: &mut MachBuffer, + emit_info: &Self::Info, + state: &mut EmitState, + ) { + let mut allocs = AllocationConsumer::new(allocs); + + // Check if we need to update the vector state before emitting this instruction + if let Some(expected) = self.expected_vstate() { + if state.vstate != EmitVState::Known(expected.clone()) { + // Update the vector state. + Inst::VecSetState { + rd: writable_zero_reg(), + vstate: expected.clone(), + } + .emit(&[], sink, emit_info, state); + } + } + + // N.B.: we *must* not exceed the "worst-case size" used to compute + // where to insert islands, except when islands are explicitly triggered + // (with an `EmitIsland`). We check this in debug builds. This is `mut` + // to allow disabling the check for `JTSequence`, which is always + // emitted following an `EmitIsland`. 
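+        // Unlike the binary RISC-V emitter this file was adapted from, most
+        // arms below emit textual zkASM through `put_string`; register
+        // operands are rendered with `reg_name` and `access_reg_with_offset`
+        // (e.g. an address with offset 8 prints as "<reg> + 8").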
+ let mut start_off = sink.cur_offset(); + match self { + &Inst::Nop0 => { + // do nothing + } + // Addi x0, x0, 0 + &Inst::Nop4 => { + todo!() /* let x = Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: Writable::from_reg(zero_reg()), + rs: zero_reg(), + imm12: Imm12::zero(), + }; + x.emit(&[], sink, emit_info, state) */ + } + &Inst::Label { imm } => { + sink.put_data(format!("label_{imm}:\n").as_bytes()); + } + &Inst::RawData { ref data } => { + // Right now we only put a u32 or u64 in this instruction. + // It is not very long, no need to check if need `emit_island`. + // If data is very long , this is a bug because RawData is typecial + // use to load some data and rely on some positon in the code stream. + // and we may exceed `Inst::worst_case_size`. + // for more information see https://github.com/bytecodealliance/wasmtime/pull/5612. + todo!() // sink.put_data(&data[..]); + } + &Inst::Lui { rd, ref imm } => { + todo!() /* let rd = allocs.next_writable(rd); + let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12); + sink.put4(x); */ + } + &Inst::LoadConst32 { rd, imm } => { + todo!() /* let rd = allocs.next_writable(rd); + LoadConstant::U32(imm) + .load_constant(rd, &mut |_| rd) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); */ + } + &Inst::LoadConst64 { rd, imm } => { + todo!() /* let rd = allocs.next_writable(rd); + LoadConstant::U64(imm) + .load_constant(rd, &mut |_| rd) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); */ + } + &Inst::FpuRR { + frm, + alu_op, + rd, + rs, + } => { + todo!() /* let rs = allocs.next(rs); + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3(frm) << 12 + | reg_to_gpr_num(rs) << 15 + | alu_op.rs2_funct5() << 20 + | alu_op.funct7() << 25; + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && alu_op.is_convert_to_int() { + sink.add_trap(TrapCode::BadConversionToInteger); + } + sink.put4(x); */ + } + &Inst::FpuRRRR { + alu_op, + rd, + rs1, + rs2, + rs3, + frm, + } => { + todo!() /* let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rs3 = allocs.next(rs3); + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3(frm) << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + | alu_op.funct2() << 25 + | reg_to_gpr_num(rs3) << 27; + + sink.put4(x); */ + } + &Inst::FpuRRR { + alu_op, + frm, + rd, + rs1, + rs2, + } => { + todo!() /* let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + + let x: u32 = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | (alu_op.funct3(frm)) << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + | alu_op.funct7() << 25; + sink.put4(x); */ + } + &Inst::Unwind { ref inst } => { + put_string(&format!("Unwind\n"), sink); + // sink.add_unwind(inst.clone()); + } + &Inst::DummyUse { reg } => { + todo!() // allocs.next(reg); + } + &Inst::AddImm32 { rd, src1, src2 } => { + let rd = allocs.next(rd.to_reg()); + // TODO(akashin): Should we have a function for `bits` field? + put_string( + &format!("{} + {} => {}\n", src1.bits, src2.bits, reg_name(rd)), + sink, + ); + } + &Inst::MulImm32 { rd, src1, src2 } => { + let rd = allocs.next(rd.to_reg()); + // TODO(akashin): Should we have a function for `bits` field? 
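+                // Mirrors AddImm32 above: emits a zkASM line of the form
+                // `<imm1> * <imm2> => <rd>`, with both operands taken from the
+                // instruction's immediate `bits` fields.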
+ put_string( + &format!("{} * {} => {}\n", src1.bits, src2.bits, reg_name(rd)), + sink, + ); + } + &Inst::AluRRR { + alu_op, + rd, + rs1, + rs2, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + debug_assert_eq!(rs1, a0()); + debug_assert_eq!(rs2, b0()); + let rd = allocs.next_writable(rd); + put_string( + &format!("$ => {} :{}\n", reg_name(rd.to_reg()), alu_op.op_name()), + sink, + ); + + /* + let (rs1, rs2) = if alu_op.reverse_rs() { + (rs2, rs1) + } else { + (rs1, rs2) + }; + + sink.put4(encode_r_type( + alu_op.op_code(), + rd, + alu_op.funct3(), + rs1, + rs2, + alu_op.funct7(), + )); */ + } + &Inst::AluRRImm12 { + alu_op, + rd, + rs, + imm12, + } => { + let rs = allocs.next(rs); + let rd = allocs.next_writable(rd); + match alu_op { + AluOPRRI::Addi => { + put_string( + &format!( + "{} + {} => {}\n", + reg_name(rs), + imm12.bits, + reg_name(rd.to_reg()) + ), + sink, + ); + } + AluOPRRI::Slli => { + put_string( + &format!( + "{} << {} => {}\n", + reg_name(rs), + imm12.bits, + reg_name(rd.to_reg()) + ), + sink, + ); + } + AluOPRRI::Srli => { + put_string( + &format!( + "{} >> {} => {}\n", + reg_name(rs), + imm12.bits, + reg_name(rd.to_reg()) + ), + sink, + ); + } + _ => unreachable!("Op {:?} is not implemented", alu_op), + }; + + // let x = alu_op.op_code() + // | reg_to_gpr_num(rd.to_reg()) << 7 + // | alu_op.funct3() << 12 + // | reg_to_gpr_num(rs) << 15 + // | alu_op.imm12(imm12) << 20; + // sink.put4(x); + } + &Inst::Load { + rd, + op, + from, + flags, + } => { + let from = from.clone().with_allocs(&mut allocs); + let base = from.get_base_register(); + let offset = from.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + let rd = allocs.next_writable(rd); + + let (addr, imm12) = match (base, offset_imm12) { + // If the offset fits into an imm12 we can directly encode it. + (Some(base), Some(imm12)) => (base, imm12), + // Otherwise load the address it into a reg and load from it. + _ => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: from }.emit(&[], sink, emit_info, state); + (tmp.to_reg(), Imm12::zero()) + } + }; + put_string( + &format!( + "$ => {} :MLOAD({})\n", + reg_name(rd.to_reg()), + access_reg_with_offset(addr, imm12.bits), + ), + sink, + ); + + // let srcloc = state.cur_srcloc(); + // if !srcloc.is_default() && !flags.notrap() { + // // Register the offset at which the actual load instruction starts. + // sink.add_trap(TrapCode::HeapOutOfBounds); + // } + // + // sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); + } + &Inst::Store { op, src, flags, to } => { + let to = to.clone().with_allocs(&mut allocs); + let src = allocs.next(src); + + let base = to.get_base_register(); + let offset = to.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + + let (addr, imm12) = match (base, offset_imm12) { + // If the offset fits into an imm12 we can directly encode it. + (Some(base), Some(imm12)) => (base, imm12), + // Otherwise load the address it into a reg and load from it. + _ => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: to }.emit(&[], sink, emit_info, state); + (tmp.to_reg(), Imm12::zero()) + } + }; + put_string( + &format!( + "{} :MSTORE({})\n", + reg_name(src), + access_reg_with_offset(addr, imm12.bits), + ), + sink, + ); + + // let srcloc = state.cur_srcloc(); + // if !srcloc.is_default() && !flags.notrap() { + // // Register the offset at which the actual load instruction starts. 
+ // sink.add_trap(TrapCode::HeapOutOfBounds); + // } + // + // sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12)); + } + &Inst::Args { .. } => { + // Nothing: this is a pseudoinstruction that serves + // only to constrain registers at a certain point. + } + &Inst::Ret { + stack_bytes_to_pop, .. + } => { + // put_string(&format!("RETURN\n"), sink); + put_string(&format!(":JMP(RR)\n"), sink); + + /* if stack_bytes_to_pop != 0 { + Inst::AdjustSp { + amount: i64::from(stack_bytes_to_pop), + } + .emit(&[], sink, emit_info, state); + } + //jalr x0, x1, 0 + let x: u32 = (0b1100111) | (1 << 15); + sink.put4(x); */ + } + + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits: _to_bits, + } => { + todo!() /* let rn = allocs.next(rn); + let rd = allocs.next_writable(rd); + let mut insts = SmallInstVec::new(); + let shift_bits = (64 - from_bits) as i16; + let is_u8 = || from_bits == 8 && signed == false; + if is_u8() { + // special for u8. + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd, + rs: rn, + imm12: Imm12::from_bits(255), + }); + } else { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd, + rs: rn, + imm12: Imm12::from_bits(shift_bits), + }); + insts.push(Inst::AluRRImm12 { + alu_op: if signed { + AluOPRRI::Srai + } else { + AluOPRRI::Srli + }, + rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(shift_bits), + }); + } + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); */ + } + &Inst::AdjustSp { amount } => { + let amount = if amount > 0 { + format!("- {}", amount) + } else { + format!("+ {}", -amount) + }; + put_string(&format!("SP {amount} => SP\n"), sink); + + /* if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_stack_reg(), + rs: stack_reg(), + imm12: imm, + } + .emit(&[], sink, emit_info, state); + } else { + let tmp = writable_spilltmp_reg(); + let mut insts = Inst::load_constant_u64(tmp, amount as u64, &mut |_| tmp); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_stack_reg(), + rs1: tmp.to_reg(), + rs2: stack_reg(), + }); + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } */ + } + &Inst::Call { ref info } => { + // call + match info.dest { + ExternalName::User(name) => { + // For now we only support calls. + assert!(info.opcode.is_call()); + sink.add_call_site(info.opcode); + sink.add_reloc(Reloc::RiscvCall, &info.dest, 0); + // This will be patched externally to do a necessary jump. + put_string(&format!("; CALL {name}\n"), sink); + + // match name.index() { + // // Special case for ASSERT call. + // 0 => { + // Inst::Mov { + // ty: types::I64, + // rd: regs::writable_a0(), + // rm: info.uses[0].preg, + // } + // .emit(&[], sink, emit_info, state); + // put_string( + // &format!("{} :ASSERT\n", reg_name(info.uses[1].preg)), + // sink, + // ); + // } + // v => { + // Inst::Jal { + // dest: BranchTarget::Label(MachLabel::new(v)), + // } + // .emit(&[], sink, emit_info, state); + // } + // }; + + // if let Some(s) = state.take_stack_map() { + // sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s); + // } + // Inst::construct_auipc_and_jalr( + // Some(writable_link_reg()), + // writable_link_reg(), + // 0, + // ) + // .into_iter() + // .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + ExternalName::LibCall(..) + | ExternalName::TestCase { .. } + | ExternalName::KnownSymbol(..) => { + unimplemented!(); + // use indirect call. it is more simple. + // load ext name. 
+ // Inst::LoadExtName { + // rd: writable_spilltmp_reg2(), + // name: Box::new(info.dest.clone()), + // offset: 0, + // } + // .emit(&[], sink, emit_info, state); + // + // if let Some(s) = state.take_stack_map() { + // sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + // } + // if info.opcode.is_call() { + // sink.add_call_site(info.opcode); + // } + // call + // Inst::Jalr { + // rd: writable_link_reg(), + // base: spilltmp_reg2(), + // offset: Imm12::zero(), + // } + // .emit(&[], sink, emit_info, state); + } + } + + let callee_pop_size = i64::from(info.callee_pop_size); + state.virtual_sp_offset -= callee_pop_size; + trace!( + "call adjusts virtual sp offset by {callee_pop_size} -> {}", + state.virtual_sp_offset + ); + } + &Inst::CallInd { ref info } => { + // let rn = allocs.next(info.rn); + // put_string(&format!("CALL {}, {:?}\n", reg_name(rn), info.uses), sink); + + dbg!(info); + todo!(); + // For now we only support calls. + // assert!(info.opcode.is_call()); + + /* + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + + Inst::Jalr { + rd: writable_link_reg(), + base: rn, + offset: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + + let callee_pop_size = i64::from(info.callee_pop_size); + state.virtual_sp_offset -= callee_pop_size; + trace!( + "call adjusts virtual sp offset by {callee_pop_size} -> {}", + state.virtual_sp_offset + ); */ + } + + &Inst::ReturnCall { + ref callee, + ref info, + } => { + todo!() /* emit_return_call_common_sequence( + &mut allocs, + sink, + emit_info, + state, + info.new_stack_arg_size, + info.old_stack_arg_size, + &info.uses, + ); + + sink.add_call_site(ir::Opcode::ReturnCall); + sink.add_reloc(Reloc::RiscvCall, &callee, 0); + Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // `emit_return_call_common_sequence` emits an island if + // necessary, so we can safely disable the worst-case-size check + // in this case. + start_off = sink.cur_offset(); */ + } + + &Inst::ReturnCallInd { callee, ref info } => { + todo!() /* let callee = allocs.next(callee); + + emit_return_call_common_sequence( + &mut allocs, + sink, + emit_info, + state, + info.new_stack_arg_size, + info.old_stack_arg_size, + &info.uses, + ); + + Inst::Jalr { + rd: writable_zero_reg(), + base: callee, + offset: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + + // `emit_return_call_common_sequence` emits an island if + // necessary, so we can safely disable the worst-case-size check + // in this case. + start_off = sink.cur_offset(); */ + } + + &Inst::Jal { dest } => { + match dest { + BranchTarget::Label(label) => { + // TODO: the following two lines allow eg. optimizing out jump-to-here + // sink.use_label_at_offset(start_off, label, LabelUse::Jal20); + // sink.add_uncond_branch(start_off, start_off + 4, label); + put_string(&format!(":JMP(label_{})\n", label.index()), sink); + } + BranchTarget::ResolvedOffset(offset) => { + todo!() /* + let offset = offset as i64; + if offset != 0 { + if LabelUse::Jal20.offset_in_range(offset) { + let mut code = code.to_le_bytes(); + LabelUse::Jal20.patch_raw_offset(&mut code, offset); + sink.put_data(&code[..]); + } else { + Inst::construct_auipc_and_jalr( + None, + writable_spilltmp_reg(), + offset, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + } else { + // CondBr often generate Jal {dest : 0}, means otherwise no jump. 
+ } */ + } + } + } + &Inst::CondBr { + taken, + not_taken, + mut kind, + } => { + kind.rs1 = allocs.next(kind.rs1); + kind.rs2 = allocs.next(kind.rs2); + // TODO(akashin): Support other types of comparisons. + assert!(matches!(kind.kind, IntCC::NotEqual)); + assert_eq!(kind.rs2, zero_reg()); + match taken { + BranchTarget::Label(label) => { + put_string( + &format!("{} :JMPNZ(label_{})\n", reg_name(kind.rs1), label.index()), + sink, + ); + + // let code = kind.emit(); + // let code_inverse = kind.inverse().emit().to_le_bytes(); + // sink.use_label_at_offset(start_off, label, LabelUse::B12); + // sink.add_cond_branch(start_off, start_off + 4, label, &code_inverse); + // sink.put4(code); + } + BranchTarget::ResolvedOffset(offset) => { + assert!(offset != 0); + todo!(); + + // if LabelUse::B12.offset_in_range(offset as i64) { + // let code = kind.emit(); + // let mut code = code.to_le_bytes(); + // LabelUse::B12.patch_raw_offset(&mut code, offset as i64); + // sink.put_data(&code[..]) + // } else { + // let mut code = kind.emit().to_le_bytes(); + // // jump over the condbr , 4 bytes. + // LabelUse::B12.patch_raw_offset(&mut code[..], 4); + // sink.put_data(&code[..]); + // Inst::construct_auipc_and_jalr( + // None, + // writable_spilltmp_reg(), + // offset as i64, + // ) + // .into_iter() + // .for_each(|i| i.emit(&[], sink, emit_info, state)); + // } + } + } + // TODO(akashin): Can also merge this as an else in jump. + Inst::Jal { dest: not_taken }.emit(&[], sink, emit_info, state); + } + + &Inst::Mov { rd, rm, ty } => { + if rd.to_reg() == rm { + return; + } + + let rm = allocs.next(rm); + let rd = allocs.next_writable(rd); + put_string( + &format!("{} => {}\n", reg_name(rm), reg_name(rd.to_reg())), + sink, + ); + + // match rm.class() { + // RegClass::Int => Inst::AluRRImm12 { + // alu_op: AluOPRRI::Ori, + // rd: rd, + // rs: rm, + // imm12: Imm12::zero(), + // }, + // RegClass::Float => Inst::FpuRRR { + // alu_op: if ty == F32 { + // FpuOPRRR::FsgnjS + // } else { + // FpuOPRRR::FsgnjD + // }, + // frm: None, + // rd: rd, + // rs1: rm, + // rs2: rm, + // }, + // RegClass::Vector => Inst::VecAluRRImm5 { + // op: VecAluOpRRImm5::VmvrV, + // vd: rd, + // vs2: rm, + // // Imm 0 means copy 1 register. + // imm: Imm5::maybe_from_i8(0).unwrap(), + // mask: VecOpMasking::Disabled, + // // Vstate for this instruction is ignored. + // vstate: VState::from_type(ty), + // }, + // } + // .emit(&[], sink, emit_info, state); + } + + &Inst::MovFromPReg { rd, rm } => { + todo!() /* debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); + let rd = allocs.next_writable(rd); + let x = Inst::AluRRImm12 { + alu_op: AluOPRRI::Ori, + rd, + rs: Reg::from(rm), + imm12: Imm12::zero(), + }; + x.emit(&[], sink, emit_info, state); */ + } + + &Inst::BrTable { + index, + tmp1, + tmp2, + ref targets, + } => { + todo!() /* let index = allocs.next(index); + let tmp1 = allocs.next_writable(tmp1); + let tmp2 = allocs.next_writable(tmp2); + let ext_index = writable_spilltmp_reg(); + + // The default target is passed in as the 0th element of `targets` + // separate it here for clarity. + let default_target = targets[0]; + let targets = &targets[1..]; + + // We emit a bounds check on the index, if the index is larger than the number of + // jump table entries, we jump to the default block. Otherwise we compute a jump + // offset by multiplying the index by 8 (the size of each entry) and then jump to + // that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially. 
+ // + // Build the following sequence: + // + // extend_index: + // zext.w ext_index, index + // bounds_check: + // li tmp, n_labels + // bltu ext_index, tmp, compute_target + // jump_to_default_block: + // auipc pc, 0 + // jalr zero, pc, default_block + // compute_target: + // auipc pc, 0 + // slli tmp, ext_index, 3 + // add pc, pc, tmp + // jalr zero, pc, 0x10 + // jump_table: + // ; This repeats for each entry in the jumptable + // auipc pc, 0 + // jalr zero, pc, block_target + + // Extend the index to 64 bits. + // + // This prevents us branching on the top 32 bits of the index, which + // are undefined. + Inst::Extend { + rd: ext_index, + rn: index, + signed: false, + from_bits: 32, + to_bits: 64, + } + .emit(&[], sink, emit_info, state); + + // Bounds check. + // + // Check if the index passed in is larger than the number of jumptable + // entries that we have. If it is, we fallthrough to a jump into the + // default block. + Inst::load_constant_u32(tmp2, targets.len() as u64, &mut |_| tmp2) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::CondBr { + taken: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 3), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::UnsignedLessThan, + rs1: ext_index.to_reg(), + rs2: tmp2.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + sink.use_label_at_offset( + sink.cur_offset(), + default_target.as_label().unwrap(), + LabelUse::PCRel32, + ); + Inst::construct_auipc_and_jalr(None, tmp2, 0) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // Compute the jump table offset. + // We need to emit a PC relative offset, + + // Get the current PC. + Inst::Auipc { + rd: tmp1, + imm: Imm20::from_bits(0), + } + .emit(&[], sink, emit_info, state); + + // Multiply the index by 8, since that is the size in + // bytes of each jump table entry + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: ext_index.to_reg(), + imm12: Imm12::from_bits(3), + } + .emit(&[], sink, emit_info, state); + + // Calculate the base of the jump, PC + the offset from above. + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: tmp1, + rs1: tmp1.to_reg(), + rs2: tmp2.to_reg(), + } + .emit(&[], sink, emit_info, state); + + // Jump to the middle of the jump table. + // We add a 16 byte offset here, since we used 4 instructions + // since the AUIPC that was used to get the PC. + Inst::Jalr { + rd: writable_zero_reg(), + base: tmp1.to_reg(), + offset: Imm12::from_bits((4 * Inst::INSTRUCTION_SIZE) as i16), + } + .emit(&[], sink, emit_info, state); + + // Emit the jump table. + // + // Each entry is a aupc + jalr to the target block. We also start with a island + // if necessary. + + // Each entry in the jump table is 2 instructions, so 8 bytes. Check if + // we need to emit a jump table here to support that jump. + let distance = (targets.len() * 2 * Inst::INSTRUCTION_SIZE as usize) as u32; + if sink.island_needed(distance) { + sink.emit_island(&mut state.ctrl_plane); + } + + // Emit the jumps back to back + for target in targets.iter() { + sink.use_label_at_offset( + sink.cur_offset(), + target.as_label().unwrap(), + LabelUse::PCRel32, + ); + + Inst::construct_auipc_and_jalr(None, tmp2, 0) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + + // We've just emitted an island that is safe up to *here*. + // Mark it as such so that we don't needlessly emit additional islands. 
+ start_off = sink.cur_offset(); */ + } + + &Inst::VirtualSPOffsetAdj { amount } => { + println!("virtual_sp_offset_adj {amount}"); + // crate::trace!( + // "virtual sp offset adjusted by {} -> {}", + // amount, + // state.virtual_sp_offset + amount + // ); + // state.virtual_sp_offset += amount; + } + &Inst::Atomic { + op, + rd, + addr, + src, + amo, + } => { + todo!() /* let addr = allocs.next(addr); + let src = allocs.next(src); + let rd = allocs.next_writable(rd); + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() { + sink.add_trap(TrapCode::HeapOutOfBounds); + } + let x = op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | op.funct3() << 12 + | reg_to_gpr_num(addr) << 15 + | reg_to_gpr_num(src) << 20 + | op.funct7(amo) << 25; + + sink.put4(x); */ + } + &Inst::Fence { pred, succ } => { + todo!() /* let x = 0b0001111 + | 0b00000 << 7 + | 0b000 << 12 + | 0b00000 << 15 + | (succ as u32) << 20 + | (pred as u32) << 24; + + sink.put4(x); */ + } + &Inst::FenceI => todo!(), // sink.put4(0x0000100f), + &Inst::Auipc { rd, imm } => { + todo!() /* let rd = allocs.next_writable(rd); + let x = enc_auipc(rd, imm); + sink.put4(x); */ + } + + &Inst::LoadAddr { rd, mem } => { + todo!() /* let mem = mem.with_allocs(&mut allocs); + let rd = allocs.next_writable(rd); + + let base = mem.get_base_register(); + let offset = mem.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + + match (mem, base, offset_imm12) { + (_, Some(rs), Some(imm12)) => { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + } + .emit(&[], sink, emit_info, state); + } + (_, Some(rs), None) => { + LoadConstant::U64(offset as u64) + .load_constant_and_add(rd, rs) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + } + (AMode::Const(addr), None, _) => { + // Get an address label for the constant and recurse. + let label = sink.get_label_for_constant(addr); + Inst::LoadAddr { + rd, + mem: AMode::Label(label), + } + .emit(&[], sink, emit_info, state); + } + (AMode::Label(label), None, _) => { + // Get the current PC. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20); + let inst = Inst::Auipc { + rd, + imm: Imm20::from_bits(0), + }; + inst.emit(&[], sink, emit_info, state); + + // Emit an add to the address with a relocation. + // This later gets patched up with the correct offset. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs: rd.to_reg(), + imm12: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + } + (amode, _, _) => { + unimplemented!("LoadAddr: {:?}", amode); + } + } */ + } + + &Inst::Select { + ref dst, + condition, + ref x, + ref y, + ty: _ty, + } => { + todo!() /* let condition = allocs.next(condition); + let x = alloc_value_regs(x, &mut allocs); + let y = alloc_value_regs(y, &mut allocs); + let dst: Vec<_> = dst + .clone() + .into_iter() + .map(|r| allocs.next_writable(r)) + .collect(); + + let mut insts = SmallInstVec::new(); + let label_false = sink.get_label(); + insts.push(Inst::CondBr { + taken: BranchTarget::Label(label_false), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: condition, + rs2: zero_reg(), + }, + }); + // here is the true + // select the first value + insts.extend(gen_moves(&dst[..], x.regs())); + let label_jump_over = sink.get_label(); + insts.push(Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + }); + // here is false + insts + .drain(..) 
+ .for_each(|i: Inst| i.emit(&[], sink, emit_info, state)); + sink.bind_label(label_false, &mut state.ctrl_plane); + // select second value1 + insts.extend(gen_moves(&dst[..], y.regs())); + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + &Inst::Jalr { rd, base, offset } => { + todo!() /* let rd = allocs.next_writable(rd); + let x = enc_jalr(rd, base, offset); + sink.put4(x); */ + } + &Inst::ECall => { + todo!() // sink.put4(0x00000073); + } + &Inst::EBreak => { + todo!() // sink.put4(0x00100073); + } + &Inst::Icmp { + cc, + rd, + ref a, + ref b, + ty, + } => { + let a = alloc_value_regs(a, &mut allocs); + let b = alloc_value_regs(b, &mut allocs); + let rd = allocs.next_writable(rd); + + let a = a + .only_reg() + .expect("Only support 1 register in comparison now"); + let b = b + .only_reg() + .expect("Only support 1 register in comparison now"); + debug_assert_eq!(a, a0()); + debug_assert_eq!(b, b0()); + + let opcode = match cc { + IntCC::Equal => "EQ", + IntCC::NotEqual => "NEQ", + IntCC::SignedLessThan => "SLT", + IntCC::SignedGreaterThanOrEqual => todo!(), + IntCC::SignedGreaterThan => todo!(), + IntCC::SignedLessThanOrEqual => todo!(), + IntCC::UnsignedLessThan => "LT", + IntCC::UnsignedGreaterThanOrEqual => todo!(), + IntCC::UnsignedGreaterThan => todo!(), + IntCC::UnsignedLessThanOrEqual => todo!(), + }; + + put_string(&format!("$ => {} :{opcode}\n", reg_name(rd.to_reg())), sink); + + /* + let label_true = sink.get_label(); + let label_false = sink.get_label(); + Inst::lower_br_icmp( + cc, + a, + b, + BranchTarget::Label(label_true), + BranchTarget::Label(label_false), + ty, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + sink.bind_label(label_true, &mut state.ctrl_plane); + Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 2), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_false, &mut state.ctrl_plane); + Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state); */ + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + ty, + } => { + todo!() /* let offset = allocs.next(offset); + let e = allocs.next(e); + let addr = allocs.next(addr); + let v = allocs.next(v); + let t0 = allocs.next_writable(t0); + let dst = allocs.next_writable(dst); + + // # addr holds address of memory location + // # e holds expected value + // # v holds desired value + // # dst holds return value + // cas: + // lr.w dst, (addr) # Load original value. + // bne dst, e, fail # Doesn’t match, so fail. + // sc.w t0, v, (addr) # Try to update. + // bnez t0 , cas # if store not ok,retry. 
+ // fail: + let fail_label = sink.get_label(); + let cas_lebel = sink.get_label(); + sink.bind_label(cas_lebel, &mut state.ctrl_plane); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: dst, + addr, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + if ty.bits() < 32 { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } else if ty.bits() == 32 { + Inst::Extend { + rd: dst, + rn: dst.to_reg(), + signed: false, + from_bits: 32, + to_bits: 64, + } + .emit(&[], sink, emit_info, state); + } + Inst::CondBr { + taken: BranchTarget::Label(fail_label), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: e, + rs2: dst.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + let store_value = if ty.bits() < 32 { + // reload value to t0. + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: t0, + addr, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + // set reset part. + AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + t0.to_reg() + } else { + v + }; + Inst::Atomic { + op: AtomicOP::store_op(ty), + rd: t0, + addr, + src: store_value, + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + // check is our value stored. + Inst::CondBr { + taken: BranchTarget::Label(cas_lebel), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: t0.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + sink.bind_label(fail_label, &mut state.ctrl_plane); */ + } + &Inst::AtomicRmwLoop { + offset, + op, + dst, + ty, + p, + x, + t0, + } => { + todo!() /* let offset = allocs.next(offset); + let p = allocs.next(p); + let x = allocs.next(x); + let t0 = allocs.next_writable(t0); + let dst = allocs.next_writable(dst); + let retry = sink.get_label(); + sink.bind_label(retry, &mut state.ctrl_plane); + // load old value. 
+ Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: dst, + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + // + + let store_value: Reg = match op { + crate::ir::AtomicRmwOp::Add + | crate::ir::AtomicRmwOp::Sub + | crate::ir::AtomicRmwOp::And + | crate::ir::AtomicRmwOp::Or + | crate::ir::AtomicRmwOp::Xor => { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::AluRRR { + alu_op: match op { + crate::ir::AtomicRmwOp::Add => AluOPRRR::Add, + crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub, + crate::ir::AtomicRmwOp::And => AluOPRRR::And, + crate::ir::AtomicRmwOp::Or => AluOPRRR::Or, + crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor, + _ => unreachable!(), + }, + rd: t0, + rs1: dst.to_reg(), + rs2: x, + } + .emit(&[], sink, emit_info, state); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + crate::ir::AtomicRmwOp::Nand => { + if ty.bits() < 32 { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: t0, + rs1: x, + rs2: dst.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::construct_bit_not(t0, t0.to_reg()).emit(&[], sink, emit_info, state); + if ty.bits() < 32 { + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } else { + t0.to_reg() + } + } + + crate::ir::AtomicRmwOp::Umin + | crate::ir::AtomicRmwOp::Umax + | crate::ir::AtomicRmwOp::Smin + | crate::ir::AtomicRmwOp::Smax => { + let label_select_dst = sink.get_label(); + let label_select_done = sink.get_label(); + if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax + { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + } else { + AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::lower_br_icmp( + match op { + crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan, + crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan, + crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan, + crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan, + _ => unreachable!(), + }, + ValueRegs::one(dst.to_reg()), + ValueRegs::one(x), + BranchTarget::Label(label_select_dst), + BranchTarget::zero(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + // here we select x. 
+ Inst::gen_move(t0, x, I64).emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_select_done), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_select_dst, &mut state.ctrl_plane); + Inst::gen_move(t0, dst.to_reg(), I64).emit(&[], sink, emit_info, state); + sink.bind_label(label_select_done, &mut state.ctrl_plane); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + crate::ir::AtomicRmwOp::Xchg => { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + x, + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + }; + + Inst::Atomic { + op: AtomicOP::store_op(ty), + rd: t0, + addr: p, + src: store_value, + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + + // if store is not ok,retry. + Inst::CondBr { + taken: BranchTarget::Label(retry), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: t0.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); */ + } + + &Inst::IntSelect { + op, + ref dst, + ref x, + ref y, + ty, + } => { + todo!() /* let x = alloc_value_regs(x, &mut allocs); + let y = alloc_value_regs(y, &mut allocs); + let dst: Vec<_> = dst.iter().map(|r| allocs.next_writable(*r)).collect(); + let label_true = sink.get_label(); + let label_false = sink.get_label(); + let label_done = sink.get_label(); + Inst::lower_br_icmp( + op.to_int_cc(), + x, + y, + BranchTarget::Label(label_true), + BranchTarget::Label(label_false), + ty, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + let gen_move = |dst: &Vec>, + val: &ValueRegs, + sink: &mut MachBuffer, + state: &mut EmitState| { + let mut insts = SmallInstVec::new(); + insts.push(Inst::Mov { + rd: dst[0], + rm: val.regs()[0], + ty: I64, + }); + if ty.bits() == 128 { + insts.push(Inst::Mov { + rd: dst[1], + rm: val.regs()[1], + ty, + }); + } + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + }; + //here is true , use x. 
+ sink.bind_label(label_true, &mut state.ctrl_plane); + gen_move(&dst, &x, sink, state); + Inst::gen_jump(label_done).emit(&[], sink, emit_info, state); + // here is false use y + sink.bind_label(label_false, &mut state.ctrl_plane); + gen_move(&dst, &y, sink, state); + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + + &Inst::SelectReg { + condition, + rd, + rs1, + rs2, + } => { + todo!() /* let mut condition = condition.clone(); + condition.rs1 = allocs.next(condition.rs1); + condition.rs2 = allocs.next(condition.rs2); + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + let label_true = sink.get_label(); + let label_jump_over = sink.get_label(); + let ty = Inst::canonical_type_for_rc(rs1.class()); + + sink.use_label_at_offset(sink.cur_offset(), label_true, LabelUse::B12); + let x = condition.emit(); + sink.put4(x); + // here is false , use rs2 + Inst::gen_move(rd, rs2, ty).emit(&[], sink, emit_info, state); + // and jump over + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here condition is true , use rs1 + sink.bind_label(label_true, &mut state.ctrl_plane); + Inst::gen_move(rd, rs1, ty).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + &Inst::FcvtToInt { + is_sat, + rd, + rs, + is_signed, + in_type, + out_type, + tmp, + } => { + todo!() /* let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_jump_over = sink.get_label(); + // get if nan. + Inst::emit_not_nan(rd, rs, in_type).emit(&[], sink, emit_info, state); + // jump to nan. + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs2: zero_reg(), + rs1: rd.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + + if !is_sat { + let f32_bounds = f32_cvt_to_int_bounds(is_signed, out_type.bits() as u8); + let f64_bounds = f64_cvt_to_int_bounds(is_signed, out_type.bits() as u8); + if in_type == F32 { + Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.0), |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.0), |_| { + writable_spilltmp_reg() + }) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + let le_op = if in_type == F32 { + FpuOPRRR::FleS + } else { + FpuOPRRR::FleD + }; + + // rd := rs <= tmp + Inst::FpuRRR { + alu_op: le_op, + frm: None, + rd, + rs1: rs, + rs2: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::TrapIf { + test: rd.to_reg(), + trap_code: TrapCode::IntegerOverflow, + } + .emit(&[], sink, emit_info, state); + + if in_type == F32 { + Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.1), |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.1), |_| { + writable_spilltmp_reg() + }) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // rd := rs >= tmp + Inst::FpuRRR { + alu_op: le_op, + frm: None, + rd, + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + + Inst::TrapIf { + test: rd.to_reg(), + trap_code: TrapCode::IntegerOverflow, + } + .emit(&[], sink, emit_info, state); + } + // convert to int normally. 
+ Inst::FpuRR { + frm: Some(FRM::RTZ), + alu_op: FpuOPRR::float_convert_2_int_op(in_type, is_signed, out_type), + rd, + rs, + } + .emit(&[], sink, emit_info, state); + if out_type.bits() < 32 && is_signed { + // load value part mask. + Inst::load_constant_u32( + writable_spilltmp_reg(), + if 16 == out_type.bits() { + (u16::MAX >> 1) as u64 + } else { + // I8 + (u8::MAX >> 1) as u64 + }, + &mut |_| writable_spilltmp_reg2(), + ) + .into_iter() + .for_each(|x| x.emit(&[], sink, emit_info, state)); + // keep value part. + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg(), + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + // extact sign bit. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(31), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(if 16 == out_type.bits() { + 15 + } else { + // I8 + 7 + }), + } + .emit(&[], sink, emit_info, state); + // make result,sign bit and value part. + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + } + + // I already have the result,jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan , move 0 into rd register + sink.bind_label(label_nan, &mut state.ctrl_plane); + if is_sat { + Inst::load_imm12(rd, Imm12::from_bits(0)).emit(&[], sink, emit_info, state); + } else { + // here is ud2. + Inst::Udf { + trap_code: TrapCode::BadConversionToInteger, + } + .emit(&[], sink, emit_info, state); + } + // bind jump_over + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + + &Inst::LoadExtName { + rd, + ref name, + offset, + } => { + // dbg!(rd, name, offset); + // let rd = allocs.next_writable(rd); + // put_string(&format!("CALL {name:?} => {}\n", reg_name(rd.to_reg())), sink); + + /* + // get the current pc. + Inst::Auipc { + rd: rd, + imm: Imm20::from_bits(0), + } + .emit(&[], sink, emit_info, state); + // load the value. + Inst::Load { + rd: rd, + op: LoadOP::Ld, + flags: MemFlags::trusted(), + from: AMode::RegOffset( + rd.to_reg(), + 12, // auipc load and jal. + I64, + ), + } + .emit(&[], sink, emit_info, state); + // jump over. + Inst::Jal { + // jal and abs8 size for 12. 
+ dest: BranchTarget::offset(12), + } + .emit(&[], sink, emit_info, state); + + sink.add_reloc(Reloc::Abs8, name.as_ref(), offset); + sink.put8(0); */ + } + &Inst::TrapIfC { + rs1, + rs2, + cc, + trap_code, + } => { + todo!() /* let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let label_trap = sink.get_label(); + let label_jump_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_trap), + not_taken: BranchTarget::Label(label_jump_over), + kind: IntegerCompare { kind: cc, rs1, rs2 }, + } + .emit(&[], sink, emit_info, state); + // trap + sink.bind_label(label_trap, &mut state.ctrl_plane); + Inst::Udf { trap_code }.emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + &Inst::TrapIf { test, trap_code } => { + todo!() /* let test = allocs.next(test); + let label_trap = sink.get_label(); + let label_jump_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_trap), + not_taken: BranchTarget::Label(label_jump_over), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: test, + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // trap + sink.bind_label(label_trap, &mut state.ctrl_plane); + Inst::Udf { + trap_code: trap_code, + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + &Inst::Udf { trap_code } => { + todo!() /* sink.add_trap(trap_code); + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + sink.put_data(Inst::TRAP_OPCODE); */ + } + &Inst::AtomicLoad { rd, ty, p } => { + todo!() /* let p = allocs.next(p); + let rd = allocs.next_writable(rd); + // emit the fence. + Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + // load. + Inst::Load { + rd: rd, + op: LoadOP::from_type(ty), + flags: MemFlags::new(), + from: AMode::RegOffset(p, 0, ty), + } + .emit(&[], sink, emit_info, state); + Inst::Fence { + pred: Inst::FENCE_REQ_R, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); */ + } + &Inst::AtomicStore { src, ty, p } => { + todo!() /* let src = allocs.next(src); + let p = allocs.next(p); + Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(p, 0, ty), + op: StoreOP::from_type(ty), + flags: MemFlags::new(), + src, + } + .emit(&[], sink, emit_info, state); */ + } + &Inst::FloatRound { + op, + rd, + int_tmp, + f_tmp, + rs, + ty, + } => { + todo!() /* // this code is port from glibc ceil floor ... implementation. + let rs = allocs.next(rs); + let int_tmp = allocs.next_writable(int_tmp); + let f_tmp = allocs.next_writable(f_tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_x = sink.get_label(); + let label_jump_over = sink.get_label(); + // check if is nan. 
+ Inst::emit_not_nan(int_tmp, rs, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: int_tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + fn max_value_need_round(ty: Type) -> u64 { + match ty { + F32 => { + let x: u64 = 1 << f32::MANTISSA_DIGITS; + let x = x as f32; + let x = u32::from_le_bytes(x.to_le_bytes()); + x as u64 + } + F64 => { + let x: u64 = 1 << f64::MANTISSA_DIGITS; + let x = x as f64; + u64::from_le_bytes(x.to_le_bytes()) + } + _ => unreachable!(), + } + } + // load max value need to round. + if ty == F32 { + Inst::load_fp_constant32(f_tmp, max_value_need_round(ty) as u32, &mut |_| { + writable_spilltmp_reg() + }) + } else { + Inst::load_fp_constant64(f_tmp, max_value_need_round(ty), &mut |_| { + writable_spilltmp_reg() + }) + } + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // get abs value. + Inst::emit_fabs(rd, rs, ty).emit(&[], sink, emit_info, state); + + // branch if f_tmp < rd + Inst::FpuRRR { + frm: None, + alu_op: if ty == F32 { + FpuOPRRR::FltS + } else { + FpuOPRRR::FltD + }, + rd: int_tmp, + rs1: f_tmp.to_reg(), + rs2: rd.to_reg(), + } + .emit(&[], sink, emit_info, state); + + Inst::CondBr { + taken: BranchTarget::Label(label_x), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: int_tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + + //convert to int. + Inst::FpuRR { + alu_op: FpuOPRR::float_convert_2_int_op(ty, true, I64), + frm: Some(op.to_frm()), + rd: int_tmp, + rs: rs, + } + .emit(&[], sink, emit_info, state); + //convert back. + Inst::FpuRR { + alu_op: FpuOPRR::int_convert_2_float_op(I64, true, ty), + frm: Some(op.to_frm()), + rd, + rs: int_tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + // copy sign. + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjS + } else { + FpuOPRRR::FsgnjD + }, + frm: None, + rd, + rs1: rd.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + // jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan. + sink.bind_label(label_nan, &mut state.ctrl_plane); + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FaddS + } else { + FpuOPRRR::FaddD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here select origin x. + sink.bind_label(label_x, &mut state.ctrl_plane); + Inst::gen_move(rd, rs, ty).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + + &Inst::FloatSelect { + op, + rd, + tmp, + rs1, + rs2, + ty, + } => { + todo!() /* let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let tmp = allocs.next_writable(tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_jump_over = sink.get_label(); + // check if rs1 is nan. + Inst::emit_not_nan(tmp, rs1, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // check if rs2 is nan. 
+ Inst::emit_not_nan(tmp, rs2, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // here rs1 and rs2 is not nan. + Inst::FpuRRR { + alu_op: op.to_fpuoprrr(ty), + frm: None, + rd: rd, + rs1: rs1, + rs2: rs2, + } + .emit(&[], sink, emit_info, state); + // special handle for +0 or -0. + { + // check is rs1 and rs2 all equal to zero. + let label_done = sink.get_label(); + { + // if rs1 == 0 + let mut insts = Inst::emit_if_float_not_zero( + tmp, + rs1, + ty, + BranchTarget::Label(label_done), + BranchTarget::zero(), + ); + insts.extend(Inst::emit_if_float_not_zero( + tmp, + rs2, + ty, + BranchTarget::Label(label_done), + BranchTarget::zero(), + )); + insts + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + Inst::FpuRR { + alu_op: FpuOPRR::move_f_to_x_op(ty), + frm: None, + rd: tmp, + rs: rs1, + } + .emit(&[], sink, emit_info, state); + Inst::FpuRR { + alu_op: FpuOPRR::move_f_to_x_op(ty), + frm: None, + rd: writable_spilltmp_reg(), + rs: rs2, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: if op == FloatSelectOP::Max { + AluOPRRR::And + } else { + AluOPRRR::Or + }, + rd: tmp, + rs1: tmp.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + // move back to rd. + Inst::FpuRR { + alu_op: FpuOPRR::move_x_to_f_op(ty), + frm: None, + rd, + rs: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + // + sink.bind_label(label_done, &mut state.ctrl_plane); + } + // we have the reuslt,jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan. + sink.bind_label(label_nan, &mut state.ctrl_plane); + op.snan_bits(tmp, ty) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + // move to rd. + Inst::FpuRR { + alu_op: FpuOPRR::move_x_to_f_op(ty), + frm: None, + rd, + rs: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over, &mut state.ctrl_plane); */ + } + &Inst::Popcnt { + sum, + tmp, + step, + rs, + ty, + } => { + todo!() /* let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let sum = allocs.next_writable(sum); + // load 0 to sum , init. + Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and add sum. 
+ { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_over), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + &Inst::Rev8 { rs, rd, tmp, step } => { + todo!() /* let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let rd = allocs.next_writable(rd); + // init. + Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); + Inst::gen_move(tmp, rs, I64).emit(&[], sink, emit_info, state); + // load 56 to step. + Inst::load_imm12(step, Imm12::from_bits(56)).emit(&[], sink, emit_info, state); + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThan, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: writable_spilltmp_reg(), + rs: tmp.to_reg(), + imm12: Imm12::from_bits(255), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: writable_spilltmp_reg(), + rs1: spilltmp_reg(), + rs2: step.to_reg(), + } + .emit(&[], sink, emit_info, state); + + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + { + // reset step + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-8), + } + .emit(&[], sink, emit_info, state); + //reset tmp. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(8), + } + .emit(&[], sink, emit_info, state); + // loop. + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + &Inst::Cltz { + sum, + tmp, + step, + rs, + leading, + ty, + } => { + todo!() /* let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let sum = allocs.next_writable(sum); + // load 0 to sum , init. 
+ Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + if leading { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + } + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and add sum. + { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: if leading { + AluOPRRI::Srli + } else { + AluOPRRI::Slli + }, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + &Inst::Brev8 { + rs, + ty, + step, + tmp, + tmp2, + rd, + } => { + todo!() /* let rs = allocs.next(rs); + let step = allocs.next_writable(step); + let tmp = allocs.next_writable(tmp); + let tmp2 = allocs.next_writable(tmp2); + let rd = allocs.next_writable(rd); + Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + Inst::load_imm12(tmp2, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 8) as i16), + } + .emit(&[], sink, emit_info, state); + + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and set bit. 
+ { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_over), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp2.to_reg(), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + { + // reset tmp2 + // if (step %=8 == 0) then tmp2 = tmp2 >> 15 + // if (step %=8 != 0) then tmp2 = tmp2 << 1 + let label_over = sink.get_label(); + let label_sll_1 = sink.get_label(); + Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_bits(8)).emit( + &[], + sink, + emit_info, + state, + ); + Inst::AluRRR { + alu_op: AluOPRRR::Rem, + rd: writable_spilltmp_reg2(), + rs1: step.to_reg(), + rs2: spilltmp_reg2(), + } + .emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_sll_1), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: spilltmp_reg2(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits(15), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_over), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_sll_1, &mut state.ctrl_plane); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + &Inst::StackProbeLoop { + guard_size, + probe_count, + tmp: guard_size_tmp, + } => { + todo!() /* let step = writable_spilltmp_reg(); + Inst::load_constant_u64( + step, + (guard_size as u64) * (probe_count as u64), + &mut |_| step, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::load_constant_u64(guard_size_tmp, guard_size as u64, &mut |_| guard_size_tmp) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + let loop_start = sink.get_label(); + let label_done = sink.get_label(); + sink.bind_label(loop_start, &mut state.ctrl_plane); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::UnsignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: guard_size_tmp.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // compute address. + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: writable_spilltmp_reg2(), + rs1: stack_reg(), + rs2: step.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(spilltmp_reg2(), 0, I8), + op: StoreOP::Sb, + flags: MemFlags::new(), + src: zero_reg(), + } + .emit(&[], sink, emit_info, state); + // reset step. 
+ Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: step, + rs1: step.to_reg(), + rs2: guard_size_tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(loop_start), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_done, &mut state.ctrl_plane); */ + } + &Inst::VecAluRRRImm5 { + op, + vd, + vd_src, + imm, + vs2, + ref mask, + .. + } => { + todo!() /* let vs2 = allocs.next(vs2); + let vd_src = allocs.next(vd_src); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + debug_assert_eq!(vd.to_reg(), vd_src); + + sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, mask)); */ + } + &Inst::VecAluRRRR { + op, + vd, + vd_src, + vs1, + vs2, + ref mask, + .. + } => { + todo!() /* let vs1 = allocs.next(vs1); + let vs2 = allocs.next(vs2); + let vd_src = allocs.next(vd_src); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + debug_assert_eq!(vd.to_reg(), vd_src); + + sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, mask)); */ + } + &Inst::VecAluRRR { + op, + vd, + vs1, + vs2, + ref mask, + .. + } => { + todo!() /* let vs1 = allocs.next(vs1); + let vs2 = allocs.next(vs2); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu(op, vd, vs1, vs2, mask)); */ + } + &Inst::VecAluRRImm5 { + op, + vd, + imm, + vs2, + ref mask, + .. + } => { + todo!() /* let vs2 = allocs.next(vs2); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, mask)); */ + } + &Inst::VecAluRR { + op, + vd, + vs, + ref mask, + .. + } => { + todo!() /* let vs = allocs.next(vs); + let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu_rr(op, vd, vs, mask)); */ + } + &Inst::VecAluRImm5 { + op, + vd, + imm, + ref mask, + .. + } => { + todo!() /* let vd = allocs.next_writable(vd); + let mask = mask.with_allocs(&mut allocs); + + sink.put4(encode_valu_r_imm(op, vd, imm, mask)); */ + } + &Inst::VecSetState { rd, ref vstate } => { + todo!() /* let rd = allocs.next_writable(rd); + + sink.put4(encode_vcfg_imm( + 0x57, + rd.to_reg(), + vstate.avl.unwrap_static(), + &vstate.vtype, + )); + + // Update the current vector emit state. + state.vstate = EmitVState::Known(vstate.clone()); */ + } + + &Inst::VecLoad { + eew, + to, + ref from, + ref mask, + flags, + .. + } => { + todo!() /* let from = from.clone().with_allocs(&mut allocs); + let to = allocs.next_writable(to); + let mask = mask.with_allocs(&mut allocs); + + // Vector Loads don't support immediate offsets, so we need to load it into a register. + let addr = match from { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); + tmp.to_reg() + } + } + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. 
+ sink.add_trap(TrapCode::HeapOutOfBounds); + } + + sink.put4(encode_vmem_load( + 0x07, + to.to_reg(), + eew, + addr, + from.lumop(), + mask, + from.mop(), + from.nf(), + )); */ + } + + &Inst::VecStore { + eew, + ref to, + from, + ref mask, + flags, + .. + } => { + todo!() /* let to = to.clone().with_allocs(&mut allocs); + let from = allocs.next(from); + let mask = mask.with_allocs(&mut allocs); + + // Vector Stores don't support immediate offsets, so we need to load it into a register. + let addr = match to { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); + tmp.to_reg() + } + } + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); + } + + sink.put4(encode_vmem_store( + 0x27, + from, + eew, + addr, + to.sumop(), + mask, + to.mop(), + to.nf(), + )); */ + } + }; + let end_off = sink.cur_offset(); + assert!( + (end_off - start_off) <= Inst::worst_case_size(), + "Inst:{:?} length:{} worst_case_size:{}", + self, + end_off - start_off, + Inst::worst_case_size() + ); + } + + fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String { + let mut allocs = AllocationConsumer::new(allocs); + self.print_with_state(state, &mut allocs) + } +} + +// helper function. +fn alloc_value_regs(orgin: &ValueRegs, alloc: &mut AllocationConsumer) -> ValueRegs { + match orgin.regs().len() { + 1 => ValueRegs::one(alloc.next(orgin.regs()[0])), + 2 => ValueRegs::two(alloc.next(orgin.regs()[0]), alloc.next(orgin.regs()[1])), + _ => unreachable!(), + } +} + +#[allow(unused)] +fn emit_return_call_common_sequence( + allocs: &mut AllocationConsumer<'_>, + sink: &mut MachBuffer, + emit_info: &EmitInfo, + state: &mut EmitState, + new_stack_arg_size: u32, + old_stack_arg_size: u32, + uses: &CallArgList, +) { + todo!() + /* for u in uses { + let _ = allocs.next(u.vreg); + } + + // We are emitting a dynamic number of instructions and might need an + // island. We emit four instructions regardless of how many stack arguments + // we have, up to two instructions for the actual call, and then two + // instructions per word of stack argument space. + let new_stack_words = new_stack_arg_size / 8; + let insts = 4 + 2 + 2 * new_stack_words; + let space_needed = insts * u32::try_from(Inst::INSTRUCTION_SIZE).unwrap(); + if sink.island_needed(space_needed) { + let jump_around_label = sink.get_label(); + Inst::Jal { + dest: BranchTarget::Label(jump_around_label), + } + .emit(&[], sink, emit_info, state); + sink.emit_island(&mut state.ctrl_plane); + sink.bind_label(jump_around_label, &mut state.ctrl_plane); + } + + // Copy the new frame on top of our current frame. + // + // The current stack layout is the following: + // + // | ... | + // +---------------------+ + // | ... | + // | stack arguments | + // | ... | + // current | return address | + // frame | old FP | <-- FP + // | ... | + // | old stack slots | + // | ... | + // +---------------------+ + // | ... | + // new | new stack arguments | + // frame | ... 
| <-- SP + // +---------------------+ + // + // We need to restore the old FP, restore the return address from the stack + // to the link register, copy the new stack arguments over the old stack + // arguments, adjust SP to point to the new stack arguments, and then jump + // to the callee (which will push the old FP and RA again). Note that the + // actual jump happens outside this helper function. + + assert_eq!( + new_stack_arg_size % 8, + 0, + "size of new stack arguments must be 8-byte aligned" + ); + + // The delta from our frame pointer to the (eventual) stack pointer value + // when we jump to the tail callee. This is the difference in size of stack + // arguments as well as accounting for the two words we pushed onto the + // stack upon entry to this function (the return address and old frame + // pointer). + let fp_to_callee_sp = i64::from(old_stack_arg_size) - i64::from(new_stack_arg_size) + 16; + + let tmp1 = regs::writable_spilltmp_reg(); + let tmp2 = regs::writable_spilltmp_reg2(); + + // Restore the return address to the link register, and load the old FP into + // a temporary register. + // + // We can't put the old FP into the FP register until after we copy the + // stack arguments into place, since that uses address modes that are + // relative to our current FP. + // + // Note that the FP is saved in the function prologue for all non-leaf + // functions, even when `preserve_frame_pointers=false`. Note also that + // `return_call` instructions make it so that a function is considered + // non-leaf. Therefore we always have an FP to restore here. + + Inst::gen_load( + writable_link_reg(), + AMode::FPOffset(8, I64), + I64, + MemFlags::trusted(), + ) + .emit(&[], sink, emit_info, state); + Inst::gen_load(tmp1, AMode::FPOffset(0, I64), I64, MemFlags::trusted()).emit( + &[], + sink, + emit_info, + state, + ); + + // Copy the new stack arguments over the old stack arguments. + for i in (0..new_stack_words).rev() { + // Load the `i`th new stack argument word from the temporary stack + // space. + Inst::gen_load( + tmp2, + AMode::SPOffset(i64::from(i * 8), types::I64), + types::I64, + ir::MemFlags::trusted(), + ) + .emit(&[], sink, emit_info, state); + + // Store it to its final destination on the stack, overwriting our + // current frame. + Inst::gen_store( + AMode::FPOffset(fp_to_callee_sp + i64::from(i * 8), types::I64), + tmp2.to_reg(), + types::I64, + ir::MemFlags::trusted(), + ) + .emit(&[], sink, emit_info, state); + } + + // Initialize the SP for the tail callee, deallocating the temporary stack + // argument space and our current frame at the same time. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: regs::writable_stack_reg(), + rs: regs::fp_reg(), + imm12: Imm12::maybe_from_u64(fp_to_callee_sp as u64).unwrap(), + } + .emit(&[], sink, emit_info, state); + + // Move the old FP value from the temporary into the FP register. 
+ Inst::Mov { + ty: types::I64, + rd: regs::writable_fp_reg(), + rm: tmp1.to_reg(), + } + .emit(&[], sink, emit_info, state); + + state.virtual_sp_offset -= i64::from(new_stack_arg_size); + trace!( + "return_call[_ind] adjusts virtual sp offset by {} -> {}", + new_stack_arg_size, + state.virtual_sp_offset + ); */ +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/emit_tests.rs b/cranelift/codegen/src/isa/zkasm/inst/emit_tests.rs new file mode 100644 index 000000000000..41e8ea6f8a6f --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/emit_tests.rs @@ -0,0 +1,2338 @@ +#[allow(unused)] +use crate::ir::LibCall; +use crate::isa::zkasm::inst::*; +use crate::settings; +use alloc::vec::Vec; +use std::borrow::Cow; + +#[test] +fn test_zkasm_binemit() { + struct TestUnit { + inst: Inst, + assembly: &'static str, + code: TestEncoding, + } + + struct TestEncoding(Cow<'static, str>); + + impl From<&'static str> for TestEncoding { + fn from(value: &'static str) -> Self { + Self(value.into()) + } + } + + impl From for TestEncoding { + fn from(value: u32) -> Self { + let value = value.swap_bytes(); + let value = format!("{value:08X}"); + Self(value.into()) + } + } + + impl TestUnit { + fn new(inst: Inst, assembly: &'static str, code: impl Into) -> Self { + let code = code.into(); + Self { + inst, + assembly, + code, + } + } + } + + let mut insns = Vec::::with_capacity(500); + + insns.push(TestUnit::new( + Inst::Ret { + rets: vec![], + stack_bytes_to_pop: 0, + }, + "ret", + 0x00008067, + )); + insns.push(TestUnit::new( + Inst::Ret { + rets: vec![], + stack_bytes_to_pop: 16, + }, + "add sp, sp, #16 ; ret", + "1301010167800000", + )); + + insns.push(TestUnit::new( + Inst::Mov { + rd: writable_fa0(), + rm: fa1(), + ty: F32, + }, + "fmv.s fa0,fa1", + 0x20b58553, + )); + + insns.push(TestUnit::new( + Inst::Mov { + rd: writable_fa0(), + rm: fa1(), + ty: F64, + }, + "fmv.d fa0,fa1", + 0x22b58553, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Brev8, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "brev8 a1,a0", + 0x68755593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Rev8, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "rev8 a1,a0", + 0x6b855593, + )); + + // + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bclri, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "bclri a1,a0,5", + 0x48551593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bexti, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "bexti a1,a0,5", + 0x48555593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Binvi, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "binvi a1,a0,5", + 0x68551593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bseti, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "bseti a1,a0,5", + 0x28551593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Rori, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "rori a1,a0,5", + 0x60555593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Roriw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "roriw a1,a0,5", + 0x6055559b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SlliUw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "slli.uw a1,a0,5", + 
0x855159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Clz, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "clz a1,a0", + 0x60051593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Clzw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "clzw a1,a0", + 0x6005159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Cpop, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "cpop a1,a0", + 0x60251593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Cpopw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "cpopw a1,a0", + 0x6025159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Ctz, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "ctz a1,a0", + 0x60151593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Ctzw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "ctzw a1,a0", + 0x6015159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sextb, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "sext.b a1,a0", + 0x60451593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sexth, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "sext.h a1,a0", + 0x60551593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Zexth, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "zext.h a1,a0", + 0x80545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Orcb, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "orc.b a1,a0", + 0x28755593, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "zext.w a1,a0", + 0x80505bb, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Adduw, + rd: writable_a1(), + rs1: a0(), + rs2: a1(), + }, + "add.uw a1,a0,a1", + 0x08b505bb, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Andn, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "andn a1,a0,zero", + 0x400575b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bclr, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bclr a1,a0,zero", + 0x480515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bext, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bext a1,a0,zero", + 0x480555b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Binv, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "binv a1,a0,zero", + 0x680515b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bset, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bset a1,a0,zero", + 0x280515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmul, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmul a1,a0,zero", + 0xa0515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmulh, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmulh a1,a0,zero", + 0xa0535b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmulr, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmulr a1,a0,zero", + 0xa0525b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Max, 
+ rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "max a1,a0,zero", + 0xa0565b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Maxu, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "maxu a1,a0,zero", + 0xa0575b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Min, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "min a1,a0,zero", + 0xa0545b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Minu, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "minu a1,a0,zero", + 0xa0555b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Orn, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "orn a1,a0,zero", + 0x400565b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rol, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rol a1,a0,zero", + 0x600515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rolw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rolw a1,a0,zero", + 0x600515bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Ror, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "ror a1,a0,zero", + 0x600555b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rorw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rorw a1,a0,zero", + 0x600555bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh1add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh1add a1,a0,zero", + 0x200525b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh1adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh1add.uw a1,a0,zero", + 0x200525bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh2add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh2add a1,a0,zero", + 0x200545b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh2adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh2add.uw a1,a0,zero", + 0x200545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh3add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh3add a1,a0,zero", + 0x200565b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh3adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh3add.uw a1,a0,zero", + 0x200565bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Xnor, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "xnor a1,a0,zero", + 0x400545b3, + )); + + // Zbkb + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Pack, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "pack a1,a0,zero", + 0x080545b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Packw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "packw a1,a0,zero", + 0x080545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Packh, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "packh a1,a0,zero", + 0x080575b3, + )); + + // + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_fp_reg(), + rs1: fp_reg(), + rs2: zero_reg(), + }, + "add fp,fp,zero", + 0x40433, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_fp_reg(), + rs: stack_reg(), + imm12: 
Imm12::maybe_from_u64(100).unwrap(), + }, + "addi fp,sp,100", + 0x6410413, + )); + insns.push(TestUnit::new( + Inst::Lui { + rd: writable_zero_reg(), + imm: Imm20::from_bits(120), + }, + "lui zero,120", + 0x78037, + )); + insns.push(TestUnit::new( + Inst::Auipc { + rd: writable_zero_reg(), + imm: Imm20::from_bits(120), + }, + "auipc zero,120", + 0x78017, + )); + + insns.push(TestUnit::new( + Inst::Jalr { + rd: writable_a0(), + base: a0(), + offset: Imm12::from_bits(100), + }, + "jalr a0,100(a0)", + 0x6450567, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lb, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I8), + }, + "lb a0,100(a1)", + 0x6458503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lh, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I16), + }, + "lh a0,100(a1)", + 0x6459503, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lw, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I32), + }, + "lw a0,100(a1)", + 0x645a503, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Ld, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "ld a0,100(a1)", + 0x645b503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: Writable::from_reg(fa0()), + op: LoadOP::Flw, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "flw fa0,100(a1)", + 0x645a507, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: Writable::from_reg(fa0()), + op: LoadOP::Fld, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "fld fa0,100(a1)", + 0x645b507, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I8), + op: StoreOP::Sb, + flags: MemFlags::new(), + src: a0(), + }, + "sb a0,100(sp)", + 0x6a10223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I16), + op: StoreOP::Sh, + flags: MemFlags::new(), + src: a0(), + }, + "sh a0,100(sp)", + 0x6a11223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I32), + op: StoreOP::Sw, + flags: MemFlags::new(), + src: a0(), + }, + "sw a0,100(sp)", + 0x6a12223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Sd, + flags: MemFlags::new(), + src: a0(), + }, + "sd a0,100(sp)", + 0x6a13223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Fsw, + flags: MemFlags::new(), + src: fa0(), + }, + "fsw fa0,100(sp)", + 0x6a12227, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Fsd, + flags: MemFlags::new(), + src: fa0(), + }, + "fsd fa0,100(sp)", + 0x6a13227, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "addi a0,a0,100", + 0x6450513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slti, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "slti a0,a0,100", + 0x6452513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SltiU, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "sltiu a0,a0,100", + 0x6453513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Xori, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "xori a0,a0,100", + 0x6454513, + )); + 
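    // Editor's note (illustrative sketch, not part of the original patch): the
    // I-type expectations above follow the layout documented in encode.rs
    // (imm[11:0] | rs1 | funct3 | rd | opcode). For example,
    // `xori a0,a0,100` == 0x06454513 decomposes into
    //   imm12 = 100 (0x064), rs1 = x10 (a0), funct3 = 0b100, rd = x10 (a0),
    //   opcode = 0x13,
    // and `DebugITypeInst::from_u32(0x06454513)`, defined later in this file,
    // recovers exactly these fields.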
insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "andi a0,a0,100", + 0x6457513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "slli a0,a0,5", + 0x551513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srli a0,a0,5", + 0x555513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srai, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srai a0,a0,5", + 0x40555513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(120), + }, + "addiw a0,a0,120", + 0x785051b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slliw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "slliw a0,a0,5", + 0x55151b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SrliW, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srliw a0,a0,5", + 0x55551b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sraiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "sraiw a0,a0,5", + 0x4055551b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sraiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "sraiw a0,a0,5", + 0x4055551b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "add a0,a0,a1", + 0xb50533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sub a0,a0,a1", + 0x40b50533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sll a0,a0,a1", + 0xb51533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Slt, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "slt a0,a0,a1", + 0xb52533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::SltU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sltu a0,a0,a1", + 0xb53533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Xor, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "xor a0,a0,a1", + 0xb54533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "srl a0,a0,a1", + 0xb55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sra, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sra a0,a0,a1", + 0x40b55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "or a0,a0,a1", + 0xb56533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "and a0,a0,a1", + 0xb57533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Addw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "addw a0,a0,a1", + 0xb5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Subw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "subw a0,a0,a1", + 0x40b5053b, + )); + 
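    // Editor's note (illustrative sketch, not part of the original patch): the
    // R-type expectations above can be cross-checked with `DebugRTypeInst`,
    // defined later in this file. For example, `add a0,a0,a1` == 0x00b50533
    // splits into
    //   opcode = 0x33, rd = x10 (a0), funct3 = 0b000, rs1 = x10 (a0),
    //   rs2 = x11 (a1), funct7 = 0b0000000,
    // matching the R-type layout documented in encode.rs.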
insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sllw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sllw a0,a0,a1", + 0xb5153b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Srlw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "srlw a0,a0,a1", + 0xb5553b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sraw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sraw a0,a0,a1", + 0x40b5553b, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mul, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mul a0,a0,a1", + 0x2b50533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulh, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulh a0,a0,a1", + 0x2b51533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulhsu, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulhsu a0,a0,a1", + 0x2b52533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulhu, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulhu a0,a0,a1", + 0x2b53533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Div, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "div a0,a0,a1", + 0x2b54533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::DivU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "divu a0,a0,a1", + 0x2b55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rem, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "rem a0,a0,a1", + 0x2b56533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::RemU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remu a0,a0,a1", + 0x2b57533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulw a0,a0,a1", + 0x2b5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Divw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "divw a0,a0,a1", + 0x2b5453b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Remw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remw a0,a0,a1", + 0x2b5653b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Remuw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remuw a0,a0,a1", + 0x2b5753b, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRRR::FaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fadd.s fa0,fa0,fa1,rne", + 0xb50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RTZ), + alu_op: FpuOPRRR::FsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsub.s fa0,fa0,fa1,rtz", + 0x8b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RUP), + alu_op: FpuOPRRR::FmulS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmul.s fa0,fa0,fa1,rup", + 0x10b53553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FdivS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fdiv.s fa0,fa0,fa1", + 0x18b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnj.s fa0,fa0,fa1", + 0x20b50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjnS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjn.s 
fa0,fa0,fa1", + 0x20b51553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjxS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjx.s fa0,fa0,fa1", + 0x20b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FminS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmin.s fa0,fa0,fa1", + 0x28b50553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmaxS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmax.s fa0,fa0,fa1", + 0x28b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FeqS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "feq.s a0,fa0,fa1", + 0xa0b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FltS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "flt.s a0,fa0,fa1", + 0xa0b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FleS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "fle.s a0,fa0,fa1", + 0xa0b50553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fadd.d fa0,fa0,fa1", + 0x2b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsub.d fa0,fa0,fa1", + 0xab57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmulD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmul.d fa0,fa0,fa1", + 0x12b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FdivD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fdiv.d fa0,fa0,fa1", + 0x1ab57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnj.d fa0,fa0,fa1", + 0x22b50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjnD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjn.d fa0,fa0,fa1", + 0x22b51553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjxD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjx.d fa0,fa0,fa1", + 0x22b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FminD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmin.d fa0,fa0,fa1", + 0x2ab50553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmaxD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmax.d fa0,fa0,fa1", + 0x2ab51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FeqD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "feq.d a0,fa0,fa1", + 0xa2b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FltD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "flt.d a0,fa0,fa1", + 0xa2b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FleD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "fle.d a0,fa0,fa1", + 0xa2b50553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRR::FsqrtS, + rd: writable_fa0(), + rs: fa1(), + }, + "fsqrt.s fa0,fa1,rne", + 0x58058553, + )); + 
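    // Editor's note (illustrative, not part of the original patch): in the
    // floating-point expectations above the rounding mode is carried in the
    // funct3 field. A static mode is encoded directly (RNE = 0b000,
    // RTZ = 0b001, RUP = 0b011), while `frm: None` falls back to the dynamic
    // rounding mode 0b111; that is why `fadd.s fa0,fa0,fa1,rne` ends in
    // ...0553 but `fdiv.s fa0,fa0,fa1` ends in ...7553.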
insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWS, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.w.s a0,fa1", + 0xc005f553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWuS, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.wu.s a0,fa1", + 0xc015f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvXW, + rd: writable_a0(), + rs: fa1(), + }, + "fmv.x.w a0,fa1", + 0xe0058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FclassS, + rd: writable_a0(), + rs: fa1(), + }, + "fclass.s a0,fa1", + 0xe0059553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSw, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.w fa0,a0", + 0xd0057553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSwU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.wu fa0,a0", + 0xd0157553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvWX, + rd: writable_fa0(), + rs: a0(), + }, + "fmv.w.x fa0,a0", + 0xf0050553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLS, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.l.s a0,fa0", + 0xc0257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLuS, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.lu.s a0,fa0", + 0xc0357553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + + alu_op: FpuOPRR::FcvtSL, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.l fa0,a0", + 0xd0257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSLU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.lu fa0,a0", + 0xd0357553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FsqrtD, + rd: writable_fa0(), + rs: fa1(), + }, + "fsqrt.d fa0,fa1", + 0x5a05f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWD, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.w.d a0,fa1", + 0xc205f553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWuD, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.wu.d a0,fa1", + 0xc215f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvXD, + rd: writable_a0(), + rs: fa1(), + }, + "fmv.x.d a0,fa1", + 0xe2058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FclassD, + rd: writable_a0(), + rs: fa1(), + }, + "fclass.d a0,fa1", + 0xe2059553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSD, + rd: writable_fa0(), + rs: fa0(), + }, + "fcvt.s.d fa0,fa0", + 0x40157553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtDWU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.wu fa0,a0", + 0xd2150553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvDX, + rd: writable_fa0(), + rs: a0(), + }, + "fmv.d.x fa0,a0", + 0xf2050553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLD, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.l.d a0,fa0", + 0xc2257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLuD, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.lu.d a0,fa0", + 0xc2357553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: 
FpuOPRR::FcvtDL, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.l fa0,a0", + 0xd2257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtDLu, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.lu fa0,a0", + 0xd2357553, + )); + ////////////////////// + + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRRRR::FmaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmadd.s fa0,fa0,fa1,fa7,rne", + 0x88b50543, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FmsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmsub.s fa0,fa0,fa1,fa7", + 0x88b57547, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmsub.s fa0,fa0,fa1,fa7", + 0x88b5754b, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmadd.s fa0,fa0,fa1,fa7", + 0x88b5754f, + )); + + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FmaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmadd.d fa0,fa0,fa1,fa7", + 0x8ab57543, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + + alu_op: FpuOPRRRR::FmsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmsub.d fa0,fa0,fa1,fa7", + 0x8ab57547, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmsub.d fa0,fa0,fa1,fa7", + 0x8ab5754b, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmadd.d fa0,fa0,fa1,fa7", + 0x8ab5754f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::LrW, + rd: writable_a0(), + addr: a1(), + src: zero_reg(), + amo: AMO::Relax, + }, + "lr.w a0,(a1)", + 0x1005a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::ScW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Release, + }, + "sc.w.rl a0,a2,(a1)", + 0x1ac5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoswapW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Aquire, + }, + "amoswap.w.aq a0,a2,(a1)", + 0xcc5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoaddW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::SeqCst, + }, + "amoadd.w.aqrl a0,a2,(a1)", + 0x6c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoxorW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoxor.w a0,a2,(a1)", + 0x20c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoandW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoand.w a0,a2,(a1)", + 0x60c5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoorW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoor.w a0,a2,(a1)", + 0x40c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomin.w a0,a2,(a1)", + 0x80c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxW, + rd: 
writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomax.w a0,a2,(a1)", + 0xa0c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominuW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amominu.w a0,a2,(a1)", + 0xc0c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxuW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomaxu.w a0,a2,(a1)", + 0xe0c5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::LrD, + rd: writable_a0(), + addr: a1(), + src: zero_reg(), + amo: AMO::Relax, + }, + "lr.d a0,(a1)", + 0x1005b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::ScD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "sc.d a0,a2,(a1)", + 0x18c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoswapD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoswap.d a0,a2,(a1)", + 0x8c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoaddD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoadd.d a0,a2,(a1)", + 0xc5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoxorD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoxor.d a0,a2,(a1)", + 0x20c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoandD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoand.d a0,a2,(a1)", + 0x60c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoorD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoor.d a0,a2,(a1)", + 0x40c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomin.d a0,a2,(a1)", + 0x80c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomax.d a0,a2,(a1)", + 0xa0c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominuD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amominu.d a0,a2,(a1)", + 0xc0c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxuD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomaxu.d a0,a2,(a1)", + 0xe0c5b52f, + )); + + ///////// + insns.push(TestUnit::new( + Inst::Fence { + pred: 1, + succ: 1 << 1, + }, + "fence w,r", + 0x120000f, + )); + insns.push(TestUnit::new(Inst::FenceI {}, "fence.i", 0x100f)); + insns.push(TestUnit::new(Inst::ECall {}, "ecall", 0x73)); + insns.push(TestUnit::new(Inst::EBreak {}, "ebreak", 0x100073)); + + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjS, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fmv.s fa0,fa1", + 0x20b58553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjD, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fmv.d fa0,fa1", + 0x22b58553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjnS, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fneg.s fa0,fa1", + 0x20b59553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjnD, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fneg.d 
fa0,fa1", + 0x22b59553, + )); + + let (flags, isa_flags) = make_test_flags(); + let emit_info = EmitInfo::new(flags, isa_flags); + + for unit in insns.iter() { + println!("Riscv64: {:?}, {}", unit.inst, unit.assembly); + // Check the printed text is as expected. + let actual_printing = unit + .inst + .print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[])); + assert_eq!(unit.assembly, actual_printing); + let mut buffer = MachBuffer::new(); + unit.inst + .emit(&[], &mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(&Default::default(), &mut Default::default()); + let actual_encoding = buffer.stringify_code_bytes(); + + assert_eq!(actual_encoding, unit.code.0); + } +} + +fn make_test_flags() -> (settings::Flags, super::super::riscv_settings::Flags) { + let b = settings::builder(); + let flags = settings::Flags::new(b.clone()); + let b2 = super::super::riscv_settings::builder(); + let isa_flags = super::super::riscv_settings::Flags::new(&flags, &b2); + (flags, isa_flags) +} + +#[derive(Debug)] +pub(crate) struct DebugRTypeInst { + op_code: u32, + rd: u32, + funct3: u32, + rs1: u32, + rs2: u32, + funct7: u32, +} + +impl DebugRTypeInst { + pub(crate) fn from_bs(x: &[u8]) -> Option { + if x.len() != 4 { + return None; + } + let a = [x[0], x[1], x[2], x[3]]; + Some(Self::from_u32(u32::from_le_bytes(a))) + } + + pub(crate) fn from_u32(x: u32) -> Self { + let op_code = x & 0b111_1111; + let x = x >> 7; + let rd = x & 0b1_1111; + let x = x >> 5; + let funct3 = x & 0b111; + let x = x >> 3; + let rs1 = x & 0b1_1111; + let x = x >> 5; + let rs2 = x & 0b1_1111; + let x = x >> 5; + let funct7 = x & 0b111_1111; + Self { + op_code, + rd, + funct3, + rs1, + rs2, + funct7, + } + } +} + +#[derive(Debug)] +pub(crate) struct DebugITypeInst { + op_code: u32, + rd: u32, + funct3: u32, + rs: u32, + imm12: u32, + shamt5: u32, + shamt6: u32, + funct7: u32, + funct6: u32, +} + +impl DebugITypeInst { + pub(crate) fn from_bs(x: &[u8]) -> Self { + let a = [x[0], x[1], x[2], x[3]]; + Self::from_u32(u32::from_le_bytes(a)) + } + pub(crate) fn from_u32(x: u32) -> Self { + let op_code = x & 0b111_1111; + let x = x >> 7; + let rd = x & 0b1_1111; + let x = x >> 5; + let funct3 = x & 0b111; + let x = x >> 3; + let rs = x & 0b1_1111; + let x = x >> 5; + let imm12 = x & 0b1111_1111_1111; + let shamt5 = imm12 & 0b1_1111; + let shamt6 = imm12 & 0b11_1111; + let funct7 = imm12 >> 5; + let funct6 = funct7 >> 1; + Self { + op_code, + rd, + funct3, + rs, + imm12, + shamt5, + shamt6, + funct7, + funct6, + } + } + fn print_b(self) { + println!("opcode:{:b}", self.op_code); + println!("rd:{}", self.rd); + println!("funct3:{:b}", self.funct3); + println!("rs:{}", self.rs); + println!("shamt5:{:b}", self.shamt5); + println!("shamt6:{:b}", self.shamt6); + println!("funct6:{:b}", self.funct6); + println!("funct7:{:b}", self.funct7); + } +} + +#[test] +fn xxx() { + let x = 1240847763; + let x = DebugITypeInst::from_u32(x); + x.print_b(); +} + +#[test] +fn zkasm_worst_case_instruction_size() { + let (flags, isa_flags) = make_test_flags(); + let emit_info = EmitInfo::new(flags, isa_flags); + + //there are all candidates potential generate a lot of bytes. 
+ let mut candidates: Vec = vec![]; + + candidates.push(Inst::IntSelect { + dst: vec![writable_a0(), writable_a0()], + ty: I128, + op: IntSelectOP::Smax, + x: ValueRegs::two(x_reg(1), x_reg(2)), + y: ValueRegs::two(x_reg(3), x_reg(4)), + }); + + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I8, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I16, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F32, + out_type: I8, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F32, + out_type: I16, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I8, + is_sat: false, + tmp: writable_a1(), + }); + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I16, + is_sat: false, + tmp: writable_a1(), + }); + + candidates.push(Inst::FloatRound { + op: FloatRoundOP::Trunc, + int_tmp: writable_a0(), + f_tmp: writable_a0(), + rd: writable_fa0(), + rs: fa0(), + ty: F64, + }); + + candidates.push(Inst::FloatSelect { + op: FloatSelectOP::Max, + rd: writable_fa0(), + tmp: writable_a0(), + rs1: fa0(), + rs2: fa0(), + ty: F64, + }); + + let mut max: (u32, MInst) = (0, Inst::Nop0); + for i in candidates { + let mut buffer = MachBuffer::new(); + i.emit(&[], &mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(&Default::default(), &mut Default::default()); + let length = buffer.data().len() as u32; + if length > max.0 { + let length = buffer.data().len() as u32; + max = (length, i.clone()); + } + println!("insn:{:?} length: {}", i, length); + } + println!("calculate max size is {} , inst is {:?}", max.0, max.1); + assert!(max.0 <= Inst::worst_case_size()); +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/encode.rs b/cranelift/codegen/src/isa/zkasm/inst/encode.rs new file mode 100644 index 000000000000..69d18d9bae77 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/encode.rs @@ -0,0 +1,299 @@ +//! Contains the RISC-V instruction encoding logic. +//! +//! These formats are specified in the RISC-V specification in section 2.2. +//! See: https://riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf +//! +//! Some instructions especially in extensions have slight variations from +//! the base RISC-V specification. 
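// Editor's sketch (illustrative, not part of the original patch): a quick way
// to sanity-check these encoders against the spec is to rebuild a known
// instruction from its fields. For `add a0, a0, a1` (0x00b50533, also used as
// a test expectation in emit_tests.rs):
//
//     // opcode = OP (0b0110011), rd = x10, funct3 = 0b000,
//     // rs1 = x10, rs2 = x11, funct7 = 0b0000000
//     assert_eq!(encode_r_type_bits(0b0110011, 10, 0b000, 10, 11, 0), 0x00b50533);
//
// `encode_r_type_bits` is private to this module, so such a check would have
// to live in a unit test inside this file rather than in emit_tests.rs.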
+ +use super::{Imm12, Imm5, UImm5, VType}; +use crate::isa::zkasm::inst::reg_to_gpr_num; +use crate::isa::zkasm::lower::isle::generated_code::{ + VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecAluOpRRRR, + VecElementWidth, VecOpCategory, VecOpMasking, +}; +use crate::machinst::isle::WritableReg; +use crate::Reg; + +fn unsigned_field_width(value: u32, width: u8) -> u32 { + debug_assert_eq!(value & (!0 << width), 0); + value +} + +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20------24-25-------31 +/// | Opcode | rd | funct3 | rs1 | rs2 | funct7 | +fn encode_r_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, rs2: u32, funct7: u32) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= unsigned_field_width(rd, 5) << 7; + bits |= unsigned_field_width(funct3, 3) << 12; + bits |= unsigned_field_width(rs1, 5) << 15; + bits |= unsigned_field_width(rs2, 5) << 20; + bits |= unsigned_field_width(funct7, 7) << 25; + bits +} + +/// Encode an R-type instruction. +pub fn encode_r_type( + opcode: u32, + rd: WritableReg, + funct3: u32, + rs1: Reg, + rs2: Reg, + funct7: u32, +) -> u32 { + encode_r_type_bits( + opcode, + reg_to_gpr_num(rd.to_reg()), + funct3, + reg_to_gpr_num(rs1), + reg_to_gpr_num(rs2), + funct7, + ) +} + +/// Encode an I-type instruction. +/// +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20------------------31 +/// | Opcode | rd | width | rs1 | Offset[11:0] | +pub fn encode_i_type(opcode: u32, rd: WritableReg, width: u32, rs1: Reg, offset: Imm12) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= reg_to_gpr_num(rd.to_reg()) << 7; + bits |= unsigned_field_width(width, 3) << 12; + bits |= reg_to_gpr_num(rs1) << 15; + bits |= unsigned_field_width(offset.as_u32(), 12) << 20; + bits +} + +/// Encode an S-type instruction. +/// +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20---24-25-------------31 +/// | Opcode | imm[4:0] | width | base | src | imm[11:5] | +pub fn encode_s_type(opcode: u32, width: u32, base: Reg, src: Reg, offset: Imm12) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= (offset.as_u32() & 0b11111) << 7; + bits |= unsigned_field_width(width, 3) << 12; + bits |= reg_to_gpr_num(base) << 15; + bits |= reg_to_gpr_num(src) << 20; + bits |= unsigned_field_width(offset.as_u32() >> 5, 7) << 25; + bits +} + +/// Encodes a Vector ALU instruction. +/// +/// Fields: +/// - opcode (7 bits) +/// - vd (5 bits) +/// - funct3 (3 bits) +/// - vs1 (5 bits) +/// - vs2 (5 bits) +/// - vm (1 bit) +/// - funct6 (6 bits) +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc +pub fn encode_valu( + op: VecAluOpRRR, + vd: WritableReg, + vs1: Reg, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + reg_to_gpr_num(vs1), + reg_to_gpr_num(vs2), + funct7, + ) +} + +/// Encodes a Vector ALU+Imm instruction. +/// This is just a Vector ALU instruction with an immediate in the VS1 field. 
+/// +/// Fields: +/// - opcode (7 bits) +/// - vd (5 bits) +/// - funct3 (3 bits) +/// - imm (5 bits) +/// - vs2 (5 bits) +/// - vm (1 bit) +/// - funct6 (6 bits) +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc +pub fn encode_valu_rr_imm( + op: VecAluOpRRImm5, + vd: WritableReg, + imm: Imm5, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + let imm = imm.bits() as u32; + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + imm, + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rrrr( + op: VecAluOpRRRR, + vd: WritableReg, + vs2: Reg, + vs1: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + reg_to_gpr_num(vs1), + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rrr_imm( + op: VecAluOpRRRImm5, + vd: WritableReg, + imm: Imm5, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + let imm = imm.bits() as u32; + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + imm, + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rr(op: VecAluOpRR, vd: WritableReg, vs: Reg, masking: VecOpMasking) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + + let (vs1, vs2) = if op.vs_is_vs2_encoded() { + (op.aux_encoding(), reg_to_gpr_num(vs)) + } else { + (reg_to_gpr_num(vs), op.aux_encoding()) + }; + + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + vs1, + vs2, + funct7, + ) +} + +pub fn encode_valu_r_imm( + op: VecAluOpRImm5, + vd: WritableReg, + imm: Imm5, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + + // This is true for this opcode, not sure if there are any other ones. + debug_assert_eq!(op, VecAluOpRImm5::VmvVI); + let vs1 = imm.bits() as u32; + let vs2 = op.aux_encoding(); + + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + vs1, + vs2, + funct7, + ) +} + +/// Encodes a Vector CFG Imm instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc +// TODO: Check if this is any of the known instruction types in the spec. +pub fn encode_vcfg_imm(opcode: u32, rd: Reg, imm: UImm5, vtype: &VType) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= reg_to_gpr_num(rd) << 7; + bits |= VecOpCategory::OPCFG.encode() << 12; + bits |= unsigned_field_width(imm.bits(), 5) << 15; + bits |= unsigned_field_width(vtype.encode(), 10) << 20; + bits |= 0b11 << 30; + bits +} + +/// Encodes a Vector Mem Unit Stride Load instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc +/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP +pub fn encode_vmem_load( + opcode: u32, + vd: Reg, + width: VecElementWidth, + rs1: Reg, + lumop: u32, + masking: VecOpMasking, + mop: u32, + nf: u32, +) -> u32 { + // Width is encoded differently to avoid a clash with the FP load/store sizes. 
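+    // For reference: the scalar FP loads/stores already claim the width encodings
+    // 0b001 (FLH), 0b010 (FLW), 0b011 (FLD) and 0b100 (FLQ), so the vector
+    // element widths are mapped onto the remaining patterns below.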
+ let width = match width { + VecElementWidth::E8 => 0b000, + VecElementWidth::E16 => 0b101, + VecElementWidth::E32 => 0b110, + VecElementWidth::E64 => 0b111, + }; + + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= reg_to_gpr_num(vd) << 7; + bits |= width << 12; + bits |= reg_to_gpr_num(rs1) << 15; + bits |= unsigned_field_width(lumop, 5) << 20; + bits |= masking.encode() << 25; + bits |= unsigned_field_width(mop, 2) << 26; + + // The mew bit (inst[28]) when set is expected to be used to encode expanded + // memory sizes of 128 bits and above, but these encodings are currently reserved. + bits |= 0b0 << 28; + + bits |= unsigned_field_width(nf, 3) << 29; + bits +} + +/// Encodes a Vector Mem Unit Stride Load instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc +/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP +pub fn encode_vmem_store( + opcode: u32, + vs3: Reg, + width: VecElementWidth, + rs1: Reg, + sumop: u32, + masking: VecOpMasking, + mop: u32, + nf: u32, +) -> u32 { + // This is pretty much the same as the load instruction, just + // with different names on the fields. + encode_vmem_load(opcode, vs3, width, rs1, sumop, masking, mop, nf) +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/imms.rs b/cranelift/codegen/src/isa/zkasm/inst/imms.rs new file mode 100644 index 000000000000..f04477e1dcd4 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/imms.rs @@ -0,0 +1,280 @@ +//! Riscv64 ISA definitions: immediate constants. + +// Some variants are never constructed, but we still want them as options in the future. +use super::Inst; +#[allow(dead_code)] +use std::fmt::{Debug, Display, Formatter, Result}; + +#[derive(Copy, Clone, Debug, Default)] +pub struct Imm12 { + pub bits: i16, +} + +#[derive(Copy, Clone, Debug, Default)] +pub struct Imm32 { + pub bits: i32, +} + +impl Imm12 { + pub(crate) const FALSE: Self = Self { bits: 0 }; + pub(crate) const TRUE: Self = Self { bits: 1 }; + pub fn maybe_from_u64(val: u64) -> Option { + let sign_bit = 1 << 11; + if val == 0 { + Some(Imm12 { bits: 0 }) + } else if (val & sign_bit) != 0 && (val >> 12) == 0xffff_ffff_ffff_f { + Some(Imm12 { + bits: (val & 0xffff) as i16, + }) + } else if (val & sign_bit) == 0 && (val >> 12) == 0 { + Some(Imm12 { + bits: (val & 0xffff) as i16, + }) + } else { + None + } + } + #[inline] + pub fn from_bits(bits: i16) -> Self { + Self { bits: bits & 0xfff } + } + /// Create a zero immediate of this format. 
+ #[inline] + pub fn zero() -> Self { + Imm12 { bits: 0 } + } + #[inline] + pub fn as_i16(self) -> i16 { + self.bits + } + #[inline] + pub fn as_u32(&self) -> u32 { + (self.bits as u32) & 0xfff + } +} + +impl Imm32 { + pub fn maybe_from_u64(val: u64) -> Option { + let sign_bit = 1 << 31; + if val == 0 { + Some(Imm32 { bits: 0 }) + } else if (val & sign_bit) != 0 && (val >> 31) == 0xffff_ffff { + Some(Imm32 { + bits: (val & 0xffff_ffff) as i32, + }) + } else if (val & sign_bit) == 0 && (val >> 32) == 0 { + Some(Imm32 { + bits: (val & 0xffff_ffff) as i32, + }) + } else { + None + } + } +} + +impl Into for Imm12 { + fn into(self) -> i64 { + self.bits as i64 + } +} + +impl Display for Imm12 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{:+}", self.bits) + } +} + +impl Display for Imm32 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{:+}", self.bits) + } +} + +impl std::ops::Neg for Imm12 { + type Output = Self; + fn neg(self) -> Self::Output { + Self { bits: -self.bits } + } +} + +// singed +#[derive(Clone, Copy, Default)] +pub struct Imm20 { + /// The immediate bits. + pub bits: i32, +} + +impl Imm20 { + #[inline] + pub fn from_bits(bits: i32) -> Self { + Self { + bits: bits & 0xf_ffff, + } + } + #[inline] + pub fn as_u32(&self) -> u32 { + (self.bits as u32) & 0xf_ffff + } +} + +impl Debug for Imm20 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.bits) + } +} + +impl Display for Imm20 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.bits) + } +} + +/// An unsigned 5-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct UImm5 { + value: u8, +} + +impl UImm5 { + /// Create an unsigned 5-bit immediate from u8. + pub fn maybe_from_u8(value: u8) -> Option { + if value < 32 { + Some(UImm5 { value }) + } else { + None + } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + u32::from(self.value) + } +} + +impl Display for UImm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +/// A Signed 5-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Imm5 { + value: i8, +} + +impl Imm5 { + /// Create an signed 5-bit immediate from an i8. + pub fn maybe_from_i8(value: i8) -> Option { + if value >= -16 && value <= 15 { + Some(Imm5 { value }) + } else { + None + } + } + + pub fn from_bits(value: u8) -> Imm5 { + assert_eq!(value & 0x1f, value); + let signed = ((value << 3) as i8) >> 3; + Imm5 { value: signed } + } + + /// Bits for encoding. + pub fn bits(&self) -> u8 { + self.value as u8 & 0x1f + } +} + +impl Display for Imm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +impl Inst { + pub(crate) fn imm_min() -> i64 { + let imm20_max: i64 = (1 << 19) << 12; + let imm12_max = 1 << 11; + -imm20_max - imm12_max + } + pub(crate) fn imm_max() -> i64 { + let imm20_max: i64 = ((1 << 19) - 1) << 12; + let imm12_max = (1 << 11) - 1; + imm20_max + imm12_max + } + + /// An imm20 immediate and an Imm12 immediate can generate a 32-bit immediate. + /// This helper produces an imm12, imm20, or both to generate the value. + /// + /// `value` must be between `imm_min()` and `imm_max()`, or else + /// this helper returns `None`. + pub(crate) fn generate_imm( + value: u64, + mut handle_imm: impl FnMut(Option, Option) -> R, + ) -> Option { + if let Some(imm12) = Imm12::maybe_from_u64(value) { + // can be load using single imm12. 
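+            // For example, 0x7ff and 0xffff_ffff_ffff_f800 (-2048) both sign-extend
+            // from 12 bits, so a single `addi` from the zero register suffices.
+            // Larger values fall through to the lui+addi split below, e.g.
+            // 0x1234_5678 -> imm20=0x12345, imm12=0x678, while 0x1234_5fff ->
+            // imm20=0x12346, imm12=-1 because the low 12 bits sign-extend.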
+ let r = handle_imm(None, Some(imm12)); + return Some(r); + } + let value = value as i64; + if !(value >= Self::imm_min() && value <= Self::imm_max()) { + // not in range, return None. + return None; + } + const MOD_NUM: i64 = 4096; + let (imm20, imm12) = if value > 0 { + let mut imm20 = value / MOD_NUM; + let mut imm12 = value % MOD_NUM; + if imm12 >= 2048 { + imm12 -= MOD_NUM; + imm20 += 1; + } + assert!(imm12 >= -2048 && imm12 <= 2047); + (imm20, imm12) + } else { + // this is the abs value. + let value_abs = value.abs(); + let imm20 = value_abs / MOD_NUM; + let imm12 = value_abs % MOD_NUM; + let mut imm20 = -imm20; + let mut imm12 = -imm12; + if imm12 < -2048 { + imm12 += MOD_NUM; + imm20 -= 1; + } + (imm20, imm12) + }; + assert!(imm20 >= -(0x7_ffff + 1) && imm20 <= 0x7_ffff); + assert!(imm20 != 0 || imm12 != 0); + Some(handle_imm( + if imm20 != 0 { + Some(Imm20::from_bits(imm20 as i32)) + } else { + None + }, + if imm12 != 0 { + Some(Imm12::from_bits(imm12 as i16)) + } else { + None + }, + )) + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_imm12() { + let x = Imm12::zero(); + assert_eq!(0, x.as_u32()); + Imm12::maybe_from_u64(0xffff_ffff_ffff_ffff).unwrap(); + } + + #[test] + fn imm20_and_imm12() { + assert!(Inst::imm_max() == (i32::MAX - 2048) as i64); + assert!(Inst::imm_min() == i32::MIN as i64 - 2048); + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/mod.rs b/cranelift/codegen/src/isa/zkasm/inst/mod.rs new file mode 100644 index 000000000000..ef612569dcda --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/mod.rs @@ -0,0 +1,2195 @@ +//! This module defines zkasm-specific machine instruction types. + +// Some variants are not constructed, but we still want them as options in the future. +#![allow(dead_code)] +#![allow(non_camel_case_types)] +#![allow(warnings)] + +use super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpMasking}; +use crate::binemit::{Addend, CodeOffset, Reloc}; +pub use crate::ir::condcodes::IntCC; +use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64}; + +pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel}; +use crate::isa::{CallConv, FunctionAlignment}; +use crate::machinst::*; +use crate::{settings, CodegenError, CodegenResult}; + +pub use crate::ir::condcodes::FloatCC; + +use alloc::vec::Vec; +use regalloc2::{PRegSet, RegClass, VReg}; +use smallvec::{smallvec, SmallVec}; +use std::boxed::Box; +use std::fmt::Write; +use std::string::{String, ToString}; + +pub mod regs; +pub use self::regs::*; +pub mod imms; +pub use self::imms::*; +pub mod args; +pub use self::args::*; +pub mod emit; +pub use self::emit::*; +pub mod vector; +pub use self::vector::*; +pub mod encode; +pub use self::encode::*; +pub mod unwind; + +use crate::isa::zkasm::abi::Riscv64MachineDeps; + +#[cfg(test)] +mod emit_tests; + +use std::fmt::{Display, Formatter}; + +pub(crate) type OptionReg = Option; +pub(crate) type OptionImm12 = Option; +pub(crate) type VecBranchTarget = Vec; +pub(crate) type OptionUimm5 = Option; +pub(crate) type OptionFloatRoundingMode = Option; +pub(crate) type VecU8 = Vec; +pub(crate) type VecWritableReg = Vec>; +//============================================================================= +// Instructions (top level): definition + +pub use crate::isa::zkasm::lower::isle::generated_code::{ + AluOPRRI, AluOPRRR, AtomicOP, FClassResult, FFlagsException, FloatRoundOP, FloatSelectOP, + FpuOPRR, FpuOPRRR, FpuOPRRRR, IntSelectOP, LoadOP, MInst as Inst, StoreOP, FRM, 
+}; +use crate::isa::zkasm::lower::isle::generated_code::{MInst, VecAluOpRRImm5, VecAluOpRRR}; + +type BoxCallInfo = Box; +type BoxCallIndInfo = Box; +type BoxReturnCallInfo = Box; + +/// Additional information for (direct) Call instructions, left out of line to lower the size of +/// the Inst enum. +#[derive(Clone, Debug)] +pub struct CallInfo { + pub dest: ExternalName, + pub uses: CallArgList, + pub defs: CallRetList, + pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, + pub clobbers: PRegSet, + pub callee_pop_size: u32, +} + +/// Additional information for CallInd instructions, left out of line to lower the size of the Inst +/// enum. +#[derive(Clone, Debug)] +pub struct CallIndInfo { + pub rn: Reg, + pub uses: CallArgList, + pub defs: CallRetList, + pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, + pub clobbers: PRegSet, + pub callee_pop_size: u32, +} + +/// Additional information for `return_call[_ind]` instructions, left out of +/// line to lower the size of the `Inst` enum. +#[derive(Clone, Debug)] +pub struct ReturnCallInfo { + pub uses: CallArgList, + pub opcode: Opcode, + pub old_stack_arg_size: u32, + pub new_stack_arg_size: u32, +} + +/// A branch target. Either unresolved (basic-block index) or resolved (offset +/// from end of current instruction). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BranchTarget { + /// An unresolved reference to a Label, as passed into + /// `lower_branch_group()`. + Label(MachLabel), + /// A fixed PC offset. + ResolvedOffset(i32), +} + +impl BranchTarget { + /// Return the target's label, if it is a label-based target. + pub(crate) fn as_label(self) -> Option { + match self { + BranchTarget::Label(l) => Some(l), + _ => None, + } + } + /// offset zero. + #[inline] + pub(crate) fn zero() -> Self { + Self::ResolvedOffset(0) + } + #[inline] + pub(crate) fn offset(off: i32) -> Self { + Self::ResolvedOffset(off) + } + #[inline] + pub(crate) fn is_zero(self) -> bool { + match self { + BranchTarget::Label(_) => false, + BranchTarget::ResolvedOffset(off) => off == 0, + } + } + #[inline] + pub(crate) fn as_offset(self) -> Option { + match self { + BranchTarget::Label(_) => None, + BranchTarget::ResolvedOffset(off) => Some(off), + } + } +} + +impl Display for BranchTarget { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + BranchTarget::Label(l) => write!(f, "{}", l.to_string()), + BranchTarget::ResolvedOffset(off) => write!(f, "{}", off), + } + } +} + +pub(crate) fn enc_auipc(rd: Writable, imm: Imm20) -> u32 { + let x = 0b0010111 | reg_to_gpr_num(rd.to_reg()) << 7 | imm.as_u32() << 12; + x +} + +pub(crate) fn enc_jalr(rd: Writable, base: Reg, offset: Imm12) -> u32 { + let x = 0b1100111 + | reg_to_gpr_num(rd.to_reg()) << 7 + | 0b000 << 12 + | reg_to_gpr_num(base) << 15 + | offset.as_u32() << 20; + x +} + +/// rd and src must have the same length. 
+pub(crate) fn gen_moves(rd: &[Writable], src: &[Reg]) -> SmallInstVec { + assert!(rd.len() == src.len()); + assert!(rd.len() > 0); + let mut insts = SmallInstVec::new(); + for (dst, src) in rd.iter().zip(src.iter()) { + let ty = Inst::canonical_type_for_rc(dst.to_reg().class()); + insts.push(Inst::gen_move(*dst, *src, ty)); + } + insts +} + +impl Inst { + const INSTRUCTION_SIZE: i32 = 4; + + #[inline] + pub(crate) fn load_imm12(rd: Writable, imm: Imm12) -> Inst { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs: zero_reg(), + imm12: imm, + } + } + + /// Immediates can be loaded using lui and addi instructions. + fn load_const_imm Writable>( + rd: Writable, + value: u64, + alloc_tmp: &mut F, + ) -> Option> { + Inst::generate_imm(value, |imm20, imm12| { + let mut insts = SmallVec::new(); + + let rs = if let Some(imm) = imm20 { + let rd = if imm12.is_some() { alloc_tmp(I64) } else { rd }; + insts.push(Inst::Lui { rd, imm }); + rd.to_reg() + } else { + zero_reg() + }; + + if let Some(imm12) = imm12 { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + }) + } + + insts + }) + } + + pub(crate) fn load_constant_u32 Writable>( + rd: Writable, + value: u64, + alloc_tmp: &mut F, + ) -> SmallInstVec { + let insts = Inst::load_const_imm(rd, value, alloc_tmp); + insts.unwrap_or_else(|| { + smallvec![Inst::LoadConst32 { + rd, + imm: value as u32 + }] + }) + } + + pub fn load_constant_u64 Writable>( + rd: Writable, + value: u64, + alloc_tmp: &mut F, + ) -> SmallInstVec { + let insts = Inst::load_const_imm(rd, value, alloc_tmp); + insts.unwrap_or_else(|| smallvec![Inst::LoadConst64 { rd, imm: value }]) + } + + pub(crate) fn construct_auipc_and_jalr( + link: Option>, + tmp: Writable, + offset: i64, + ) -> [Inst; 2] { + Inst::generate_imm(offset as u64, |imm20, imm12| { + let a = Inst::Auipc { + rd: tmp, + imm: imm20.unwrap_or_default(), + }; + let b = Inst::Jalr { + rd: link.unwrap_or(writable_zero_reg()), + base: tmp.to_reg(), + offset: imm12.unwrap_or_default(), + }; + [a, b] + }) + .expect("code range is too big.") + } + + /// Create instructions that load a 32-bit floating-point constant. + pub fn load_fp_constant32 Writable>( + rd: Writable, + const_data: u32, + mut alloc_tmp: F, + ) -> SmallVec<[Inst; 4]> { + let mut insts = SmallVec::new(); + let tmp = alloc_tmp(I64); + insts.extend(Self::load_constant_u32( + tmp, + const_data as u64, + &mut alloc_tmp, + )); + insts.push(Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::move_x_to_f_op(F32), + rd, + rs: tmp.to_reg(), + }); + insts + } + + /// Create instructions that load a 64-bit floating-point constant. + pub fn load_fp_constant64 Writable>( + rd: Writable, + const_data: u64, + mut alloc_tmp: F, + ) -> SmallVec<[Inst; 4]> { + let mut insts = SmallInstVec::new(); + let tmp = alloc_tmp(I64); + insts.extend(Self::load_constant_u64(tmp, const_data, &mut alloc_tmp)); + insts.push(Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::move_x_to_f_op(F64), + rd, + rs: tmp.to_reg(), + }); + insts + } + + /// Generic constructor for a load (zero-extending where appropriate). + pub fn gen_load(into_reg: Writable, mem: AMode, ty: Type, flags: MemFlags) -> Inst { + if ty.is_vector() { + Inst::VecLoad { + eew: VecElementWidth::from_type(ty), + to: into_reg, + from: VecAMode::UnitStride { base: mem }, + flags, + mask: VecOpMasking::Disabled, + vstate: VState::from_type(ty), + } + } else { + Inst::Load { + rd: into_reg, + op: LoadOP::from_type(ty), + from: mem, + flags, + } + } + } + + /// Generic constructor for a store. 
+ pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { + if ty.is_vector() { + Inst::VecStore { + eew: VecElementWidth::from_type(ty), + to: VecAMode::UnitStride { base: mem }, + from: from_reg, + flags, + mask: VecOpMasking::Disabled, + vstate: VState::from_type(ty), + } + } else { + Inst::Store { + src: from_reg, + op: StoreOP::from_type(ty), + to: mem, + flags, + } + } + } +} + +//============================================================================= + +fn vec_mask_operands VReg>( + mask: &VecOpMasking, + collector: &mut OperandCollector<'_, F>, +) { + match mask { + VecOpMasking::Enabled { reg } => { + collector.reg_fixed_use(*reg, pv_reg(0).into()); + } + VecOpMasking::Disabled => {} + } +} +fn vec_mask_late_operands VReg>( + mask: &VecOpMasking, + collector: &mut OperandCollector<'_, F>, +) { + match mask { + VecOpMasking::Enabled { reg } => { + collector.reg_fixed_late_use(*reg, pv_reg(0).into()); + } + VecOpMasking::Disabled => {} + } +} + +fn zkasm_get_operands VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) { + match inst { + &Inst::Nop0 => {} + &Inst::Nop4 => {} + &Inst::Label { .. } => {} + &Inst::BrTable { + index, tmp1, tmp2, .. + } => { + collector.reg_use(index); + collector.reg_early_def(tmp1); + collector.reg_early_def(tmp2); + } + &Inst::Auipc { rd, .. } => collector.reg_def(rd), + &Inst::Lui { rd, .. } => collector.reg_def(rd), + &Inst::LoadConst32 { rd, .. } => collector.reg_def(rd), + &Inst::LoadConst64 { rd, .. } => collector.reg_def(rd), + &Inst::AluRRR { rd, rs1, rs2, .. } => { + collector.reg_fixed_use(rs1, a0()); + collector.reg_fixed_use(rs2, b0()); + collector.reg_def(rd); + } + &Inst::FpuRRR { rd, rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + &Inst::AluRRImm12 { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + &Inst::Load { rd, from, .. } => { + if let Some(r) = from.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_def(rd); + } + &Inst::Store { to, src, .. } => { + if let Some(r) = to.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_use(src); + } + + &Inst::Args { ref args } => { + for arg in args { + collector.reg_fixed_def(arg.vreg, arg.preg); + } + } + &Inst::Ret { ref rets, .. } => { + for ret in rets { + collector.reg_fixed_use(ret.vreg, ret.preg); + } + } + + &Inst::Extend { rd, rn, .. } => { + collector.reg_use(rn); + collector.reg_def(rd); + } + &Inst::AdjustSp { .. } => {} + &Inst::Call { ref info } => { + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } + collector.reg_clobbers(info.clobbers); + } + &Inst::CallInd { ref info } => { + if info.callee_callconv == CallConv::Tail { + // TODO(https://github.com/bytecodealliance/regalloc2/issues/145): + // This shouldn't be a fixed register constraint. + collector.reg_fixed_use(info.rn, x_reg(5)); + } else { + collector.reg_use(info.rn); + } + + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } + collector.reg_clobbers(info.clobbers); + } + &Inst::ReturnCall { + callee: _, + ref info, + } => { + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + } + &Inst::ReturnCallInd { ref info, callee } => { + collector.reg_use(callee); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + } + &Inst::TrapIf { test, .. 
} => { + collector.reg_use(test); + } + &Inst::Jal { .. } => {} + &Inst::CondBr { kind, .. } => { + collector.reg_use(kind.rs1); + collector.reg_use(kind.rs2); + } + &Inst::LoadExtName { rd, .. } => { + collector.reg_def(rd); + } + &Inst::LoadAddr { rd, mem } => { + if let Some(r) = mem.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_early_def(rd); + } + + &Inst::VirtualSPOffsetAdj { .. } => {} + &Inst::Mov { rd, rm, .. } => { + collector.reg_use(rm); + collector.reg_def(rd); + } + &Inst::MovFromPReg { rd, rm } => { + debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); + collector.reg_def(rd); + } + &Inst::Fence { .. } => {} + &Inst::FenceI => {} + &Inst::ECall => {} + &Inst::EBreak => {} + &Inst::Udf { .. } => {} + &Inst::FpuRR { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + &Inst::FpuRRRR { + rd, rs1, rs2, rs3, .. + } => { + collector.reg_uses(&[rs1, rs2, rs3]); + collector.reg_def(rd); + } + + &Inst::Jalr { rd, base, .. } => { + collector.reg_use(base); + collector.reg_def(rd); + } + &Inst::Atomic { rd, addr, src, .. } => { + collector.reg_use(addr); + collector.reg_use(src); + collector.reg_def(rd); + } + &Inst::Select { + ref dst, + condition, + x, + y, + .. + } => { + collector.reg_use(condition); + collector.reg_uses(x.regs()); + collector.reg_uses(y.regs()); + for d in dst.iter() { + collector.reg_early_def(d.clone()); + } + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + .. + } => { + collector.reg_uses(&[offset, e, addr, v]); + collector.reg_early_def(t0); + collector.reg_early_def(dst); + } + &Inst::IntSelect { + ref dst, + ref x, + ref y, + .. + } => { + collector.reg_uses(x.regs()); + collector.reg_uses(y.regs()); + for d in dst.iter() { + collector.reg_early_def(d.clone()); + } + } + + &Inst::Icmp { rd, a, b, .. } => { + // TODO(akashin): Why would Icmp have multiple input registers? + // collector.reg_uses(a.regs()); + // collector.reg_uses(b.regs()); + collector.reg_fixed_use( + a.only_reg() + .expect("Only support 1 register in comparison now"), + a0(), + ); + collector.reg_fixed_use( + b.only_reg() + .expect("Only support 1 register in comparison now"), + b0(), + ); + collector.reg_def(rd); + } + + &Inst::SelectReg { + rd, + rs1, + rs2, + condition, + } => { + collector.reg_use(condition.rs1); + collector.reg_use(condition.rs2); + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + &Inst::FcvtToInt { rd, rs, tmp, .. } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(rd); + } + &Inst::RawData { .. } => {} + &Inst::AtomicStore { src, p, .. } => { + collector.reg_use(src); + collector.reg_use(p); + } + &Inst::AtomicLoad { rd, p, .. } => { + collector.reg_use(p); + collector.reg_def(rd); + } + &Inst::AtomicRmwLoop { + offset, + dst, + p, + x, + t0, + .. + } => { + collector.reg_uses(&[offset, p, x]); + collector.reg_early_def(t0); + collector.reg_early_def(dst); + } + &Inst::TrapIfC { rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + } + &Inst::Unwind { .. } => {} + &Inst::DummyUse { reg } => { + collector.reg_use(reg); + } + &Inst::FloatRound { + rd, + int_tmp, + f_tmp, + rs, + .. + } => { + collector.reg_use(rs); + collector.reg_early_def(int_tmp); + collector.reg_early_def(f_tmp); + collector.reg_early_def(rd); + } + &Inst::FloatSelect { + rd, tmp, rs1, rs2, .. + } => { + collector.reg_uses(&[rs1, rs2]); + collector.reg_early_def(tmp); + collector.reg_early_def(rd); + } + &Inst::Popcnt { + sum, step, rs, tmp, .. 
+ } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(sum); + } + &Inst::Rev8 { rs, rd, tmp, step } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(rd); + } + &Inst::Cltz { + sum, step, tmp, rs, .. + } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(sum); + } + &Inst::Brev8 { + rs, + rd, + step, + tmp, + tmp2, + .. + } => { + collector.reg_use(rs); + collector.reg_early_def(step); + collector.reg_early_def(tmp); + collector.reg_early_def(tmp2); + collector.reg_early_def(rd); + } + &Inst::StackProbeLoop { .. } => { + // StackProbeLoop has a tmp register and StackProbeLoop used at gen_prologue. + // t3 will do the job. (t3 is caller-save register and not used directly by compiler like writable_spilltmp_reg) + // gen_prologue is called at emit stage. + // no need let reg alloc know. + } + &Inst::VecAluRRRR { + op, + vd, + vd_src, + vs1, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd_src.class(), RegClass::Vector); + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + debug_assert_eq!(vs1.class(), op.vs1_regclass()); + + collector.reg_late_use(vs1); + collector.reg_late_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 2); // `vd` == `vd_src`. + vec_mask_late_operands(mask, collector); + } + + Inst::AddImm32 { rd, src1, src2 } => { + collector.reg_def(*rd); + } + + Inst::MulImm32 { rd, src1, src2 } => { + collector.reg_def(*rd); + } + + &Inst::VecAluRRRImm5 { + op, + vd, + vd_src, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd_src.class(), RegClass::Vector); + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + + // If the operation forbids source/destination overlap we need to + // ensure that the source and destination registers are different. + if op.forbids_src_dst_overlaps() { + collector.reg_late_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`. + vec_mask_late_operands(mask, collector); + } else { + collector.reg_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`. + vec_mask_operands(mask, collector); + } + } + &Inst::VecAluRRR { + op, + vd, + vs1, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + debug_assert_eq!(vs1.class(), op.vs1_regclass()); + + collector.reg_use(vs1); + collector.reg_use(vs2); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. + if op.forbids_src_dst_overlaps() { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + &Inst::VecAluRRImm5 { + op, + vd, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + + collector.reg_use(vs2); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. 
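+                // (Typically widening/narrowing ops and the likes of vrgather,
+                // where the RVV spec disallows the destination register group
+                // overlapping a source group.)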
+ if op.forbids_src_dst_overlaps() { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + &Inst::VecAluRR { + op, + vd, + vs, + ref mask, + .. + } => { + debug_assert_eq!(vd.to_reg().class(), op.dst_regclass()); + debug_assert_eq!(vs.class(), op.src_regclass()); + + collector.reg_use(vs); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. + if op.forbids_src_dst_overlaps() { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + &Inst::VecAluRImm5 { vd, ref mask, .. } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + + collector.reg_def(vd); + vec_mask_operands(mask, collector); + } + &Inst::VecSetState { rd, .. } => { + collector.reg_def(rd); + } + &Inst::VecLoad { + to, + ref from, + ref mask, + .. + } => { + if let Some(r) = from.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_def(to); + vec_mask_operands(mask, collector); + } + &Inst::VecStore { + ref to, + from, + ref mask, + .. + } => { + if let Some(r) = to.get_allocatable_register() { + collector.reg_use(r); + } + collector.reg_use(from); + vec_mask_operands(mask, collector); + } + } +} + +impl MachInst for Inst { + type LabelUse = LabelUse; + type ABIMachineSpec = Riscv64MachineDeps; + + // https://github.com/riscv/riscv-isa-manual/issues/850 + // all zero will cause invalid opcode. + const TRAP_OPCODE: &'static [u8] = &[0; 4]; + + fn gen_dummy_use(reg: Reg) -> Self { + Inst::DummyUse { reg } + } + + fn gen_block_start( + block_index: usize, + _is_indirect_branch_target: bool, + _is_forward_edge_cfi_enabled: bool, + ) -> Option { + Some(Inst::Label { imm: block_index }) + } + + fn canonical_type_for_rc(rc: RegClass) -> Type { + match rc { + regalloc2::RegClass::Int => I64, + regalloc2::RegClass::Float => F64, + regalloc2::RegClass::Vector => I8X16, + } + } + + fn is_safepoint(&self) -> bool { + match self { + &Inst::Call { .. } + | &Inst::CallInd { .. } + | &Inst::TrapIf { .. } + | &Inst::Udf { .. } => true, + _ => false, + } + } + + fn get_operands VReg>(&self, collector: &mut OperandCollector<'_, F>) { + zkasm_get_operands(self, collector); + } + + fn is_move(&self) -> Option<(Writable, Reg)> { + match self { + Inst::Mov { rd, rm, .. } => Some((rd.clone(), rm.clone())), + _ => None, + } + } + + fn is_included_in_clobbers(&self) -> bool { + match self { + &Inst::Args { .. } => false, + _ => true, + } + } + + fn is_trap(&self) -> bool { + match self { + Self::Udf { .. } => true, + _ => false, + } + } + + fn is_args(&self) -> bool { + match self { + Self::Args { .. } => true, + _ => false, + } + } + + fn is_term(&self) -> MachTerminator { + match self { + &Inst::Jal { .. } => MachTerminator::Uncond, + &Inst::CondBr { .. } => MachTerminator::Cond, + &Inst::Jalr { .. } => MachTerminator::Uncond, + &Inst::Ret { .. } => MachTerminator::Ret, + &Inst::BrTable { .. } => MachTerminator::Indirect, + &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall, + _ => MachTerminator::None, + } + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + let x = Inst::Mov { + rd: to_reg, + rm: from_reg, + ty, + }; + x + } + + fn gen_nop(preferred_size: usize) -> Inst { + if preferred_size == 0 { + return Inst::Nop0; + } + // We can't give a NOP (or any insn) < 4 bytes. 
+ assert!(preferred_size >= 4); + Inst::Nop4 + } + + fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { + match ty { + I8 => Ok((&[RegClass::Int], &[I8])), + I16 => Ok((&[RegClass::Int], &[I16])), + I32 => Ok((&[RegClass::Int], &[I32])), + I64 => Ok((&[RegClass::Int], &[I64])), + R32 => panic!("32-bit reftype pointer should never be seen on zkasm"), + R64 => Ok((&[RegClass::Int], &[R64])), + F32 => Ok((&[RegClass::Float], &[F32])), + F64 => Ok((&[RegClass::Float], &[F64])), + I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])), + _ if ty.is_vector() => { + debug_assert!(ty.bits() <= 512); + + // Here we only need to return a SIMD type with the same size as `ty`. + // We use these types for spills and reloads, so prefer types with lanes <= 31 + // since that fits in the immediate field of `vsetivli`. + const SIMD_TYPES: [[Type; 1]; 6] = [ + [types::I8X2], + [types::I8X4], + [types::I8X8], + [types::I8X16], + [types::I16X16], + [types::I32X16], + ]; + let idx = (ty.bytes().ilog2() - 1) as usize; + let ty = &SIMD_TYPES[idx][..]; + + Ok((&[RegClass::Vector], ty)) + } + _ => Err(CodegenError::Unsupported(format!( + "Unexpected SSA-value type: {}", + ty + ))), + } + } + + fn gen_jump(target: MachLabel) -> Inst { + Inst::Jal { + dest: BranchTarget::Label(target), + } + } + + fn worst_case_size() -> CodeOffset { + // calculate by test function zkasm_worst_case_instruction_size() + 1_000_000 + } + + fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { + RegClass::Int + } + + fn function_alignment() -> FunctionAlignment { + FunctionAlignment { + minimum: 4, + preferred: 4, + } + } +} + +//============================================================================= +// Pretty-printing of instructions. +pub fn reg_name(reg: Reg) -> String { + match reg.to_real_reg() { + Some(real) => match real.class() { + RegClass::Int => match real.hw_enc() { + 0 => "0".into(), + 1 => "RR".into(), + 2 => "SP".into(), + // TODO(akashin): Do we have a global pointer register in ZK ASM? + // https://www.five-embeddev.com/quickref/global_pointer.html + // Supposed to be unallocatable. + 3 => "gp".into(), + // TODO(akashin): Do we have a thread pointer register in ZK ASM? + // https://groups.google.com/a/groups.riscv.org/g/sw-dev/c/cov47bNy5gY?pli=1 + // Supposed to be unallocatable. + 4 => "tp".into(), + // Temporary registers. 
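+                // In zkasm these are printed as the named machine registers
+                // (C, D, E below); the RISC-V numbering only survives as the
+                // internal hardware encoding.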
+ 5 => "C".into(), + 6 => "D".into(), + 7 => "E".into(), + 8 => "fp".into(), + 9 => "s1".into(), + 10 => "A".into(), + 11 => "B".into(), + 12 => "CTX".into(), + 13..=17 => format!("a{}", real.hw_enc() - 10), + 18..=27 => format!("s{}", real.hw_enc() - 16), + 28..=31 => format!("t{}", real.hw_enc() - 25), + _ => unreachable!(), + }, + RegClass::Float => match real.hw_enc() { + 0..=7 => format!("ft{}", real.hw_enc() - 0), + 8..=9 => format!("fs{}", real.hw_enc() - 8), + 10..=17 => format!("fa{}", real.hw_enc() - 10), + 18..=27 => format!("fs{}", real.hw_enc() - 16), + 28..=31 => format!("ft{}", real.hw_enc() - 20), + _ => unreachable!(), + }, + RegClass::Vector => format!("v{}", real.hw_enc()), + }, + None => { + format!("{:?}", reg) + } + } +} + +impl Inst { + fn print_with_state( + &self, + _state: &mut EmitState, + allocs: &mut AllocationConsumer<'_>, + ) -> String { + let format_reg = |reg: Reg, allocs: &mut AllocationConsumer<'_>| -> String { + let reg = allocs.next(reg); + reg_name(reg) + }; + + let format_vec_amode = |amode: &VecAMode, allocs: &mut AllocationConsumer<'_>| -> String { + match amode { + VecAMode::UnitStride { base } => base.to_string_with_alloc(allocs), + } + }; + + let format_mask = |mask: &VecOpMasking, allocs: &mut AllocationConsumer<'_>| -> String { + match mask { + VecOpMasking::Enabled { reg } => format!(",{}.t", format_reg(*reg, allocs)), + VecOpMasking::Disabled => format!(""), + } + }; + + let format_regs = |regs: &[Reg], allocs: &mut AllocationConsumer<'_>| -> String { + let mut x = if regs.len() > 1 { + String::from("[") + } else { + String::default() + }; + regs.iter().for_each(|i| { + x.push_str(format_reg(i.clone(), allocs).as_str()); + if *i != *regs.last().unwrap() { + x.push_str(","); + } + }); + if regs.len() > 1 { + x.push_str("]"); + } + x + }; + let format_labels = |labels: &[MachLabel]| -> String { + if labels.len() == 0 { + return String::from("[_]"); + } + let mut x = String::from("["); + labels.iter().for_each(|l| { + x.push_str( + format!( + "{:?}{}", + l, + if l != labels.last().unwrap() { "," } else { "" }, + ) + .as_str(), + ); + }); + x.push_str("]"); + x + }; + + fn format_frm(rounding_mode: Option) -> String { + if let Some(r) = rounding_mode { + format!(",{}", r.to_static_str(),) + } else { + "".into() + } + } + + let mut empty_allocs = AllocationConsumer::default(); + match self { + &Inst::Nop0 => { + format!("##zero length nop") + } + &Inst::Nop4 => { + format!("##fixed 4-size nop") + } + &Inst::Label { imm } => { + format!("##label=L{imm}") + } + &Inst::StackProbeLoop { + guard_size, + probe_count, + tmp, + } => { + let tmp = format_reg(tmp.to_reg(), allocs); + format!( + "inline_stack_probe##guard_size={} probe_count={} tmp={}", + guard_size, probe_count, tmp + ) + } + &Inst::FloatRound { + op, + rd, + int_tmp, + f_tmp, + rs, + ty, + } => { + let rs = format_reg(rs, allocs); + let int_tmp = format_reg(int_tmp.to_reg(), allocs); + let f_tmp = format_reg(f_tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "{} {},{}##int_tmp={} f_tmp={} ty={}", + op.op_name(), + rd, + rs, + int_tmp, + f_tmp, + ty + ) + } + &Inst::FloatSelect { + op, + rd, + tmp, + rs1, + rs2, + ty, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "f{}.{} {},{},{}##tmp={} ty={}", + op.op_name(), + if ty == F32 { "s" } else { "d" }, + rd, + rs1, + rs2, + tmp, + ty + ) + } + &Inst::AtomicStore { src, ty, p } 
=> { + let src = format_reg(src, allocs); + let p = format_reg(p, allocs); + format!("atomic_store.{} {},({})", ty, src, p) + } + &Inst::DummyUse { reg } => { + let reg = format_reg(reg, allocs); + format!("dummy_use {}", reg) + } + + &Inst::AtomicLoad { rd, ty, p } => { + let p = format_reg(p, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("atomic_load.{} {},({})", ty, rd, p) + } + &Inst::AtomicRmwLoop { + offset, + op, + dst, + ty, + p, + x, + t0, + } => { + let offset = format_reg(offset, allocs); + let p = format_reg(p, allocs); + let x = format_reg(x, allocs); + let t0 = format_reg(t0.to_reg(), allocs); + let dst = format_reg(dst.to_reg(), allocs); + format!( + "atomic_rmw.{} {} {},{},({})##t0={} offset={}", + ty, op, dst, x, p, t0, offset + ) + } + + &Inst::RawData { ref data } => match data.len() { + 4 => { + let mut bytes = [0; 4]; + for i in 0..bytes.len() { + bytes[i] = data[i]; + } + format!(".4byte 0x{:x}", u32::from_le_bytes(bytes)) + } + 8 => { + let mut bytes = [0; 8]; + for i in 0..bytes.len() { + bytes[i] = data[i]; + } + format!(".8byte 0x{:x}", u64::from_le_bytes(bytes)) + } + _ => { + format!(".data {:?}", data) + } + }, + &Inst::Unwind { ref inst } => { + todo!() + } + &Inst::Brev8 { + rs, + ty, + step, + tmp, + tmp2, + rd, + } => { + let rs = format_reg(rs, allocs); + let step = format_reg(step.to_reg(), allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let tmp2 = format_reg(tmp2.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "brev8 {},{}##tmp={} tmp2={} step={} ty={}", + rd, rs, tmp, tmp2, step, ty + ) + } + &Inst::Popcnt { + sum, + step, + rs, + tmp, + ty, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let sum = format_reg(sum.to_reg(), allocs); + format!("popcnt {},{}##ty={} tmp={} step={}", sum, rs, ty, tmp, step) + } + &Inst::Rev8 { rs, rd, tmp, step } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("rev8 {},{}##step={} tmp={}", rd, rs, step, tmp) + } + &Inst::Cltz { + sum, + step, + rs, + tmp, + ty, + leading, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let sum = format_reg(sum.to_reg(), allocs); + format!( + "{} {},{}##ty={} tmp={} step={}", + if leading { "clz" } else { "ctz" }, + sum, + rs, + ty, + tmp, + step + ) + } + &Inst::FcvtToInt { + is_sat, + rd, + rs, + is_signed, + in_type, + out_type, + tmp, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "fcvt_to_{}int{}.{} {},{}##in_ty={} tmp={}", + if is_signed { "s" } else { "u" }, + if is_sat { "_sat" } else { "" }, + out_type, + rd, + rs, + in_type, + tmp + ) + } + &Inst::SelectReg { + rd, + rs1, + rs2, + ref condition, + } => { + let c_rs1 = format_reg(condition.rs1, allocs); + let c_rs2 = format_reg(condition.rs2, allocs); + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "select_reg {},{},{}##condition={}", + rd, + rs1, + rs2, + format!("({} {} {})", c_rs1, condition.kind.to_static_str(), c_rs2), + ) + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + ty, + } => { + let offset = format_reg(offset, allocs); + let e = format_reg(e, allocs); + 
let addr = format_reg(addr, allocs); + let v = format_reg(v, allocs); + let t0 = format_reg(t0.to_reg(), allocs); + let dst = format_reg(dst.to_reg(), allocs); + format!( + "atomic_cas.{} {},{},{},({})##t0={} offset={}", + ty, dst, e, v, addr, t0, offset, + ) + } + &Inst::Icmp { cc, rd, a, b, ty } => { + let a = format_regs(a.regs(), allocs); + let b = format_regs(b.regs(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{},{}##ty={}", cc.to_static_str(), rd, a, b, ty) + } + &Inst::IntSelect { + op, + ref dst, + x, + y, + ty, + } => { + let x = format_regs(x.regs(), allocs); + let y = format_regs(y.regs(), allocs); + let dst: Vec<_> = dst.iter().map(|r| r.to_reg()).collect(); + let dst = format_regs(&dst[..], allocs); + format!("{} {},{},{}##ty={}", op.op_name(), dst, x, y, ty,) + } + &Inst::BrTable { + index, + tmp1, + tmp2, + ref targets, + } => { + let targets: Vec<_> = targets.iter().map(|x| x.as_label().unwrap()).collect(); + format!( + "{} {},{}##tmp1={},tmp2={}", + "br_table", + format_reg(index, allocs), + format_labels(&targets[..]), + format_reg(tmp1.to_reg(), allocs), + format_reg(tmp2.to_reg(), allocs), + ) + } + &Inst::Auipc { rd, imm } => { + format!( + "{} {},{}", + "auipc", + format_reg(rd.to_reg(), allocs), + imm.bits + ) + } + &Inst::Jalr { rd, base, offset } => { + let base = format_reg(base, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}({})", "jalr", rd, offset.bits, base) + } + &Inst::Lui { rd, ref imm } => { + format!("{} {},{}", "lui", format_reg(rd.to_reg(), allocs), imm.bits) + } + &Inst::LoadConst32 { rd, imm } => { + let rd = format_reg(rd.to_reg(), allocs); + let mut buf = String::new(); + write!(&mut buf, "auipc {},0; ", rd).unwrap(); + write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap(); + write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 4).unwrap(); + write!(&mut buf, ".4byte 0x{:x}", imm).unwrap(); + buf + } + &Inst::LoadConst64 { rd, imm } => { + let rd = format_reg(rd.to_reg(), allocs); + let mut buf = String::new(); + write!(&mut buf, "auipc {},0; ", rd).unwrap(); + write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap(); + write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 8).unwrap(); + write!(&mut buf, ".8byte 0x{:x}", imm).unwrap(); + buf + } + &Inst::AluRRR { + alu_op, + rd, + rs1, + rs2, + } => { + let rs1_s = format_reg(rs1, allocs); + let rs2_s = format_reg(rs2, allocs); + let rd_s = format_reg(rd.to_reg(), allocs); + match alu_op { + AluOPRRR::Adduw if rs2 == zero_reg() => { + format!("zext.w {},{}", rd_s, rs1_s) + } + _ => { + format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s) + } + } + } + + Inst::AddImm32 { rd, src1, src2 } => { + let rd = format_reg(rd.to_reg(), allocs); + format!("{src1} + {src2} => {rd};") + } + + Inst::MulImm32 { rd, src1, src2 } => { + let rd = format_reg(rd.to_reg(), allocs); + format!("{src1} * {src2} => {rd};") + } + + &Inst::FpuRR { + frm, + alu_op, + rd, + rs, + } => { + let rs = format_reg(rs, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}{}", alu_op.op_name(), rd, rs, format_frm(frm)) + } + &Inst::FpuRRR { + alu_op, + rd, + rs1, + rs2, + frm, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rd = format_reg(rd.to_reg(), allocs); + let rs1_is_rs2 = rs1 == rs2; + if rs1_is_rs2 && alu_op.is_copy_sign() { + // this is move instruction. 
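+                // `fsgnj.{s,d} rd, rs, rs` copies rs with its own sign, which is
+                // exactly the `fmv` alias; with rs1 == rs2, fsgnjn and fsgnjx
+                // likewise become the `fneg` and `fabs` pseudo-instructions.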
+ format!( + "fmv.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else if rs1_is_rs2 && alu_op.is_copy_neg_sign() { + format!( + "fneg.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else if rs1_is_rs2 && alu_op.is_copy_xor_sign() { + format!( + "fabs.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else { + format!( + "{} {},{},{}{}", + alu_op.op_name(), + rd, + rs1, + rs2, + format_frm(frm) + ) + } + } + &Inst::FpuRRRR { + alu_op, + rd, + rs1, + rs2, + rs3, + frm, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rs3 = format_reg(rs3, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "{} {},{},{},{}{}", + alu_op.op_name(), + rd, + rs1, + rs2, + rs3, + format_frm(frm) + ) + } + &Inst::AluRRImm12 { + alu_op, + rd, + rs, + ref imm12, + } => { + let rs_s = format_reg(rs, allocs); + let rd = format_reg(rd.to_reg(), allocs); + + // Some of these special cases are better known as + // their pseudo-instruction version, so prefer printing those. + match (alu_op, rs, imm12) { + (AluOPRRI::Addi, rs, _) if rs == zero_reg() => { + return format!("li {},{}", rd, imm12.as_i16()); + } + (AluOPRRI::Addiw, _, imm12) if imm12.as_i16() == 0 => { + return format!("sext.w {},{}", rd, rs_s); + } + (AluOPRRI::Xori, _, imm12) if imm12.as_i16() == -1 => { + return format!("not {},{}", rd, rs_s); + } + (AluOPRRI::SltiU, _, imm12) if imm12.as_i16() == 1 => { + return format!("seqz {},{}", rd, rs_s); + } + (alu_op, _, _) if alu_op.option_funct12().is_some() => { + format!("{} {},{}", alu_op.op_name(), rd, rs_s) + } + (alu_op, _, imm12) => { + format!("{} {},{},{}", alu_op.op_name(), rd, rs_s, imm12.as_i16()) + } + } + } + &Inst::Load { + rd, + op, + from, + flags: _flags, + } => { + let base = from.to_string_with_alloc(allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}", op.op_name(), rd, base,) + } + &Inst::Store { + to, + src, + op, + flags: _flags, + } => { + let base = to.to_string_with_alloc(allocs); + let src = format_reg(src, allocs); + format!("{} {},{}", op.op_name(), src, base,) + } + &Inst::Args { ref args } => { + let mut s = "args".to_string(); + let mut empty_allocs = AllocationConsumer::default(); + for arg in args { + let preg = format_reg(arg.preg, &mut empty_allocs); + let def = format_reg(arg.vreg.to_reg(), allocs); + write!(&mut s, " {}={}", def, preg).unwrap(); + } + s + } + &Inst::Ret { + ref rets, + stack_bytes_to_pop, + } => { + let mut s = if stack_bytes_to_pop == 0 { + "ret".to_string() + } else { + format!("add sp, sp, #{stack_bytes_to_pop} ; ret") + }; + + let mut empty_allocs = AllocationConsumer::default(); + for ret in rets { + let preg = format_reg(ret.preg, &mut empty_allocs); + let vreg = format_reg(ret.vreg, allocs); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + + &MInst::Extend { + rd, + rn, + signed, + from_bits, + .. 
+ } => { + let rn = format_reg(rn, allocs); + let rd = format_reg(rd.to_reg(), allocs); + return if signed == false && from_bits == 8 { + format!("andi {rd},{rn}") + } else { + let op = if signed { "srai" } else { "srli" }; + let shift_bits = (64 - from_bits) as i16; + format!("slli {rd},{rn},{shift_bits}; {op} {rd},{rd},{shift_bits}") + }; + } + &MInst::AdjustSp { amount } => { + format!("{} sp,{:+}", "add", amount) + } + &MInst::Call { ref info } => format!("call {}", info.dest.display(None)), + &MInst::CallInd { ref info } => { + let rd = format_reg(info.rn, allocs); + format!("callind {}", rd) + } + &MInst::ReturnCall { + ref callee, + ref info, + } => { + let mut s = format!( + "return_call {callee:?} old_stack_arg_size:{} new_stack_arg_size:{}", + info.old_stack_arg_size, info.new_stack_arg_size + ); + for ret in &info.uses { + let preg = format_reg(ret.preg, &mut empty_allocs); + let vreg = format_reg(ret.vreg, allocs); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + &MInst::ReturnCallInd { callee, ref info } => { + let callee = format_reg(callee, allocs); + let mut s = format!( + "return_call_ind {callee} old_stack_arg_size:{} new_stack_arg_size:{}", + info.old_stack_arg_size, info.new_stack_arg_size + ); + for ret in &info.uses { + let preg = format_reg(ret.preg, &mut empty_allocs); + let vreg = format_reg(ret.vreg, allocs); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + &MInst::TrapIf { test, trap_code } => { + format!("trap_if {},{}", format_reg(test, allocs), trap_code,) + } + &MInst::TrapIfC { + rs1, + rs2, + cc, + trap_code, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + format!("trap_ifc {}##({} {} {})", trap_code, rs1, cc, rs2) + } + &MInst::Jal { dest, .. } => { + format!("{} {}", "j", dest) + } + &MInst::CondBr { + taken, + not_taken, + kind, + .. 
+ } => { + let rs1 = format_reg(kind.rs1, allocs); + let rs2 = format_reg(kind.rs2, allocs); + if not_taken.is_zero() && taken.as_label().is_none() { + let off = taken.as_offset().unwrap(); + format!("{} {},{},{}", kind.op_name(), rs1, rs2, off) + } else { + let x = format!( + "{} {},{},taken({}),not_taken({})", + kind.op_name(), + rs1, + rs2, + taken, + not_taken + ); + x + } + } + &MInst::Atomic { + op, + rd, + addr, + src, + amo, + } => { + let op_name = op.op_name(amo); + let addr = format_reg(addr, allocs); + let src = format_reg(src, allocs); + let rd = format_reg(rd.to_reg(), allocs); + if op.is_load() { + format!("{} {},({})", op_name, rd, addr) + } else { + format!("{} {},{},({})", op_name, rd, src, addr) + } + } + &MInst::LoadExtName { + rd, + ref name, + offset, + } => { + let rd = format_reg(rd.to_reg(), allocs); + format!("load_sym {},{}{:+}", rd, name.display(None), offset) + } + &MInst::LoadAddr { ref rd, ref mem } => { + let rs = mem.to_string_with_alloc(allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("load_addr {},{}", rd, rs) + } + &MInst::VirtualSPOffsetAdj { amount } => { + format!("virtual_sp_offset_adj {:+}", amount) + } + &MInst::Mov { rd, rm, ty } => { + let rd = format_reg(rd.to_reg(), allocs); + let rm = format_reg(rm, allocs); + + let op = match ty { + F32 => "fmv.s", + F64 => "fmv.d", + ty if ty.is_vector() => "vmv1r.v", + _ => "mv", + }; + + format!("{op} {rd},{rm}") + } + &MInst::MovFromPReg { rd, rm } => { + let rd = format_reg(rd.to_reg(), allocs); + debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); + let rm = reg_name(Reg::from(rm)); + format!("mv {},{}", rd, rm) + } + &MInst::Fence { pred, succ } => { + format!( + "fence {},{}", + Inst::fence_req_to_string(pred), + Inst::fence_req_to_string(succ), + ) + } + &MInst::FenceI => "fence.i".into(), + &MInst::Select { + ref dst, + condition, + ref x, + ref y, + ty, + } => { + let condition = format_reg(condition, allocs); + let x = format_regs(x.regs(), allocs); + let y = format_regs(y.regs(), allocs); + let dst: Vec<_> = dst.clone().into_iter().map(|r| r.to_reg()).collect(); + let dst = format_regs(&dst[..], allocs); + format!("select_{} {},{},{}##condition={}", ty, dst, x, y, condition) + } + &MInst::Udf { trap_code } => format!("udf##trap_code={}", trap_code), + &MInst::EBreak {} => String::from("ebreak"), + &MInst::ECall {} => String::from("ecall"), + &Inst::VecAluRRRR { + op, + vd, + vd_src, + vs1, + vs2, + ref mask, + ref vstate, + } => { + let vs1_s = format_reg(vs1, allocs); + let vs2_s = format_reg(vs2, allocs); + let vd_src_s = format_reg(vd_src, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + let vd_fmt = if vd_s != vd_src_s { + format!("{},{}", vd_s, vd_src_s) + } else { + vd_s + }; + + // Note: vs2 and vs1 here are opposite to the standard scalar ordering. + // This is noted in Section 10.1 of the RISC-V Vector spec. + format!("{op} {vd_fmt},{vs2_s},{vs1_s}{mask} {vstate}") + } + &Inst::VecAluRRRImm5 { + op, + vd, + imm, + vs2, + ref mask, + ref vstate, + .. + } => { + let vs2_s = format_reg(vs2, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + // Some opcodes interpret the immediate as unsigned, lets show the + // correct number here. 
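+                // For example, slide offsets such as `vslideup.vi` take an
+                // unsigned uimm5, while arithmetic immediates such as `vadd.vi`
+                // take a signed simm5.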
+ let imm_s = if op.imm_is_unsigned() { + format!("{}", imm.bits()) + } else { + format!("{}", imm) + }; + + format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}") + } + &Inst::VecAluRRR { + op, + vd, + vs1, + vs2, + ref mask, + ref vstate, + } => { + let vs1_s = format_reg(vs1, allocs); + let vs2_s = format_reg(vs2, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + // Note: vs2 and vs1 here are opposite to the standard scalar ordering. + // This is noted in Section 10.1 of the RISC-V Vector spec. + match (op, vs2, vs1) { + (VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => { + format!("vneg.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => { + format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VfsgnjxVV, vs2, vs1) if vs2 == vs1 => { + format!("vfabs.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => { + format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}") + } + _ => format!("{op} {vd_s},{vs2_s},{vs1_s}{mask} {vstate}"), + } + } + &Inst::VecAluRRImm5 { + op, + vd, + imm, + vs2, + ref mask, + ref vstate, + } => { + let vs2_s = format_reg(vs2, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + // Some opcodes interpret the immediate as unsigned, lets show the + // correct number here. + let imm_s = if op.imm_is_unsigned() { + format!("{}", imm.bits()) + } else { + format!("{}", imm) + }; + + match (op, imm) { + (VecAluOpRRImm5::VxorVI, imm) if imm == Imm5::maybe_from_i8(-1).unwrap() => { + format!("vnot.v {vd_s},{vs2_s}{mask} {vstate}") + } + _ => format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}"), + } + } + &Inst::VecAluRR { + op, + vd, + vs, + ref mask, + ref vstate, + } => { + let vs_s = format_reg(vs, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + format!("{op} {vd_s},{vs_s}{mask} {vstate}") + } + &Inst::VecAluRImm5 { + op, + vd, + imm, + ref mask, + ref vstate, + } => { + let vd_s = format_reg(vd.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + format!("{op} {vd_s},{imm}{mask} {vstate}") + } + &Inst::VecSetState { rd, ref vstate } => { + let rd_s = format_reg(rd.to_reg(), allocs); + assert!(vstate.avl.is_static()); + format!("vsetivli {}, {}, {}", rd_s, vstate.avl, vstate.vtype) + } + Inst::VecLoad { + eew, + to, + from, + ref mask, + ref vstate, + .. + } => { + let base = format_vec_amode(from, allocs); + let vd = format_reg(to.to_reg(), allocs); + let mask = format_mask(mask, allocs); + + format!("vl{eew}.v {vd},{base}{mask} {vstate}") + } + Inst::VecStore { + eew, + to, + from, + ref mask, + ref vstate, + .. + } => { + let dst = format_vec_amode(to, allocs); + let vs3 = format_reg(*from, allocs); + let mask = format_mask(mask, allocs); + + format!("vs{eew}.v {vs3},{dst}{mask} {vstate}") + } + } + } +} + +/// Different forms of label references for different instruction formats. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LabelUse { + /// 20-bit branch offset (unconditional branches). PC-rel, offset is + /// imm << 1. Immediate is 20 signed bits. Use in Jal instructions. + Jal20, + + /// The unconditional jump instructions all use PC-relative + /// addressing to help support position independent code. The JALR + /// instruction was defined to enable a two-instruction sequence to + /// jump anywhere in a 32-bit absolute address range. 
A LUI + /// instruction can first load rs1 with the upper 20 bits of a + /// target address, then JALR can add in the lower bits. Similarly, + /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative + /// address range. + PCRel32, + + /// All branch instructions use the B-type instruction format. The + /// 12-bit B-immediate encodes signed offsets in multiples of 2, and + /// is added to the current pc to give the target address. The + /// conditional branch range is ±4 KiB. + B12, + + /// Equivalent to the `R_RISCV_PCREL_HI20` relocation, Allows setting + /// the immediate field of an `auipc` instruction. + PCRelHi20, + + /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to + /// the final address, instead of the `PCREL_HI20` label. Allows setting + /// the immediate field of I Type instructions such as `addi` or `lw`. + /// + /// Since we currently don't support offsets in labels, this relocation has + /// an implicit offset of 4. + PCRelLo12I, +} + +impl MachInstLabelUse for LabelUse { + /// Alignment for veneer code. Every Riscv64 instruction must be + /// 4-byte-aligned. + const ALIGN: CodeOffset = 4; + + /// Maximum PC-relative range (positive), inclusive. + fn max_pos_range(self) -> CodeOffset { + match self { + LabelUse::Jal20 => ((1 << 19) - 1) * 2, + LabelUse::PCRelLo12I | LabelUse::PCRelHi20 | LabelUse::PCRel32 => { + Inst::imm_max() as CodeOffset + } + LabelUse::B12 => ((1 << 11) - 1) * 2, + } + } + + /// Maximum PC-relative range (negative). + fn max_neg_range(self) -> CodeOffset { + match self { + LabelUse::PCRel32 => Inst::imm_min().abs() as CodeOffset, + _ => self.max_pos_range() + 2, + } + } + + /// Size of window into code needed to do the patch. + fn patch_size(self) -> CodeOffset { + match self { + LabelUse::Jal20 | LabelUse::B12 | LabelUse::PCRelHi20 | LabelUse::PCRelLo12I => 4, + LabelUse::PCRel32 => 8, + } + } + + /// Perform the patch. + fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) { + assert!(use_offset % 4 == 0); + assert!(label_offset % 4 == 0); + let offset = (label_offset as i64) - (use_offset as i64); + + // re-check range + assert!( + offset >= -(self.max_neg_range() as i64) && offset <= (self.max_pos_range() as i64), + "{:?} offset '{}' use_offset:'{}' label_offset:'{}' must not exceed max range.", + self, + offset, + use_offset, + label_offset, + ); + self.patch_raw_offset(buffer, offset); + } + + /// Is a veneer supported for this label reference type? + fn supports_veneer(self) -> bool { + match self { + Self::Jal20 | Self::B12 => true, + _ => false, + } + } + + /// How large is the veneer, if supported? + fn veneer_size(self) -> CodeOffset { + match self { + Self::B12 | Self::Jal20 => 8, + _ => unreachable!(), + } + } + + /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return + /// an offset and label-use for the veneer's use of the original label. 
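+    /// The veneer emitted here is an `auipc` + `jalr` pair, so once patched it can
+    /// reach any target in the 32-bit PC-relative range (hence the returned `PCRel32`).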
+    fn generate_veneer(
+        self,
+        buffer: &mut [u8],
+        veneer_offset: CodeOffset,
+    ) -> (CodeOffset, LabelUse) {
+        let base = writable_spilltmp_reg();
+        {
+            let x = enc_auipc(base, Imm20::from_bits(0)).to_le_bytes();
+            buffer[0] = x[0];
+            buffer[1] = x[1];
+            buffer[2] = x[2];
+            buffer[3] = x[3];
+        }
+        {
+            let x = enc_jalr(writable_zero_reg(), base.to_reg(), Imm12::from_bits(0)).to_le_bytes();
+            buffer[4] = x[0];
+            buffer[5] = x[1];
+            buffer[6] = x[2];
+            buffer[7] = x[3];
+        }
+        (veneer_offset, Self::PCRel32)
+    }
+
+    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
+        match (reloc, addend) {
+            (Reloc::RiscvCall, _) => Some(Self::PCRel32),
+            _ => None,
+        }
+    }
+}
+
+impl LabelUse {
+    fn offset_in_range(self, offset: i64) -> bool {
+        let min = -(self.max_neg_range() as i64);
+        let max = self.max_pos_range() as i64;
+        offset >= min && offset <= max
+    }
+
+    fn patch_raw_offset(self, buffer: &mut [u8], offset: i64) {
+        let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
+        match self {
+            LabelUse::Jal20 => {
+                let offset = offset as u32;
+                let v = ((offset >> 12 & 0b1111_1111) << 12)
+                    | ((offset >> 11 & 0b1) << 20)
+                    | ((offset >> 1 & 0b11_1111_1111) << 21)
+                    | ((offset >> 20 & 0b1) << 31);
+                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
+            }
+            LabelUse::PCRel32 => {
+                let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]);
+                Inst::generate_imm(offset as u64, |imm20, imm12| {
+                    let imm20 = imm20.unwrap_or_default();
+                    let imm12 = imm12.unwrap_or_default();
+                    // Encode the values to OR in using zero_reg(): the real register
+                    // operands are already present in the original encoded
+                    // instructions, and OR-ing in zeroes leaves them unchanged.
+                    buffer[0..4].clone_from_slice(&u32::to_le_bytes(
+                        insn | enc_auipc(writable_zero_reg(), imm20),
+                    ));
+                    buffer[4..8].clone_from_slice(&u32::to_le_bytes(
+                        insn2 | enc_jalr(writable_zero_reg(), zero_reg(), imm12),
+                    ));
+                })
+                // The offset was range-checked in `patch`, so this can only fail on a
+                // compiler bug.
+                .expect("the offset was range-checked above; failing here is a compiler error");
+            }
+
+            LabelUse::B12 => {
+                let offset = offset as u32;
+                let v = ((offset >> 11 & 0b1) << 7)
+                    | ((offset >> 1 & 0b1111) << 8)
+                    | ((offset >> 5 & 0b11_1111) << 25)
+                    | ((offset >> 12 & 0b1) << 31);
+                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
+            }
+
+            LabelUse::PCRelHi20 => {
+                // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
+                //
+                // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the
+                // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an
+                // offset of 2048, we need to land at the next page and subtract instead.
+                // For example, an offset of 0x1800 splits into hi20 = 0x2 and lo12 = -0x800.
+                let offset = offset as u32;
+                let hi20 = offset.wrapping_add(0x800) >> 12;
+                let insn = (insn & 0xFFF) | (hi20 << 12);
+                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn));
+            }
+
+            LabelUse::PCRelLo12I => {
+                // `offset` is the offset from the current instruction to the target address.
+                //
+                // However, we are trying to compute the offset to the target address from the previous instruction.
+                // The previous instruction should be the one that contains the PCRelHi20 relocation and
+                // stores/references the program counter (`auipc` usually).
+ // + // Since we are trying to compute the offset from the previous instruction, we can + // represent it as offset = target_address - (current_instruction_address - 4) + // which is equivalent to offset = target_address - current_instruction_address + 4. + // + // Thus we need to add 4 to the offset here. + let lo12 = (offset + 4) as u32 & 0xFFF; + let insn = (insn & 0xFFFFF) | (lo12 << 20); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn label_use_max_range() { + assert!(LabelUse::B12.max_neg_range() == LabelUse::B12.max_pos_range() + 2); + assert!(LabelUse::Jal20.max_neg_range() == LabelUse::Jal20.max_pos_range() + 2); + assert!(LabelUse::PCRel32.max_pos_range() == (Inst::imm_max() as CodeOffset)); + assert!(LabelUse::PCRel32.max_neg_range() == (Inst::imm_min().abs() as CodeOffset)); + assert!(LabelUse::B12.max_pos_range() == ((1 << 11) - 1) * 2); + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/regs.rs b/cranelift/codegen/src/isa/zkasm/inst/regs.rs new file mode 100644 index 000000000000..7f76b9baa070 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/regs.rs @@ -0,0 +1,237 @@ +//! Riscv64 ISA definitions: registers. +//! + +use crate::settings; + +use crate::machinst::{Reg, Writable}; + +use crate::machinst::RealReg; +use alloc::vec; +use alloc::vec::Vec; + +use regalloc2::VReg; +use regalloc2::{MachineEnv, PReg, RegClass}; + +#[inline] +pub fn a0() -> Reg { + x_reg(10) +} + +#[inline] +pub fn b0() -> Reg { + x_reg(11) +} + +// second argument of function call +#[inline] +pub fn a1() -> Reg { + x_reg(5) +} + +// third argument of function call +#[inline] +pub fn a2() -> Reg { + x_reg(6) +} + +#[inline] +pub fn writable_a0() -> Writable { + Writable::from_reg(a0()) +} +#[inline] +pub fn writable_a1() -> Writable { + Writable::from_reg(a1()) +} +#[inline] +pub fn writable_a2() -> Writable { + Writable::from_reg(a2()) +} + +#[inline] +pub fn fa0() -> Reg { + f_reg(10) +} +#[inline] +pub fn writable_fa0() -> Writable { + Writable::from_reg(fa0()) +} +#[inline] +pub fn writable_fa1() -> Writable { + Writable::from_reg(fa1()) +} +#[inline] +pub fn fa1() -> Reg { + f_reg(11) +} + +#[inline] +pub fn fa7() -> Reg { + f_reg(17) +} + +/// Get a reference to the zero-register. +#[inline] +pub fn zero_reg() -> Reg { + x_reg(0) +} + +/// Get a writable reference to the zero-register (this discards a result). +#[inline] +pub fn writable_zero_reg() -> Writable { + Writable::from_reg(zero_reg()) +} + +#[inline] +pub fn stack_reg() -> Reg { + x_reg(2) +} + +/// Get a writable reference to the stack-pointer register. +#[inline] +pub fn writable_stack_reg() -> Writable { + Writable::from_reg(stack_reg()) +} + +/// Get a reference to the link register (x1). +pub fn link_reg() -> Reg { + x_reg(1) +} + +/// Get a writable reference to the link register. +#[inline] +pub fn writable_link_reg() -> Writable { + Writable::from_reg(link_reg()) +} + +/// Get a reference to the context register (CTX). +pub fn context_reg() -> Reg { + x_reg(12) +} + +/// Get a reference to the frame pointer (x29). +#[inline] +pub fn fp_reg() -> Reg { + x_reg(8) +} + +/// Get a writable reference to the frame pointer. +#[inline] +pub fn writable_fp_reg() -> Writable { + Writable::from_reg(fp_reg()) +} + +/// Get a reference to the first temporary, sometimes "spill temporary", +/// register. This register is used in various ways as a temporary. 
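+/// It is never handed out by the register allocator, so the backend is always free
+/// to clobber it.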
+#[inline] +pub fn spilltmp_reg() -> Reg { + x_reg(31) +} + +/// Get a writable reference to the spilltmp reg. +#[inline] +pub fn writable_spilltmp_reg() -> Writable { + Writable::from_reg(spilltmp_reg()) +} + +///spilltmp2 +#[inline] +pub fn spilltmp_reg2() -> Reg { + x_reg(30) +} + +/// Get a writable reference to the spilltmp2 reg. +#[inline] +pub fn writable_spilltmp_reg2() -> Writable { + Writable::from_reg(spilltmp_reg2()) +} + +pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv { + let preferred_regs_by_class: [Vec; 3] = { + // Registers are A, B, C, D, E. + let x_registers: Vec = (5..=7) + .chain(10..=12) + .map(|i| PReg::new(i, RegClass::Int)) + .collect(); + + let f_registers: Vec = Vec::new(); + // (0..=7) + // .chain(10..=17) + // .chain(28..=31) + // .map(|i| PReg::new(i, RegClass::Float)) + // .collect(); + + let v_registers: Vec = Vec::new(); + // (0..=31).map(|i| PReg::new(i, RegClass::Vector)).collect(); + + [x_registers, f_registers, v_registers] + }; + + let non_preferred_regs_by_class: [Vec; 3] = { + let x_registers: Vec = Vec::new(); + // (9..=9) + // .chain(18..=27) + // .map(|i| PReg::new(i, RegClass::Int)) + // .collect(); + + let f_registers: Vec = Vec::new(); + // (8..=9) + // .chain(18..=27) + // .map(|i| PReg::new(i, RegClass::Float)) + // .collect(); + + let v_registers = vec![]; + + [x_registers, f_registers, v_registers] + }; + + MachineEnv { + preferred_regs_by_class, + non_preferred_regs_by_class, + fixed_stack_slots: vec![], + scratch_by_class: [None, None, None], + } +} + +#[inline] +pub fn x_reg(enc: usize) -> Reg { + let p_reg = PReg::new(enc, RegClass::Int); + let v_reg = VReg::new(p_reg.index(), p_reg.class()); + Reg::from(v_reg) +} +pub const fn px_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Int) +} + +#[inline] +pub fn f_reg(enc: usize) -> Reg { + let p_reg = PReg::new(enc, RegClass::Float); + let v_reg = VReg::new(p_reg.index(), p_reg.class()); + Reg::from(v_reg) +} +pub const fn pf_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Float) +} +#[inline] +pub(crate) fn real_reg_to_reg(x: RealReg) -> Reg { + let v_reg = VReg::new(x.hw_enc() as usize, x.class()); + Reg::from(v_reg) +} + +#[allow(dead_code)] +pub(crate) fn x_reg_range(start: usize, end: usize) -> Vec> { + let mut regs = vec![]; + for i in start..=end { + regs.push(Writable::from_reg(x_reg(i))); + } + regs +} + +#[inline] +pub fn v_reg(enc: usize) -> Reg { + let p_reg = PReg::new(enc, RegClass::Vector); + let v_reg = VReg::new(p_reg.index(), p_reg.class()); + Reg::from(v_reg) +} +pub const fn pv_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Vector) +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/unwind.rs b/cranelift/codegen/src/isa/zkasm/inst/unwind.rs new file mode 100644 index 000000000000..1e2bb904db74 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/unwind.rs @@ -0,0 +1,2 @@ +#[cfg(feature = "unwind")] +pub(crate) mod systemv; diff --git a/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs new file mode 100644 index 000000000000..d050560ffec0 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/unwind/systemv.rs @@ -0,0 +1,174 @@ +//! Unwind information for System V ABI (Riscv64). + +use crate::isa::unwind::systemv::RegisterMappingError; +use crate::isa::zkasm::inst::regs; +use crate::machinst::Reg; +use gimli::{write::CommonInformationEntry, Encoding, Format, Register}; +use regalloc2::RegClass; + +/// Creates a new zkasm common information entry (CIE). 
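+/// The CIE holds the defaults shared by every FDE: the return-address column (the
+/// link register) and an initial CFA rule of SP + 0.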
+pub fn create_cie() -> CommonInformationEntry { + use gimli::write::CallFrameInstruction; + + let mut entry = CommonInformationEntry::new( + Encoding { + address_size: 8, + format: Format::Dwarf32, + version: 1, + }, + 4, // Code alignment factor + -8, // Data alignment factor + Register(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16), + ); + + // Every frame will start with the call frame address (CFA) at SP + let sp = Register(regs::stack_reg().to_real_reg().unwrap().hw_enc().into()); + entry.add_instruction(CallFrameInstruction::Cfa(sp, 0)); + + entry +} + +/// Map Cranelift registers to their corresponding Gimli registers. +pub fn map_reg(reg: Reg) -> Result { + let reg_offset = match reg.class() { + RegClass::Int => 0, + RegClass::Float => 32, + RegClass::Vector => 64, + }; + + let reg = reg.to_real_reg().unwrap().hw_enc() as u16; + Ok(Register(reg_offset + reg)) +} + +pub(crate) struct RegisterMapper; + +impl crate::isa::unwind::systemv::RegisterMapper for RegisterMapper { + fn map(&self, reg: Reg) -> Result { + Ok(map_reg(reg)?.0) + } + fn sp(&self) -> u16 { + regs::stack_reg().to_real_reg().unwrap().hw_enc() as u16 + } + fn fp(&self) -> Option { + Some(regs::fp_reg().to_real_reg().unwrap().hw_enc() as u16) + } + fn lr(&self) -> Option { + Some(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16) + } + fn lr_offset(&self) -> Option { + Some(8) + } +} + +#[cfg(test)] +mod tests { + use crate::cursor::{Cursor, FuncCursor}; + + use crate::ir::{ + types, AbiParam, Function, InstBuilder, Signature, StackSlotData, StackSlotKind, + UserFuncName, + }; + use crate::isa::{lookup, CallConv}; + use crate::settings::{builder, Flags}; + use crate::Context; + use gimli::write::Address; + use std::str::FromStr; + use target_lexicon::triple; + + #[test] + fn test_simple_func() { + let isa = lookup(triple!("zkasm")) + .expect("expect zkasm ISA") + .finish(Flags::new(builder())) + .expect("Creating compiler backend"); + + let mut context = Context::for_function(create_function( + CallConv::SystemV, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + )); + + let code = context + .compile(&*isa, &mut Default::default()) + .expect("expected compilation"); + + let fde = match code + .create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(1234)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 40, lsda: None, instructions: [(12, CfaOffset(16)), (12, Offset(Register(8), -16)), (12, Offset(Register(1), -8)), (16, CfaRegister(Register(8)))] }"); + } + + fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { + let mut func = + Function::with_name_signature(UserFuncName::user(0, 0), Signature::new(call_conv)); + + let block0 = func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().return_(&[]); + + if let Some(stack_slot) = stack_slot { + func.sized_stack_slots.push(stack_slot); + } + + func + } + + #[test] + fn test_multi_return_func() { + let isa = lookup(triple!("zkasm")) + .expect("expect zkasm ISA") + .finish(Flags::new(builder())) + .expect("Creating compiler backend"); + + let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV)); + + let code = context + .compile(&*isa, &mut Default::default()) + .expect("expected compilation"); + + let fde = match code + 
.create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(4321)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!( + format!("{:?}", fde), + "FrameDescriptionEntry { address: Constant(4321), length: 20, lsda: None, instructions: [] }" + ); + } + + fn create_multi_return_function(call_conv: CallConv) -> Function { + let mut sig = Signature::new(call_conv); + sig.params.push(AbiParam::new(types::I32)); + let mut func = Function::with_name_signature(UserFuncName::user(0, 0), sig); + + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, types::I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().brif(v0, block2, &[], block1, &[]); + + pos.insert_block(block1); + pos.ins().return_(&[]); + + pos.insert_block(block2); + pos.ins().return_(&[]); + + func + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst/vector.rs b/cranelift/codegen/src/isa/zkasm/inst/vector.rs new file mode 100644 index 000000000000..afd248379875 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst/vector.rs @@ -0,0 +1,996 @@ +use crate::isa::zkasm::inst::AllocationConsumer; +use crate::isa::zkasm::inst::EmitState; +use crate::isa::zkasm::lower::isle::generated_code::VecAluOpRRRR; +use crate::isa::zkasm::lower::isle::generated_code::{ + VecAMode, VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecAvl, + VecElementWidth, VecLmul, VecMaskMode, VecOpCategory, VecOpMasking, VecTailMode, +}; +use crate::machinst::RegClass; +use crate::Reg; +use core::fmt; + +use super::{Type, UImm5}; + +impl VecAvl { + pub fn _static(size: u32) -> Self { + VecAvl::Static { + size: UImm5::maybe_from_u8(size as u8).expect("Invalid size for AVL"), + } + } + + pub fn is_static(&self) -> bool { + match self { + VecAvl::Static { .. } => true, + } + } + + pub fn unwrap_static(&self) -> UImm5 { + match self { + VecAvl::Static { size } => *size, + } + } +} + +// TODO: Can we tell ISLE to derive this? +impl Copy for VecAvl {} + +// TODO: Can we tell ISLE to derive this? 
+impl PartialEq for VecAvl { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (VecAvl::Static { size: lhs }, VecAvl::Static { size: rhs }) => lhs == rhs, + } + } +} + +impl fmt::Display for VecAvl { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecAvl::Static { size } => write!(f, "{}", size), + } + } +} + +impl VecElementWidth { + pub fn from_type(ty: Type) -> Self { + Self::from_bits(ty.lane_bits()) + } + + pub fn from_bits(bits: u32) -> Self { + match bits { + 8 => VecElementWidth::E8, + 16 => VecElementWidth::E16, + 32 => VecElementWidth::E32, + 64 => VecElementWidth::E64, + _ => panic!("Invalid number of bits for VecElementWidth: {}", bits), + } + } + + pub fn bits(&self) -> u32 { + match self { + VecElementWidth::E8 => 8, + VecElementWidth::E16 => 16, + VecElementWidth::E32 => 32, + VecElementWidth::E64 => 64, + } + } + + pub fn encode(&self) -> u32 { + match self { + VecElementWidth::E8 => 0b000, + VecElementWidth::E16 => 0b001, + VecElementWidth::E32 => 0b010, + VecElementWidth::E64 => 0b011, + } + } +} + +impl fmt::Display for VecElementWidth { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "e{}", self.bits()) + } +} + +impl VecLmul { + pub fn encode(&self) -> u32 { + match self { + VecLmul::LmulF8 => 0b101, + VecLmul::LmulF4 => 0b110, + VecLmul::LmulF2 => 0b111, + VecLmul::Lmul1 => 0b000, + VecLmul::Lmul2 => 0b001, + VecLmul::Lmul4 => 0b010, + VecLmul::Lmul8 => 0b011, + } + } +} + +impl fmt::Display for VecLmul { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecLmul::LmulF8 => write!(f, "mf8"), + VecLmul::LmulF4 => write!(f, "mf4"), + VecLmul::LmulF2 => write!(f, "mf2"), + VecLmul::Lmul1 => write!(f, "m1"), + VecLmul::Lmul2 => write!(f, "m2"), + VecLmul::Lmul4 => write!(f, "m4"), + VecLmul::Lmul8 => write!(f, "m8"), + } + } +} + +impl VecTailMode { + pub fn encode(&self) -> u32 { + match self { + VecTailMode::Agnostic => 1, + VecTailMode::Undisturbed => 0, + } + } +} + +impl fmt::Display for VecTailMode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecTailMode::Agnostic => write!(f, "ta"), + VecTailMode::Undisturbed => write!(f, "tu"), + } + } +} + +impl VecMaskMode { + pub fn encode(&self) -> u32 { + match self { + VecMaskMode::Agnostic => 1, + VecMaskMode::Undisturbed => 0, + } + } +} + +impl fmt::Display for VecMaskMode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecMaskMode::Agnostic => write!(f, "ma"), + VecMaskMode::Undisturbed => write!(f, "mu"), + } + } +} + +/// Vector Type (VType) +/// +/// vtype provides the default type used to interpret the contents of the vector register file. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct VType { + pub sew: VecElementWidth, + pub lmul: VecLmul, + pub tail_mode: VecTailMode, + pub mask_mode: VecMaskMode, +} + +impl VType { + // https://github.com/riscv/riscv-v-spec/blob/master/vtype-format.adoc + pub fn encode(&self) -> u32 { + let mut bits = 0; + bits |= self.lmul.encode(); + bits |= self.sew.encode() << 3; + bits |= self.tail_mode.encode() << 6; + bits |= self.mask_mode.encode() << 7; + bits + } +} + +impl fmt::Display for VType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}, {}, {}, {}", + self.sew, self.lmul, self.tail_mode, self.mask_mode + ) + } +} + +/// Vector State (VState) +/// +/// VState represents the state of the vector unit that each instruction expects before execution. 
+/// Unlike VType or any of the other types here, VState is not a part of the RISC-V ISA. It is +/// used by our instruction emission code to ensure that the vector unit is in the correct state. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct VState { + pub avl: VecAvl, + pub vtype: VType, +} + +impl VState { + pub fn from_type(ty: Type) -> Self { + VState { + avl: VecAvl::_static(ty.lane_count()), + vtype: VType { + sew: VecElementWidth::from_type(ty), + lmul: VecLmul::Lmul1, + tail_mode: VecTailMode::Agnostic, + mask_mode: VecMaskMode::Agnostic, + }, + } + } +} + +impl fmt::Display for VState { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "#avl={}, #vtype=({})", self.avl, self.vtype) + } +} + +impl VecOpCategory { + pub fn encode(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#101-vector-arithmetic-instruction-encoding + match self { + VecOpCategory::OPIVV => 0b000, + VecOpCategory::OPFVV => 0b001, + VecOpCategory::OPMVV => 0b010, + VecOpCategory::OPIVI => 0b011, + VecOpCategory::OPIVX => 0b100, + VecOpCategory::OPFVF => 0b101, + VecOpCategory::OPMVX => 0b110, + VecOpCategory::OPCFG => 0b111, + } + } +} + +impl VecOpMasking { + pub fn encode(&self) -> u32 { + match self { + VecOpMasking::Enabled { .. } => 0, + VecOpMasking::Disabled => 1, + } + } + + pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + VecOpMasking::Enabled { reg } => VecOpMasking::Enabled { + reg: allocs.next(*reg), + }, + VecOpMasking::Disabled => VecOpMasking::Disabled, + } + } +} + +impl VecAluOpRRRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VmaccVX => 0b101101, + VecAluOpRRRR::VnmsacVV | VecAluOpRRRR::VnmsacVX => 0b101111, + VecAluOpRRRR::VfmaccVV | VecAluOpRRRR::VfmaccVF => 0b101100, + VecAluOpRRRR::VfnmaccVV | VecAluOpRRRR::VfnmaccVF => 0b101101, + VecAluOpRRRR::VfmsacVV | VecAluOpRRRR::VfmsacVF => 0b101110, + VecAluOpRRRR::VfnmsacVV | VecAluOpRRRR::VfnmsacVF => 0b101111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VnmsacVV => VecOpCategory::OPMVV, + VecAluOpRRRR::VmaccVX | VecAluOpRRRR::VnmsacVX => VecOpCategory::OPMVX, + VecAluOpRRRR::VfmaccVV + | VecAluOpRRRR::VfnmaccVV + | VecAluOpRRRR::VfmsacVV + | VecAluOpRRRR::VfnmsacVV => VecOpCategory::OPFVV, + VecAluOpRRRR::VfmaccVF + | VecAluOpRRRR::VfnmaccVF + | VecAluOpRRRR::VfmsacVF + | VecAluOpRRRR::VfnmsacVF => VecOpCategory::OPFVF, + } + } + + // vs1 is the only variable source, vs2 is fixed. 
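+    // (vs2 is always a vector register; only vs1's register class varies with the
+    // operand category.)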
+ pub fn vs1_regclass(&self) -> RegClass { + match self.category() { + VecOpCategory::OPMVV | VecOpCategory::OPFVV => RegClass::Vector, + VecOpCategory::OPMVX => RegClass::Int, + VecOpCategory::OPFVF => RegClass::Float, + _ => unreachable!(), + } + } +} + +impl fmt::Display for VecAluOpRRRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - 2); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRRImm5::VslideupVI => 0b001110, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRRImm5::VslideupVI => VecOpCategory::OPIVI, + } + } + + pub fn imm_is_unsigned(&self) -> bool { + match self { + VecAluOpRRRImm5::VslideupVI => true, + } + } + + /// Some instructions do not allow the source and destination registers to overlap. + pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRRImm5::VslideupVI => true, + } + } +} + +impl fmt::Display for VecAluOpRRRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - 2); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRR::VaddVV + | VecAluOpRRR::VaddVX + | VecAluOpRRR::VfaddVV + | VecAluOpRRR::VfaddVF => 0b000000, + VecAluOpRRR::VsubVV + | VecAluOpRRR::VsubVX + | VecAluOpRRR::VfsubVV + | VecAluOpRRR::VfsubVF => 0b000010, + VecAluOpRRR::VrsubVX => 0b000011, + VecAluOpRRR::VmulVV | VecAluOpRRR::VmulVX => 0b100101, + VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhVX => 0b100111, + VecAluOpRRR::VmulhuVV + | VecAluOpRRR::VmulhuVX + | VecAluOpRRR::VfmulVV + | VecAluOpRRR::VfmulVF => 0b100100, + VecAluOpRRR::VsmulVV | VecAluOpRRR::VsmulVX => 0b100111, + VecAluOpRRR::VsllVV | VecAluOpRRR::VsllVX => 0b100101, + VecAluOpRRR::VsrlVV | VecAluOpRRR::VsrlVX => 0b101000, + VecAluOpRRR::VsraVV | VecAluOpRRR::VsraVX => 0b101001, + VecAluOpRRR::VandVV | VecAluOpRRR::VandVX => 0b001001, + VecAluOpRRR::VorVV | VecAluOpRRR::VorVX => 0b001010, + VecAluOpRRR::VxorVV | VecAluOpRRR::VxorVX => 0b001011, + VecAluOpRRR::VminuVV | VecAluOpRRR::VminuVX | VecAluOpRRR::VredminuVS => 0b000100, + VecAluOpRRR::VminVV | VecAluOpRRR::VminVX => 0b000101, + VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxuVX | VecAluOpRRR::VredmaxuVS => 0b000110, + VecAluOpRRR::VmaxVV | VecAluOpRRR::VmaxVX => 0b000111, + VecAluOpRRR::VslidedownVX => 0b001111, + VecAluOpRRR::VfrsubVF => 0b100111, + VecAluOpRRR::VmergeVVM + | VecAluOpRRR::VmergeVXM + | VecAluOpRRR::VfmergeVFM + | VecAluOpRRR::VcompressVM => 0b010111, + VecAluOpRRR::VfdivVV + | VecAluOpRRR::VfdivVF + | VecAluOpRRR::VsadduVV + | VecAluOpRRR::VsadduVX => 0b100000, + VecAluOpRRR::VfrdivVF | VecAluOpRRR::VsaddVV | VecAluOpRRR::VsaddVX => 0b100001, + VecAluOpRRR::VfminVV => 0b000100, + VecAluOpRRR::VfmaxVV => 0b000110, + VecAluOpRRR::VssubuVV | VecAluOpRRR::VssubuVX => 0b100010, + VecAluOpRRR::VssubVV | 
VecAluOpRRR::VssubVX => 0b100011, + VecAluOpRRR::VfsgnjVV | VecAluOpRRR::VfsgnjVF => 0b001000, + VecAluOpRRR::VfsgnjnVV => 0b001001, + VecAluOpRRR::VfsgnjxVV => 0b001010, + VecAluOpRRR::VrgatherVV | VecAluOpRRR::VrgatherVX => 0b001100, + VecAluOpRRR::VwadduVV | VecAluOpRRR::VwadduVX => 0b110000, + VecAluOpRRR::VwaddVV | VecAluOpRRR::VwaddVX => 0b110001, + VecAluOpRRR::VwsubuVV | VecAluOpRRR::VwsubuVX => 0b110010, + VecAluOpRRR::VwsubVV | VecAluOpRRR::VwsubVX => 0b110011, + VecAluOpRRR::VwadduWV | VecAluOpRRR::VwadduWX => 0b110100, + VecAluOpRRR::VwaddWV | VecAluOpRRR::VwaddWX => 0b110101, + VecAluOpRRR::VwsubuWV | VecAluOpRRR::VwsubuWX => 0b110110, + VecAluOpRRR::VwsubWV | VecAluOpRRR::VwsubWX => 0b110111, + VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmseqVX + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfeqVF => 0b011000, + VecAluOpRRR::VmsneVV + | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmfleVV + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmandMM => 0b011001, + VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltuVX | VecAluOpRRR::VmorMM => 0b011010, + VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfltVF => 0b011011, + VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfneVF => 0b011100, + VecAluOpRRR::VmsleVV + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmnandMM => 0b011101, + VecAluOpRRR::VmsgtuVX | VecAluOpRRR::VmnorMM => 0b011110, + VecAluOpRRR::VmsgtVX | VecAluOpRRR::VmfgeVF => 0b011111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRR::VaddVV + | VecAluOpRRR::VsaddVV + | VecAluOpRRR::VsadduVV + | VecAluOpRRR::VsubVV + | VecAluOpRRR::VssubVV + | VecAluOpRRR::VssubuVV + | VecAluOpRRR::VsmulVV + | VecAluOpRRR::VsllVV + | VecAluOpRRR::VsrlVV + | VecAluOpRRR::VsraVV + | VecAluOpRRR::VandVV + | VecAluOpRRR::VorVV + | VecAluOpRRR::VxorVV + | VecAluOpRRR::VminuVV + | VecAluOpRRR::VminVV + | VecAluOpRRR::VmaxuVV + | VecAluOpRRR::VmaxVV + | VecAluOpRRR::VmergeVVM + | VecAluOpRRR::VrgatherVV + | VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmsneVV + | VecAluOpRRR::VmsltuVV + | VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleVV => VecOpCategory::OPIVV, + VecAluOpRRR::VwaddVV + | VecAluOpRRR::VwaddWV + | VecAluOpRRR::VwadduVV + | VecAluOpRRR::VwadduWV + | VecAluOpRRR::VwsubVV + | VecAluOpRRR::VwsubWV + | VecAluOpRRR::VwsubuVV + | VecAluOpRRR::VwsubuWV + | VecAluOpRRR::VmulVV + | VecAluOpRRR::VmulhVV + | VecAluOpRRR::VmulhuVV + | VecAluOpRRR::VredmaxuVS + | VecAluOpRRR::VredminuVS + | VecAluOpRRR::VcompressVM + | VecAluOpRRR::VmandMM + | VecAluOpRRR::VmorMM + | VecAluOpRRR::VmnandMM + | VecAluOpRRR::VmnorMM => VecOpCategory::OPMVV, + VecAluOpRRR::VwaddVX + | VecAluOpRRR::VwadduVX + | VecAluOpRRR::VwadduWX + | VecAluOpRRR::VwaddWX + | VecAluOpRRR::VwsubVX + | VecAluOpRRR::VwsubuVX + | VecAluOpRRR::VwsubuWX + | VecAluOpRRR::VwsubWX + | VecAluOpRRR::VmulVX + | VecAluOpRRR::VmulhVX + | VecAluOpRRR::VmulhuVX => VecOpCategory::OPMVX, + VecAluOpRRR::VaddVX + | VecAluOpRRR::VsaddVX + | VecAluOpRRR::VsadduVX + | VecAluOpRRR::VsubVX + | VecAluOpRRR::VssubVX + | VecAluOpRRR::VssubuVX + | VecAluOpRRR::VrsubVX + | VecAluOpRRR::VsmulVX + | VecAluOpRRR::VsllVX + | VecAluOpRRR::VsrlVX + | VecAluOpRRR::VsraVX + | VecAluOpRRR::VandVX + | VecAluOpRRR::VorVX + | VecAluOpRRR::VxorVX + | VecAluOpRRR::VminuVX + | VecAluOpRRR::VminVX + | VecAluOpRRR::VmaxuVX + | VecAluOpRRR::VmaxVX + | VecAluOpRRR::VslidedownVX + | VecAluOpRRR::VmergeVXM + | VecAluOpRRR::VrgatherVX + | VecAluOpRRR::VmseqVX 
+ | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmsltuVX + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmsgtuVX + | VecAluOpRRR::VmsgtVX => VecOpCategory::OPIVX, + VecAluOpRRR::VfaddVV + | VecAluOpRRR::VfsubVV + | VecAluOpRRR::VfmulVV + | VecAluOpRRR::VfdivVV + | VecAluOpRRR::VfmaxVV + | VecAluOpRRR::VfminVV + | VecAluOpRRR::VfsgnjVV + | VecAluOpRRR::VfsgnjnVV + | VecAluOpRRR::VfsgnjxVV + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfleVV => VecOpCategory::OPFVV, + VecAluOpRRR::VfaddVF + | VecAluOpRRR::VfsubVF + | VecAluOpRRR::VfrsubVF + | VecAluOpRRR::VfmulVF + | VecAluOpRRR::VfdivVF + | VecAluOpRRR::VfrdivVF + | VecAluOpRRR::VfmergeVFM + | VecAluOpRRR::VfsgnjVF + | VecAluOpRRR::VmfeqVF + | VecAluOpRRR::VmfneVF + | VecAluOpRRR::VmfltVF + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmfgeVF => VecOpCategory::OPFVF, + } + } + + // vs1 is the only variable source, vs2 is fixed. + pub fn vs1_regclass(&self) -> RegClass { + match self.category() { + VecOpCategory::OPIVV | VecOpCategory::OPFVV | VecOpCategory::OPMVV => RegClass::Vector, + VecOpCategory::OPIVX | VecOpCategory::OPMVX => RegClass::Int, + VecOpCategory::OPFVF => RegClass::Float, + _ => unreachable!(), + } + } + + /// Some instructions do not allow the source and destination registers to overlap. + pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRR::VrgatherVV + | VecAluOpRRR::VrgatherVX + | VecAluOpRRR::VcompressVM + | VecAluOpRRR::VwadduVV + | VecAluOpRRR::VwadduVX + | VecAluOpRRR::VwaddVV + | VecAluOpRRR::VwaddVX + | VecAluOpRRR::VwadduWV + | VecAluOpRRR::VwadduWX + | VecAluOpRRR::VwaddWV + | VecAluOpRRR::VwaddWX + | VecAluOpRRR::VwsubuVV + | VecAluOpRRR::VwsubuVX + | VecAluOpRRR::VwsubVV + | VecAluOpRRR::VwsubVX + | VecAluOpRRR::VwsubuWV + | VecAluOpRRR::VwsubuWX + | VecAluOpRRR::VwsubWV + | VecAluOpRRR::VwsubWX => true, + _ => false, + } + } +} + +impl fmt::Display for VecAluOpRRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let suffix_length = match self { + VecAluOpRRR::VmergeVVM | VecAluOpRRR::VmergeVXM | VecAluOpRRR::VfmergeVFM => 3, + _ => 2, + }; + + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - suffix_length); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRImm5::VaddVI => 0b000000, + VecAluOpRRImm5::VrsubVI => 0b000011, + VecAluOpRRImm5::VsllVI => 0b100101, + VecAluOpRRImm5::VsrlVI => 0b101000, + VecAluOpRRImm5::VsraVI => 0b101001, + VecAluOpRRImm5::VandVI => 0b001001, + VecAluOpRRImm5::VorVI => 0b001010, + VecAluOpRRImm5::VxorVI => 0b001011, + VecAluOpRRImm5::VslidedownVI => 0b001111, + VecAluOpRRImm5::VssrlVI => 0b101010, + VecAluOpRRImm5::VmergeVIM => 0b010111, + VecAluOpRRImm5::VsadduVI => 0b100000, + VecAluOpRRImm5::VsaddVI => 0b100001, + VecAluOpRRImm5::VrgatherVI => 0b001100, + VecAluOpRRImm5::VmvrV => 0b100111, + VecAluOpRRImm5::VnclipWI => 0b101111, + VecAluOpRRImm5::VnclipuWI => 0b101110, + VecAluOpRRImm5::VmseqVI => 0b011000, + VecAluOpRRImm5::VmsneVI => 0b011001, + VecAluOpRRImm5::VmsleuVI => 0b011100, + VecAluOpRRImm5::VmsleVI => 0b011101, + VecAluOpRRImm5::VmsgtuVI => 0b011110, + VecAluOpRRImm5::VmsgtVI => 
0b011111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRImm5::VaddVI + | VecAluOpRRImm5::VrsubVI + | VecAluOpRRImm5::VsllVI + | VecAluOpRRImm5::VsrlVI + | VecAluOpRRImm5::VsraVI + | VecAluOpRRImm5::VandVI + | VecAluOpRRImm5::VorVI + | VecAluOpRRImm5::VxorVI + | VecAluOpRRImm5::VssrlVI + | VecAluOpRRImm5::VslidedownVI + | VecAluOpRRImm5::VmergeVIM + | VecAluOpRRImm5::VsadduVI + | VecAluOpRRImm5::VsaddVI + | VecAluOpRRImm5::VrgatherVI + | VecAluOpRRImm5::VmvrV + | VecAluOpRRImm5::VnclipWI + | VecAluOpRRImm5::VnclipuWI + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => VecOpCategory::OPIVI, + } + } + + pub fn imm_is_unsigned(&self) -> bool { + match self { + VecAluOpRRImm5::VsllVI + | VecAluOpRRImm5::VsrlVI + | VecAluOpRRImm5::VssrlVI + | VecAluOpRRImm5::VsraVI + | VecAluOpRRImm5::VslidedownVI + | VecAluOpRRImm5::VrgatherVI + | VecAluOpRRImm5::VmvrV + | VecAluOpRRImm5::VnclipWI + | VecAluOpRRImm5::VnclipuWI => true, + VecAluOpRRImm5::VaddVI + | VecAluOpRRImm5::VrsubVI + | VecAluOpRRImm5::VandVI + | VecAluOpRRImm5::VorVI + | VecAluOpRRImm5::VxorVI + | VecAluOpRRImm5::VmergeVIM + | VecAluOpRRImm5::VsadduVI + | VecAluOpRRImm5::VsaddVI + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => false, + } + } + + /// Some instructions do not allow the source and destination registers to overlap. + pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRImm5::VrgatherVI => true, + _ => false, + } + } +} + +impl fmt::Display for VecAluOpRRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let suffix_length = match self { + VecAluOpRRImm5::VmergeVIM => 3, + _ => 2, + }; + + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - suffix_length); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => { + 0b010000 + } + VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => 0b010010, + VecAluOpRR::VfsqrtV => 0b010011, + VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0b010111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRR::VmvSX => VecOpCategory::OPMVX, + VecAluOpRR::VmvXS + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => VecOpCategory::OPMVV, + VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => VecOpCategory::OPFVF, + VecAluOpRR::VfmvFS | VecAluOpRR::VfsqrtV => VecOpCategory::OPFVV, + VecAluOpRR::VmvVV => VecOpCategory::OPIVV, + VecAluOpRR::VmvVX => VecOpCategory::OPIVX, + } + } + + /// Returns the auxiliary encoding field for the instruction, if any. 
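+    /// The value is placed in whichever of the vs1/vs2 fields is not occupied by the
+    /// real source register (see `vs_is_vs2_encoded` below).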
+ pub fn aux_encoding(&self) -> u32 { + match self { + // VRXUNARY0 + VecAluOpRR::VmvSX => 0b00000, + // VWXUNARY0 + VecAluOpRR::VmvXS => 0b00000, + // VRFUNARY0 + VecAluOpRR::VfmvSF => 0b00000, + // VWFUNARY0 + VecAluOpRR::VfmvFS => 0b00000, + // VFUNARY1 + VecAluOpRR::VfsqrtV => 0b00000, + // VXUNARY0 + VecAluOpRR::VzextVF8 => 0b00010, + VecAluOpRR::VsextVF8 => 0b00011, + VecAluOpRR::VzextVF4 => 0b00100, + VecAluOpRR::VsextVF4 => 0b00101, + VecAluOpRR::VzextVF2 => 0b00110, + VecAluOpRR::VsextVF2 => 0b00111, + // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: + // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. + VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0, + } + } + + /// Most of these opcodes have the source register encoded in the VS2 field and + /// the `aux_encoding` field in VS1. However some special snowflakes have it the + /// other way around. As far as I can tell only vmv.v.* are backwards. + pub fn vs_is_vs2_encoded(&self) -> bool { + match self { + VecAluOpRR::VmvXS + | VecAluOpRR::VfmvFS + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => true, + VecAluOpRR::VmvSX + | VecAluOpRR::VfmvSF + | VecAluOpRR::VmvVV + | VecAluOpRR::VmvVX + | VecAluOpRR::VfmvVF => false, + } + } + + pub fn dst_regclass(&self) -> RegClass { + match self { + VecAluOpRR::VfmvSF + | VecAluOpRR::VmvSX + | VecAluOpRR::VmvVV + | VecAluOpRR::VmvVX + | VecAluOpRR::VfmvVF + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => RegClass::Vector, + VecAluOpRR::VmvXS => RegClass::Int, + VecAluOpRR::VfmvFS => RegClass::Float, + } + } + + pub fn src_regclass(&self) -> RegClass { + match self { + VecAluOpRR::VmvXS + | VecAluOpRR::VfmvFS + | VecAluOpRR::VmvVV + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => RegClass::Vector, + VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float, + VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int, + } + } + + /// Some instructions do not allow the source and destination registers to overlap. 
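+    /// For `VecAluOpRR` this applies to the sign/zero-extension ops, whose destination
+    /// elements are wider than their source elements.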
+ pub fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => true, + _ => false, + } + } +} + +impl fmt::Display for VecAluOpRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + VecAluOpRR::VmvSX => "vmv.s.x", + VecAluOpRR::VmvXS => "vmv.x.s", + VecAluOpRR::VfmvSF => "vfmv.s.f", + VecAluOpRR::VfmvFS => "vfmv.f.s", + VecAluOpRR::VfsqrtV => "vfsqrt.v", + VecAluOpRR::VzextVF2 => "vzext.vf2", + VecAluOpRR::VzextVF4 => "vzext.vf4", + VecAluOpRR::VzextVF8 => "vzext.vf8", + VecAluOpRR::VsextVF2 => "vsext.vf2", + VecAluOpRR::VsextVF4 => "vsext.vf4", + VecAluOpRR::VsextVF8 => "vsext.vf8", + VecAluOpRR::VmvVV => "vmv.v.v", + VecAluOpRR::VmvVX => "vmv.v.x", + VecAluOpRR::VfmvVF => "vfmv.v.f", + }) + } +} + +impl VecAluOpRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRImm5::VmvVI => 0b010111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRImm5::VmvVI => VecOpCategory::OPIVI, + } + } + + /// Returns the auxiliary encoding field for the instruction, if any. + pub fn aux_encoding(&self) -> u32 { + match self { + // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: + // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. + VecAluOpRImm5::VmvVI => 0, + } + } +} + +impl fmt::Display for VecAluOpRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + VecAluOpRImm5::VmvVI => "vmv.v.i", + }) + } +} + +impl VecAMode { + pub fn get_base_register(&self) -> Option { + match self { + VecAMode::UnitStride { base, .. } => base.get_base_register(), + } + } + + pub fn get_allocatable_register(&self) -> Option { + match self { + VecAMode::UnitStride { base, .. } => base.get_allocatable_register(), + } + } + + pub(crate) fn with_allocs(self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + VecAMode::UnitStride { base } => VecAMode::UnitStride { + base: base.with_allocs(allocs), + }, + } + } + + pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 { + match self { + VecAMode::UnitStride { base, .. } => base.get_offset_with_state(state), + } + } + + /// `mop` field, described in Table 7 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn mop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b00, + } + } + + /// `lumop` field, described in Table 9 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn lumop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b00000, + } + } + + /// `sumop` field, described in Table 10 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn sumop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b00000, + } + } + + /// The `nf[2:0]` field encodes the number of fields in each segment. 
For regular vector loads and + /// stores, nf=0, indicating that a single value is moved between a vector register group and memory + /// at each element position. Larger values in the nf field are used to access multiple contiguous + /// fields within a segment as described in Section 7.8 Vector Load/Store Segment Instructions. + /// + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn nf(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b000, + } + } +} diff --git a/cranelift/codegen/src/isa/zkasm/inst_vector.isle b/cranelift/codegen/src/isa/zkasm/inst_vector.isle new file mode 100644 index 000000000000..cadf4911f989 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/inst_vector.isle @@ -0,0 +1,1773 @@ +;; Represents the possible widths of an element when used in an operation. +(type VecElementWidth (enum + (E8) + (E16) + (E32) + (E64) +)) + +;; Vector Register Group Multiplier (LMUL) +;; +;; The LMUL setting specifies how we should group registers together. LMUL can +;; also be a fractional value, reducing the number of bits used in a single +;; vector register. Fractional LMUL is used to increase the number of effective +;; usable vector register groups when operating on mixed-width values. +(type VecLmul (enum + (LmulF8) + (LmulF4) + (LmulF2) + (Lmul1) + (Lmul2) + (Lmul4) + (Lmul8) +)) + +;; Tail Mode +;; +;; The tail mode specifies how the tail elements of a vector register are handled. +(type VecTailMode (enum + ;; Tail Agnostic means that the tail elements are left in an undefined state. + (Agnostic) + ;; Tail Undisturbed means that the tail elements are left in their original values. + (Undisturbed) +)) + +;; Mask Mode +;; +;; The mask mode specifies how the masked elements of a vector register are handled. +(type VecMaskMode (enum + ;; Mask Agnostic means that the masked out elements are left in an undefined state. + (Agnostic) + ;; Mask Undisturbed means that the masked out elements are left in their original values. + (Undisturbed) +)) + +;; Application Vector Length (AVL) +;; +;; This setting specifies the number of elements that are going to be processed +;; in a single instruction. Note: We may end up processing fewer elements than +;; the AVL setting, if they don't fit in a single register. +(type VecAvl (enum + ;; Static AVL emits a `vsetivli` that uses a constant value + (Static (size UImm5)) + ;; TODO: Add a dynamic, register based AVL mode when we are able to properly test it +)) + +(type VType (primitive VType)) +(type VState (primitive VState)) + + +;; Vector Opcode Category +;; +;; These categories are used to determine the type of operands that are allowed in the +;; instruction. +(type VecOpCategory (enum + (OPIVV) + (OPFVV) + (OPMVV) + (OPIVI) + (OPIVX) + (OPFVF) + (OPMVX) + (OPCFG) +)) + +;; Vector Opcode Masking +;; +;; When masked, the instruction will only operate on the elements that are dictated by +;; the mask register. Currently this is always fixed to v0. 
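+;; `Enabled` carries the mask register as an explicit operand; `Disabled` selects the
+;; unmasked form of the instruction (the `vm` bit set to 1).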
+(type VecOpMasking (enum + (Enabled (reg Reg)) + (Disabled) +)) + +(decl pure masked (VReg) VecOpMasking) +(rule (masked reg) (VecOpMasking.Enabled reg)) + +(decl pure unmasked () VecOpMasking) +(rule (unmasked) (VecOpMasking.Disabled)) + +;; Register to Register ALU Ops +(type VecAluOpRRR (enum + ;; Vector-Vector Opcodes + (VaddVV) + (VsaddVV) + (VsadduVV) + (VwaddVV) + (VwaddWV) + (VwadduVV) + (VwadduWV) + (VsubVV) + (VwsubVV) + (VwsubWV) + (VwsubuVV) + (VwsubuWV) + (VssubVV) + (VssubuVV) + (VmulVV) + (VmulhVV) + (VmulhuVV) + (VsmulVV) + (VsllVV) + (VsrlVV) + (VsraVV) + (VandVV) + (VorVV) + (VxorVV) + (VmaxVV) + (VmaxuVV) + (VminVV) + (VminuVV) + (VfaddVV) + (VfsubVV) + (VfmulVV) + (VfdivVV) + (VfminVV) + (VfmaxVV) + (VfsgnjVV) + (VfsgnjnVV) + (VfsgnjxVV) + (VmergeVVM) + (VredmaxuVS) + (VredminuVS) + (VrgatherVV) + (VcompressVM) + (VmseqVV) + (VmsneVV) + (VmsltuVV) + (VmsltVV) + (VmsleuVV) + (VmsleVV) + (VmfeqVV) + (VmfneVV) + (VmfltVV) + (VmfleVV) + (VmandMM) + (VmorMM) + (VmnandMM) + (VmnorMM) + + + ;; Vector-Scalar Opcodes + (VaddVX) + (VsaddVX) + (VsadduVX) + (VwaddVX) + (VwaddWX) + (VwadduVX) + (VwadduWX) + (VsubVX) + (VrsubVX) + (VwsubVX) + (VwsubWX) + (VwsubuVX) + (VwsubuWX) + (VssubVX) + (VssubuVX) + (VmulVX) + (VmulhVX) + (VmulhuVX) + (VsmulVX) + (VsllVX) + (VsrlVX) + (VsraVX) + (VandVX) + (VorVX) + (VxorVX) + (VmaxVX) + (VmaxuVX) + (VminVX) + (VminuVX) + (VslidedownVX) + (VfaddVF) + (VfsubVF) + (VfrsubVF) + (VfmulVF) + (VfdivVF) + (VfsgnjVF) + (VfrdivVF) + (VmergeVXM) + (VfmergeVFM) + (VrgatherVX) + (VmseqVX) + (VmsneVX) + (VmsltuVX) + (VmsltVX) + (VmsleuVX) + (VmsleVX) + (VmsgtuVX) + (VmsgtVX) + (VmfeqVF) + (VmfneVF) + (VmfltVF) + (VmfleVF) + (VmfgtVF) + (VmfgeVF) +)) + + + +;; Register-Imm ALU Ops that modify the destination register +(type VecAluOpRRRImm5 (enum + (VslideupVI) +)) + +;; Register-Register ALU Ops that modify the destination register +(type VecAluOpRRRR (enum + ;; Vector-Vector Opcodes + (VmaccVV) + (VnmsacVV) + (VfmaccVV) + (VfnmaccVV) + (VfmsacVV) + (VfnmsacVV) + + ;; Vector-Scalar Opcodes + (VmaccVX) + (VnmsacVX) + (VfmaccVF) + (VfnmaccVF) + (VfmsacVF) + (VfnmsacVF) +)) + +;; Register-Imm ALU Ops +(type VecAluOpRRImm5 (enum + ;; Regular VI Opcodes + (VaddVI) + (VsaddVI) + (VsadduVI) + (VrsubVI) + (VsllVI) + (VsrlVI) + (VsraVI) + (VandVI) + (VorVI) + (VxorVI) + (VssrlVI) + (VslidedownVI) + (VmergeVIM) + (VrgatherVI) + ;; This opcode represents multiple instructions `vmv1r`/`vmv2r`/`vmv4r`/etc... + ;; The immediate field specifies how many registers should be copied. + (VmvrV) + (VnclipWI) + (VnclipuWI) + (VmseqVI) + (VmsneVI) + (VmsleuVI) + (VmsleVI) + (VmsgtuVI) + (VmsgtVI) +)) + +;; Imm only ALU Ops +(type VecAluOpRImm5 (enum + (VmvVI) +)) + +;; These are all of the special cases that have weird encodings. They are all +;; single source, single destination instructions, and usually use one of +;; the two source registers as auxiliary encoding space. +(type VecAluOpRR (enum + (VmvSX) + (VmvXS) + (VfmvSF) + (VfmvFS) + ;; vmv.v* is special in that vs2 must be v0 (and is ignored) otherwise the instruction is illegal. + (VmvVV) + (VmvVX) + (VfmvVF) + (VfsqrtV) + (VsextVF2) + (VsextVF4) + (VsextVF8) + (VzextVF2) + (VzextVF4) + (VzextVF8) +)) + +;; Returns the canonical destination type for a VecAluOpRRImm5. 
+(decl pure vec_alu_rr_dst_type (VecAluOpRR) Type) +(extern constructor vec_alu_rr_dst_type vec_alu_rr_dst_type) + + +;; Vector Addressing Mode +(type VecAMode (enum + ;; Vector unit-stride operations access elements stored contiguously in memory + ;; starting from the base effective address. + (UnitStride + (base AMode)) + ;; TODO: Constant Stride + ;; TODO: Indexed Operations +)) + + +;; Builds a static VState matching a SIMD type. +;; The VState is guaranteed to be static with AVL set to the number of lanes. +;; Element size is set to the size of the type. +;; LMUL is set to 1. +;; Tail mode is set to agnostic. +;; Mask mode is set to agnostic. +(decl pure vstate_from_type (Type) VState) +(extern constructor vstate_from_type vstate_from_type) +(convert Type VState vstate_from_type) + +;; Alters the LMUL of a VState to mf2 +(decl pure vstate_mf2 (VState) VState) +(extern constructor vstate_mf2 vstate_mf2) + +;; Extracts an element width from a SIMD type. +(decl pure element_width_from_type (Type) VecElementWidth) +(rule (element_width_from_type ty) + (if-let $I8 (lane_type ty)) + (VecElementWidth.E8)) +(rule (element_width_from_type ty) + (if-let $I16 (lane_type ty)) + (VecElementWidth.E16)) +(rule (element_width_from_type ty) + (if-let $I32 (lane_type ty)) + (VecElementWidth.E32)) +(rule (element_width_from_type ty) + (if-let $F32 (lane_type ty)) + (VecElementWidth.E32)) +(rule (element_width_from_type ty) + (if-let $I64 (lane_type ty)) + (VecElementWidth.E64)) +(rule (element_width_from_type ty) + (if-let $F64 (lane_type ty)) + (VecElementWidth.E64)) + +(decl pure min_vec_reg_size () u64) +(extern constructor min_vec_reg_size min_vec_reg_size) + +;; An extractor that matches any type that is known to fit in a single vector +;; register. +(decl ty_vec_fits_in_register (Type) Type) +(extern extractor ty_vec_fits_in_register ty_vec_fits_in_register) + +;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; As noted in the RISC-V Vector Extension Specification, rs2 is the first +;; source register and rs1 is the second source register. This is the opposite +;; of the usual RISC-V register order. +;; See Section 10.1 of the RISC-V Vector Extension Specification. + + +;; Helper for emitting `MInst.VecAluRRRR` instructions. +;; These instructions modify the destination register. +(decl vec_alu_rrrr (VecAluOpRRRR VReg VReg Reg VecOpMasking VState) VReg) +(rule (vec_alu_rrrr op vd_src vs2 vs1 mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRRR op vd vd_src vs2 vs1 mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRRImm5` instructions. +;; These instructions modify the destination register. +(decl vec_alu_rrr_imm5 (VecAluOpRRRImm5 VReg VReg Imm5 VecOpMasking VState) VReg) +(rule (vec_alu_rrr_imm5 op vd_src vs2 imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRRImm5 op vd vd_src vs2 imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRRImm5` instructions where the immediate +;; is zero extended instead of sign extended. +(decl vec_alu_rrr_uimm5 (VecAluOpRRRImm5 VReg VReg UImm5 VecOpMasking VState) VReg) +(rule (vec_alu_rrr_uimm5 op vd_src vs2 imm mask vstate) + (vec_alu_rrr_imm5 op vd_src vs2 (uimm5_bitcast_to_imm5 imm) mask vstate)) + +;; Helper for emitting `MInst.VecAluRRR` instructions. 
+(decl vec_alu_rrr (VecAluOpRRR Reg Reg VecOpMasking VState) Reg) +(rule (vec_alu_rrr op vs2 vs1 mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRR op vd vs2 vs1 mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions. +(decl vec_alu_rr_imm5 (VecAluOpRRImm5 Reg Imm5 VecOpMasking VState) Reg) +(rule (vec_alu_rr_imm5 op vs2 imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRImm5 op vd vs2 imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions where the immediate +;; is zero extended instead of sign extended. +(decl vec_alu_rr_uimm5 (VecAluOpRRImm5 Reg UImm5 VecOpMasking VState) Reg) +(rule (vec_alu_rr_uimm5 op vs2 imm mask vstate) + (vec_alu_rr_imm5 op vs2 (uimm5_bitcast_to_imm5 imm) mask vstate)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions that use the Imm5 as +;; auxiliary encoding space. +(decl vec_alu_rr (VecAluOpRR Reg VecOpMasking VState) Reg) +(rule (vec_alu_rr op vs mask vstate) + (let ((vd WritableReg (temp_writable_reg (vec_alu_rr_dst_type op))) + (_ Unit (emit (MInst.VecAluRR op vd vs mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRImm5` instructions. +(decl vec_alu_r_imm5 (VecAluOpRImm5 Imm5 VecOpMasking VState) Reg) +(rule (vec_alu_r_imm5 op imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRImm5 op vd imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecLoad` instructions. +(decl vec_load (VecElementWidth VecAMode MemFlags VecOpMasking VState) Reg) +(rule (vec_load eew from flags mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecLoad eew vd from flags mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecStore` instructions. +(decl vec_store (VecElementWidth VecAMode VReg MemFlags VecOpMasking VState) InstOutput) +(rule (vec_store eew to from flags mask vstate) + (side_effect + (SideEffectNoResult.Inst (MInst.VecStore eew to from flags mask vstate)))) + +;; Helper for emitting the `vadd.vv` instruction. +(decl rv_vadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vadd.vx` instruction. +(decl rv_vadd_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vadd_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VaddVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vadd.vi` instruction. +(decl rv_vadd_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vadd_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VaddVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vsadd.vv` instruction. +(decl rv_vsadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsadd.vx` instruction. +(decl rv_vsadd_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsadd_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsaddVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsadd.vi` instruction. +(decl rv_vsadd_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vsadd_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VsaddVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vsaddu.vv` instruction. 
+(decl rv_vsaddu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vsaddu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VsadduVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vsaddu.vx` instruction.
+(decl rv_vsaddu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vsaddu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VsadduVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vsaddu.vi` instruction.
+(decl rv_vsaddu_vi (VReg Imm5 VecOpMasking VState) VReg)
+(rule (rv_vsaddu_vi vs2 imm mask vstate)
+  (vec_alu_rr_imm5 (VecAluOpRRImm5.VsadduVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vwadd.vv` instruction.
+;;
+;; Widening integer add, 2*SEW = SEW + SEW
+(decl rv_vwadd_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwadd_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwaddVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwadd.vx` instruction.
+;;
+;; Widening integer add, 2*SEW = SEW + SEW
+(decl rv_vwadd_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwadd_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwaddVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwadd.wv` instruction.
+;;
+;; Widening integer add, 2*SEW = 2*SEW + SEW
+(decl rv_vwadd_wv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwadd_wv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwaddWV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwadd.wx` instruction.
+;;
+;; Widening integer add, 2*SEW = 2*SEW + SEW
+(decl rv_vwadd_wx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwadd_wx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwaddWX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwaddu.vv` instruction.
+;;
+;; Widening unsigned integer add, 2*SEW = SEW + SEW
+(decl rv_vwaddu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwaddu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwadduVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwaddu.vx` instruction.
+;;
+;; Widening unsigned integer add, 2*SEW = SEW + SEW
+(decl rv_vwaddu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwaddu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwadduVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwaddu.wv` instruction.
+;;
+;; Widening unsigned integer add, 2*SEW = 2*SEW + SEW
+(decl rv_vwaddu_wv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwaddu_wv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwadduWV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwaddu.wx` instruction.
+;;
+;; Widening unsigned integer add, 2*SEW = 2*SEW + SEW
+(decl rv_vwaddu_wx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwaddu_wx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwadduWX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vsub.vv` instruction.
+(decl rv_vsub_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vsub_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VsubVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vsub.vx` instruction.
+(decl rv_vsub_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vsub_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VsubVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vrsub.vx` instruction.
+(decl rv_vrsub_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vrsub_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsub.vv` instruction.
+;;
+;; Widening integer sub, 2*SEW = SEW - SEW
+(decl rv_vwsub_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwsub_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsub.vx` instruction.
+;;
+;; Widening integer sub, 2*SEW = SEW - SEW
+(decl rv_vwsub_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwsub_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsub.wv` instruction.
+;;
+;; Widening integer sub, 2*SEW = 2*SEW - SEW
+(decl rv_vwsub_wv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwsub_wv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubWV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsub.wx` instruction.
+;;
+;; Widening integer sub, 2*SEW = 2*SEW - SEW
+(decl rv_vwsub_wx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwsub_wx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubWX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsubu.vv` instruction.
+;;
+;; Widening unsigned integer sub, 2*SEW = SEW - SEW
+(decl rv_vwsubu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwsubu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubuVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsubu.vx` instruction.
+;;
+;; Widening unsigned integer sub, 2*SEW = SEW - SEW
+(decl rv_vwsubu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwsubu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubuVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsubu.wv` instruction.
+;;
+;; Widening unsigned integer sub, 2*SEW = 2*SEW - SEW
+(decl rv_vwsubu_wv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vwsubu_wv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubuWV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vwsubu.wx` instruction.
+;;
+;; Widening unsigned integer sub, 2*SEW = 2*SEW - SEW
+(decl rv_vwsubu_wx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vwsubu_wx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VwsubuWX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vssub.vv` instruction.
+(decl rv_vssub_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vssub_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VssubVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vssub.vx` instruction.
+(decl rv_vssub_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vssub_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VssubVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vssubu.vv` instruction.
+(decl rv_vssubu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vssubu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VssubuVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vssubu.vx` instruction.
+(decl rv_vssubu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vssubu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VssubuVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vneg.v` pseudo-instruction.
+(decl rv_vneg_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vneg_v vs2 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 (zero_reg) mask vstate))
+
+;; Helper for emitting the `vrsub.vi` instruction.
+(decl rv_vrsub_vi (VReg Imm5 VecOpMasking VState) VReg)
+(rule (rv_vrsub_vi vs2 imm mask vstate)
+  (vec_alu_rr_imm5 (VecAluOpRRImm5.VrsubVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vmul.vv` instruction.
+(decl rv_vmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmul.vx` instruction. +(decl rv_vmul_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmul_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulh.vv` instruction. +(decl rv_vmulh_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmulh_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulh.vx` instruction. +(decl rv_vmulh_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmulh_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulhu.vv` instruction. +(decl rv_vmulhu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmulhu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulhu.vx` instruction. +(decl rv_vmulhu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmulhu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsmul.vv` instruction. +;; +;; Signed saturating and rounding fractional multiply +;; # vd[i] = clip(roundoff_signed(vs2[i]*vs1[i], SEW-1)) +(decl rv_vsmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsmul.vx` instruction. +;; +;; Signed saturating and rounding fractional multiply +;; # vd[i] = clip(roundoff_signed(vs2[i]*x[rs1], SEW-1)) +(decl rv_vsmul_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsmul_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsmulVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmacc.vv` instruction. +;; +;; Integer multiply-add, overwrite addend +;; # vd[i] = +(vs1[i] * vs2[i]) + vd[i] +(decl rv_vmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmacc.vx` instruction. +;; +;; Integer multiply-add, overwrite addend +;; # vd[i] = +(x[rs1] * vs2[i]) + vd[i] +(decl rv_vmacc_vx (VReg VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmacc_vx vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VmaccVX) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vnmsac.vv` instruction. +;; +;; Integer multiply-sub, overwrite minuend +;; # vd[i] = -(vs1[i] * vs2[i]) + vd[i] +(decl rv_vnmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vnmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VnmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vnmsac.vx` instruction. +;; +;; Integer multiply-sub, overwrite minuend +;; # vd[i] = -(x[rs1] * vs2[i]) + vd[i] +(decl rv_vnmsac_vx (VReg VReg XReg VecOpMasking VState) VReg) +(rule (rv_vnmsac_vx vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VnmsacVX) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `sll.vv` instruction. +(decl rv_vsll_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsll_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsllVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `sll.vx` instruction. 
+(decl rv_vsll_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsll_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsllVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsll.vi` instruction. +(decl rv_vsll_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsll_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsllVI) vs2 imm mask vstate)) + +;; Helper for emitting the `srl.vv` instruction. +(decl rv_vsrl_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsrl_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsrlVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `srl.vx` instruction. +(decl rv_vsrl_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsrl_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsrlVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsrl.vi` instruction. +(decl rv_vsrl_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsrl_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsrlVI) vs2 imm mask vstate)) + +;; Helper for emitting the `sra.vv` instruction. +(decl rv_vsra_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsra_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsraVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `sra.vx` instruction. +(decl rv_vsra_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsra_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsraVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsra.vi` instruction. +(decl rv_vsra_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsra_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsraVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vand.vv` instruction. +(decl rv_vand_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vand_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VandVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vand.vx` instruction. +(decl rv_vand_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vand_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VandVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vand.vi` instruction. +(decl rv_vand_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vand_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VandVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vor.vv` instruction. +(decl rv_vor_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vor_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VorVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vor.vx` instruction. +(decl rv_vor_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vor_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VorVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vor.vi` instruction. +(decl rv_vor_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vor_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VorVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vxor.vv` instruction. +(decl rv_vxor_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vxor_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VxorVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vxor.vx` instruction. +(decl rv_vxor_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vxor_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VxorVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vxor.vi` instruction. +(decl rv_vxor_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vxor_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VxorVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vssrl.vi` instruction. 
+;; +;; vd[i] = (unsigned(vs2[i]) >> imm) + r +;; +;; `r` here is the rounding mode currently selected. +(decl rv_vssrl_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vssrl_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VssrlVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vnot.v` instruction. +;; This is just a mnemonic for `vxor.vi vd, vs, -1` +(decl rv_vnot_v (VReg VecOpMasking VState) VReg) +(rule (rv_vnot_v vs2 mask vstate) + (if-let neg1 (imm5_from_i8 -1)) + (rv_vxor_vi vs2 neg1 mask vstate)) + +;; Helper for emitting the `vmax.vv` instruction. +(decl rv_vmax_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmax_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmax.vx` instruction. +(decl rv_vmax_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmax_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmin.vv` instruction. +(decl rv_vmin_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmin_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmin.vx` instruction. +(decl rv_vmin_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmin_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmaxu.vv` instruction. +(decl rv_vmaxu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmaxu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmaxu.vx` instruction. +(decl rv_vmaxu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmaxu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vminu.vv` instruction. +(decl rv_vminu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vminu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vminu.vx` instruction. +(decl rv_vminu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vminu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfadd.vv` instruction. +(decl rv_vfadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfadd.vf` instruction. +(decl rv_vfadd_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfadd_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfaddVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsub.vv` instruction. +(decl rv_vfsub_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfsub_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsubVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsub.vf` instruction. +(decl rv_vfsub_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfsub_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsubVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfrsub.vf` instruction. +(decl rv_vfrsub_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfrsub_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfrsubVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmul.vv` instruction. +(decl rv_vfmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmul.vf` instruction. 
+(decl rv_vfmul_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmul_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmulVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmacc.vv` instruction. +;; +;; FP multiply-accumulate, overwrites addend +;; # vd[i] = +(vs1[i] * vs2[i]) + vd[i] +(decl rv_vfmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmacc.vf` instruction. +;; +;; FP multiply-accumulate, overwrites addend +;; # vd[i] = +(f[rs1] * vs2[i]) + vd[i] +(decl rv_vfmacc_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmacc_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmaccVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmacc.vv` instruction. +;; +;; FP negate-(multiply-accumulate), overwrites subtrahend +;; # vd[i] = -(vs1[i] * vs2[i]) - vd[i] +(decl rv_vfnmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfnmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmacc.vf` instruction. +;; +;; FP negate-(multiply-accumulate), overwrites subtrahend +;; # vd[i] = -(f[rs1] * vs2[i]) - vd[i] +(decl rv_vfnmacc_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfnmacc_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmaccVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmsac.vv` instruction. +;; +;; FP multiply-subtract-accumulator, overwrites subtrahend +;; # vd[i] = +(vs1[i] * vs2[i]) - vd[i] +(decl rv_vfmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmsac.vf` instruction. +;; +;; FP multiply-subtract-accumulator, overwrites subtrahend +;; # vd[i] = +(f[rs1] * vs2[i]) - vd[i] +(decl rv_vfmsac_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmsac_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmsacVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmsac.vv` instruction. +;; +;; FP negate-(multiply-subtract-accumulator), overwrites minuend +;; # vd[i] = -(vs1[i] * vs2[i]) + vd[i] +(decl rv_vfnmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfnmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmsac.vf` instruction. +;; +;; FP negate-(multiply-subtract-accumulator), overwrites minuend +;; # vd[i] = -(f[rs1] * vs2[i]) + vd[i] +(decl rv_vfnmsac_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfnmsac_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmsacVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfdiv.vv` instruction. +(decl rv_vfdiv_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfdiv_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfdivVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfdiv.vf` instruction. +(decl rv_vfdiv_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfdiv_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfdivVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfrdiv.vf` instruction. +(decl rv_vfrdiv_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfrdiv_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfrdivVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmin.vv` instruction. 
+(decl rv_vfmin_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfmin_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfminVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfmax.vv` instruction.
+(decl rv_vfmax_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfmax_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfmaxVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfsgnj.vv` ("Floating Point Sign Injection") instruction.
+;; The output of this instruction is `vs2` with the sign bit from `vs1`.
+(decl rv_vfsgnj_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnj_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfsgnj.vf` ("Floating Point Sign Injection") instruction.
+(decl rv_vfsgnj_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnj_vf vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjVF) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfsgnjn.vv` ("Floating Point Sign Injection Negated") instruction.
+;; The output of this instruction is `vs2` with the negated sign bit from `vs1`.
+(decl rv_vfsgnjn_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnjn_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjnVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfneg.v` instruction.
+;; This instruction is a mnemonic for `vfsgnjn.vv vd, vs, vs`.
+(decl rv_vfneg_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vfneg_v vs mask vstate) (rv_vfsgnjn_vv vs vs mask vstate))
+
+;; Helper for emitting the `vfsgnjx.vv` ("Floating Point Sign Injection Exclusive") instruction.
+;; The output of this instruction is `vs2` with the XOR of the sign bits from `vs2` and `vs1`.
+;; When `vs2 == vs1` this implements `fabs`.
+(decl rv_vfsgnjx_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnjx_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjxVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfabs.v` instruction.
+;; This instruction is a mnemonic for `vfsgnjx.vv vd, vs, vs`.
+(decl rv_vfabs_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vfabs_v vs mask vstate) (rv_vfsgnjx_vv vs vs mask vstate))
+
+;; Helper for emitting the `vfsqrt.v` instruction.
+;; This instruction computes the element-wise square root of the source vector.
+(decl rv_vfsqrt_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vfsqrt_v vs mask vstate)
+  (vec_alu_rr (VecAluOpRR.VfsqrtV) vs mask vstate))
+
+;; Helper for emitting the `vslidedown.vx` instruction.
+;; `vslidedown` moves all elements in the vector down by n elements.
+;; What ends up in the topmost elements is determined by the tail policy.
+(decl rv_vslidedown_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vslidedown_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VslidedownVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vslidedown.vi` instruction.
+;; Unlike other `vi` instructions the immediate is zero extended.
+(decl rv_vslidedown_vi (VReg UImm5 VecOpMasking VState) VReg)
+(rule (rv_vslidedown_vi vs2 imm mask vstate)
+  (vec_alu_rr_uimm5 (VecAluOpRRImm5.VslidedownVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vslideup.vi` instruction.
+;; Unlike other `vi` instructions the immediate is zero extended.
+;; This is implemented as a two-source-operand instruction, since it only
+;; partially modifies the destination register.
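+;; For example, per the RISC-V vector spec, `vslideup.vi vd, vs2, 2` writes
+;; vs2[i] into vd[i+2] and leaves vd[0] and vd[1] untouched, which is why the
+;; old value of `vd` must be passed in as an additional source.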
+(decl rv_vslideup_vvi (VReg VReg UImm5 VecOpMasking VState) VReg)
+(rule (rv_vslideup_vvi vd vs2 imm mask vstate)
+  (vec_alu_rrr_uimm5 (VecAluOpRRRImm5.VslideupVI) vd vs2 imm mask vstate))
+
+;; Helper for emitting the `vmv.x.s` instruction.
+;; This instruction copies the first element of the source vector to the destination X register.
+;; Masked versions of this instruction are not supported.
+(decl rv_vmv_xs (VReg VState) XReg)
+(rule (rv_vmv_xs vs vstate)
+  (vec_alu_rr (VecAluOpRR.VmvXS) vs (unmasked) vstate))
+
+;; Helper for emitting the `vfmv.f.s` instruction.
+;; This instruction copies the first element of the source vector to the destination F register.
+;; Masked versions of this instruction are not supported.
+(decl rv_vfmv_fs (VReg VState) FReg)
+(rule (rv_vfmv_fs vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfmvFS) vs (unmasked) vstate))
+
+;; Helper for emitting the `vmv.s.x` instruction.
+;; This instruction copies the source X register into the first element of the destination vector.
+;; Masked versions of this instruction are not supported.
+(decl rv_vmv_sx (XReg VState) VReg)
+(rule (rv_vmv_sx vs vstate)
+  (vec_alu_rr (VecAluOpRR.VmvSX) vs (unmasked) vstate))
+
+;; Helper for emitting the `vfmv.s.f` instruction.
+;; This instruction copies the source F register into the first element of the destination vector.
+;; Masked versions of this instruction are not supported.
+(decl rv_vfmv_sf (FReg VState) VReg)
+(rule (rv_vfmv_sf vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfmvSF) vs (unmasked) vstate))
+
+;; Helper for emitting the `vmv.v.x` instruction.
+;; This instruction splats the X register into all elements of the destination vector.
+;; Masked versions of this instruction are called `vmerge`.
+(decl rv_vmv_vx (XReg VState) VReg)
+(rule (rv_vmv_vx vs vstate)
+  (vec_alu_rr (VecAluOpRR.VmvVX) vs (unmasked) vstate))
+
+;; Helper for emitting the `vfmv.v.f` instruction.
+;; This instruction splats the F register into all elements of the destination vector.
+;; Masked versions of this instruction are called `vmerge`.
+(decl rv_vfmv_vf (FReg VState) VReg)
+(rule (rv_vfmv_vf vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfmvVF) vs (unmasked) vstate))
+
+;; Helper for emitting the `vmv.v.i` instruction.
+;; This instruction splats the immediate value into all elements of the destination vector.
+;; Masked versions of this instruction are called `vmerge`.
+(decl rv_vmv_vi (Imm5 VState) VReg)
+(rule (rv_vmv_vi imm vstate)
+  (vec_alu_r_imm5 (VecAluOpRImm5.VmvVI) imm (unmasked) vstate))
+
+;; Helper for emitting the `vmerge.vvm` instruction.
+;; This instruction merges the elements of the two source vectors into the destination vector
+;; based on a mask. Elements are taken from the first source vector if the mask bit is clear,
+;; and from the second source vector if the mask bit is set. This instruction is always masked.
+;;
+;; vd[i] = v0.mask[i] ? vs1[i] : vs2[i]
+(decl rv_vmerge_vvm (VReg VReg VReg VState) VReg)
+(rule (rv_vmerge_vvm vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmergeVVM) vs2 vs1 (masked mask) vstate))
+
+;; Helper for emitting the `vmerge.vxm` instruction.
+;; Elements are taken from the first source vector if the mask bit is clear, and from the X
+;; register if the mask bit is set. This instruction is always masked.
+;;
+;; vd[i] = v0.mask[i] ? x[rs1] : vs2[i]
+(decl rv_vmerge_vxm (VReg XReg VReg VState) VReg)
+(rule (rv_vmerge_vxm vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmergeVXM) vs2 vs1 (masked mask) vstate))
+
+;; Helper for emitting the `vfmerge.vfm` instruction.
+;; Elements are taken from the first source vector if the mask bit is clear, and from the F +;; register if the mask bit is set. This instruction is always masked. +;; +;; vd[i] = v0.mask[i] ? f[rs1] : vs2[i] +(decl rv_vfmerge_vfm (VReg FReg VReg VState) VReg) +(rule (rv_vfmerge_vfm vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmergeVFM) vs2 vs1 (masked mask) vstate)) + +;; Helper for emitting the `vmerge.vim` instruction. +;; Elements are taken from the first source vector if the mask bit is clear, and from the +;; immediate value if the mask bit is set. This instruction is always masked. +;; +;; vd[i] = v0.mask[i] ? imm : vs2[i] +(decl rv_vmerge_vim (VReg Imm5 VReg VState) VReg) +(rule (rv_vmerge_vim vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmergeVIM) vs2 imm (masked mask) vstate)) + + +;; Helper for emitting the `vredminu.vs` instruction. +;; +;; vd[0] = minu( vs1[0] , vs2[*] ) +(decl rv_vredminu_vs (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vredminu_vs vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VredminuVS) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vredmaxu.vs` instruction. +;; +;; vd[0] = maxu( vs1[0] , vs2[*] ) +(decl rv_vredmaxu_vs (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vredmaxu_vs vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VredmaxuVS) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vv` instruction. +;; +;; vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; +(decl rv_vrgather_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vrgather_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrgatherVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vx` instruction. +;; +;; vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] +(decl rv_vrgather_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vrgather_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrgatherVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vi` instruction. +(decl rv_vrgather_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vrgather_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VrgatherVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vcompress.vm` instruction. +;; +;; The vector compress instruction allows elements selected by a vector mask +;; register from a source vector register group to be packed into contiguous +;; elements at the start of the destination vector register group. +;; +;; The mask register is specified through vs1 +(decl rv_vcompress_vm (VReg VReg VState) VReg) +(rule (rv_vcompress_vm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VcompressVM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmseq.vv` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vx` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vi` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmseq_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmseqVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsne.vv` (Vector Mask Set If Not Equal) instruction. 
+(decl rv_vmsne_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmsne_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsneVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsne.vx` (Vector Mask Set If Not Equal) instruction.
+(decl rv_vmsne_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vmsne_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsneVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsne.vi` (Vector Mask Set If Not Equal) instruction.
+(decl rv_vmsne_vi (VReg Imm5 VecOpMasking VState) VReg)
+(rule (rv_vmsne_vi vs2 imm mask vstate)
+  (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsneVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vmsltu.vv` (Vector Mask Set If Less Than, Unsigned) instruction.
+(decl rv_vmsltu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmsltu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsltuVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsltu.vx` (Vector Mask Set If Less Than, Unsigned) instruction.
+(decl rv_vmsltu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vmsltu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsltuVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmslt.vv` (Vector Mask Set If Less Than) instruction.
+(decl rv_vmslt_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmslt_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsltVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmslt.vx` (Vector Mask Set If Less Than) instruction.
+(decl rv_vmslt_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vmslt_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsltVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsleu.vv` (Vector Mask Set If Less Than or Equal, Unsigned) instruction.
+(decl rv_vmsleu_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmsleu_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsleuVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsleu.vx` (Vector Mask Set If Less Than or Equal, Unsigned) instruction.
+(decl rv_vmsleu_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vmsleu_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsleuVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsleu.vi` (Vector Mask Set If Less Than or Equal, Unsigned) instruction.
+(decl rv_vmsleu_vi (VReg Imm5 VecOpMasking VState) VReg)
+(rule (rv_vmsleu_vi vs2 imm mask vstate)
+  (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleuVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vmsle.vv` (Vector Mask Set If Less Than or Equal) instruction.
+(decl rv_vmsle_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmsle_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsleVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsle.vx` (Vector Mask Set If Less Than or Equal) instruction.
+(decl rv_vmsle_vx (VReg XReg VecOpMasking VState) VReg)
+(rule (rv_vmsle_vx vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VmsleVX) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmsle.vi` (Vector Mask Set If Less Than or Equal) instruction.
+(decl rv_vmsle_vi (VReg Imm5 VecOpMasking VState) VReg)
+(rule (rv_vmsle_vi vs2 imm mask vstate)
+  (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleVI) vs2 imm mask vstate))
+
+;; Helper for emitting the `vmsgtu.vv` (Vector Mask Set If Greater Than, Unsigned) instruction.
+;; This is an alias for `vmsltu.vv` with the operands inverted.
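+;; (The base vector ISA only provides `vmsgtu.vx`/`vmsgtu.vi` encodings, so the
+;; vector-vector form is synthesized by swapping the operands of `vmsltu.vv`.)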
+(decl rv_vmsgtu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vv vs2 vs1 mask vstate) (rv_vmsltu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgtu.vx` (Vector Mask Set If Greater Than, Unsigned) instruction. +(decl rv_vmsgtu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgtu.vi` (Vector Mask Set If Greater Than, Unsigned) instruction. +(decl rv_vmsgtu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtuVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgt.vv` (Vector Mask Set If Greater Than) instruction. +;; This is an alias for `vmslt.vv` with the operands inverted. +(decl rv_vmsgt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vv vs2 vs1 mask vstate) (rv_vmslt_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgt.vx` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgt.vi` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgt_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgeu.vv` (Vector Mask Set If Greater Than or Equal, Unsigned) instruction. +;; This is an alias for `vmsleu.vv` with the operands inverted. +(decl rv_vmsgeu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgeu_vv vs2 vs1 mask vstate) (rv_vmsleu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsge.vv` (Vector Mask Set If Greater Than or Equal) instruction. +;; This is an alias for `vmsle.vv` with the operands inverted. +(decl rv_vmsge_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsge_vv vs2 vs1 mask vstate) (rv_vmsle_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfeq.vv` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfeq.vf` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vv` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vf` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vv` (Vector Mask Set If Float Less Than) instruction. +(decl rv_vmflt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmflt_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfltVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vf` (Vector Mask Set If Float Less Than) instruction. 
+(decl rv_vmflt_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmflt_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfltVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfle.vv` (Vector Mask Set If Float Less Than Or Equal) instruction. +(decl rv_vmfle_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfle_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfleVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfle.vf` (Vector Mask Set If Float Less Than Or Equal) instruction. +(decl rv_vmfle_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfle_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfleVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfgt.vv` (Vector Mask Set If Float Greater Than) instruction. +;; This is an alias for `vmflt.vv` with the operands inverted. +(decl rv_vmfgt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfgt_vv vs2 vs1 mask vstate) (rv_vmflt_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfgt.vf` (Vector Mask Set If Float Greater Than) instruction. +(decl rv_vmfgt_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfgt_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfgtVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfge.vv` (Vector Mask Set If Float Greater Than Or Equal) instruction. +;; This is an alias for `vmfle.vv` with the operands inverted. +(decl rv_vmfge_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfge_vv vs2 vs1 mask vstate) (rv_vmfle_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfge.vf` (Vector Mask Set If Float Greater Than Or Equal) instruction. +(decl rv_vmfge_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfge_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfgeVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vzext.vf2` instruction. +;; Zero-extend SEW/2 source to SEW destination +(decl rv_vzext_vf2 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf2 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF2) vs mask vstate)) + +;; Helper for emitting the `vzext.vf4` instruction. +;; Zero-extend SEW/4 source to SEW destination +(decl rv_vzext_vf4 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf4 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF4) vs mask vstate)) + +;; Helper for emitting the `vzext.vf8` instruction. +;; Zero-extend SEW/8 source to SEW destination +(decl rv_vzext_vf8 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf8 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF8) vs mask vstate)) + +;; Helper for emitting the `vsext.vf2` instruction. +;; Sign-extend SEW/2 source to SEW destination +(decl rv_vsext_vf2 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf2 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF2) vs mask vstate)) + +;; Helper for emitting the `vsext.vf4` instruction. +;; Sign-extend SEW/4 source to SEW destination +(decl rv_vsext_vf4 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf4 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF4) vs mask vstate)) + +;; Helper for emitting the `vsext.vf8` instruction. +;; Sign-extend SEW/8 source to SEW destination +(decl rv_vsext_vf8 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf8 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF8) vs mask vstate)) + +;; Helper for emitting the `vnclip.wi` instruction. 
+;; +;; vd[i] = clip(roundoff_signed(vs2[i], uimm)) +(decl rv_vnclip_wi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vnclip_wi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipWI) vs2 imm mask vstate)) + +;; Helper for emitting the `vnclipu.wi` instruction. +;; +;; vd[i] = clip(roundoff_unsigned(vs2[i], uimm)) +(decl rv_vnclipu_wi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vnclipu_wi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipuWI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmand.mm` (Mask Bitwise AND) instruction. +;; +;; vd.mask[i] = vs2.mask[i] && vs1.mask[i] +(decl rv_vmand_mm (VReg VReg VState) VReg) +(rule (rv_vmand_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmandMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmor.mm` (Mask Bitwise OR) instruction. +;; +;; vd.mask[i] = vs2.mask[i] || vs1.mask[i] +(decl rv_vmor_mm (VReg VReg VState) VReg) +(rule (rv_vmor_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmorMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmnand.mm` (Mask Bitwise NAND) instruction. +;; +;; vd.mask[i] = !(vs2.mask[i] && vs1.mask[i]) +(decl rv_vmnand_mm (VReg VReg VState) VReg) +(rule (rv_vmnand_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmnandMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmnot.m` (Mask Bitwise NOT) instruction. +;; This is an alias for `vmnand.mm vd, vs, vs` +;; +;; vd.mask[i] = !vs.mask[i] +(decl rv_vmnot_m (VReg VState) VReg) +(rule (rv_vmnot_m vs vstate) (rv_vmnand_mm vs vs vstate)) + +;; Helper for emitting the `vmnor.mm` (Mask Bitwise NOR) instruction. +;; +;; vd.mask[i] = !(vs2.mask[i] || vs1.mask[i]) +(decl rv_vmnor_mm (VReg VReg VState) VReg) +(rule (rv_vmnor_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmnorMM) vs2 vs1 (unmasked) vstate)) + +;;;; Multi-Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_extractlane (Type VReg u8) Reg) + +;; When extracting lane 0 for floats, we can use `vfmv.f.s` directly. +(rule 3 (gen_extractlane (ty_vec_fits_in_register ty) src 0) + (if (ty_vector_float ty)) + (rv_vfmv_fs src ty)) + +;; When extracting lane 0 for integers, we can use `vmv.x.s` directly. +(rule 2 (gen_extractlane (ty_vec_fits_in_register ty) src 0) + (if (ty_vector_not_float ty)) + (rv_vmv_xs src ty)) + +;; In the general case, we must first use a `vslidedown` to place the correct lane +;; in index 0, and then use the appropriate `vmv` instruction. +;; If the index fits into a 5-bit immediate, we can emit a `vslidedown.vi`. +(rule 1 (gen_extractlane (ty_vec_fits_in_register ty) src (uimm5_from_u8 idx)) + (gen_extractlane ty (rv_vslidedown_vi src idx (unmasked) ty) 0)) + +;; Otherwise lower it into an X register. +(rule 0 (gen_extractlane (ty_vec_fits_in_register ty) src idx) + (gen_extractlane ty (rv_vslidedown_vx src (imm $I64 idx) (unmasked) ty) 0)) + + +;; Build a vector mask from a u64 +;; TODO(#6571): We should merge this with the `vconst` rules, and take advantage of +;; the other existing `vconst` rules. +(decl gen_vec_mask (u64) VReg) + +;; When the immediate fits in a 5-bit immediate, we can use `vmv.v.i` directly. +(rule 1 (gen_vec_mask (imm5_from_u64 imm)) + (rv_vmv_vi imm (vstate_from_type $I64X2))) + +;; Materialize the mask into an X register, and move it into the bottom of +;; the vector register. +(rule 0 (gen_vec_mask mask) + (rv_vmv_sx (imm $I64 mask) (vstate_from_type $I64X2))) + + +;; Loads a `VCodeConstant` value into a vector register. 
For some special `VCodeConstant`s +;; we can use a dedicated instruction, otherwise we load the value from the pool. +;; +;; Type is the preferred type to use when loading the constant. +(decl gen_constant (Type VCodeConstant) VReg) + +;; The fallback case is to load the constant from the pool. +(rule (gen_constant ty n) + (vec_load + (element_width_from_type ty) + (VecAMode.UnitStride (gen_const_amode n)) + (mem_flags_trusted) + (unmasked) + ty)) + + +;; Emits a vslidedown instruction that moves half the lanes down. +(decl gen_slidedown_half (Type VReg) VReg) + +;; If the lane count can fit in a 5-bit immediate, we can use `vslidedown.vi`. +(rule 1 (gen_slidedown_half (ty_vec_fits_in_register ty) src) + (if-let (uimm5_from_u64 amt) (u64_udiv (ty_lane_count ty) 2)) + (rv_vslidedown_vi src amt (unmasked) ty)) + +;; Otherwise lower it into an X register. +(rule 0 (gen_slidedown_half (ty_vec_fits_in_register ty) src) + (if-let amt (u64_udiv (ty_lane_count ty) 2)) + (rv_vslidedown_vx src (imm $I64 amt) (unmasked) ty)) + + +;; Expands a mask into SEW wide lanes. Enabled lanes are set to all ones, disabled +;; lanes are set to all zeros. +(decl gen_expand_mask (Type VReg) VReg) +(rule (gen_expand_mask ty mask) + (if-let zero (imm5_from_i8 0)) + (if-let neg1 (imm5_from_i8 -1)) + (rv_vmerge_vim (rv_vmv_vi zero ty) neg1 mask ty)) + + +;; Builds a vector mask corresponding to the IntCC operation. +;; TODO: We are still missing some rules here for immediates. See #6623 +(decl gen_icmp_mask (Type IntCC Value Value) VReg) + +;; IntCC.Equal + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x y) + (rv_vmseq_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (splat y)) + (rv_vmseq_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (splat x) y) + (rv_vmseq_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (replicated_imm5 y)) + (rv_vmseq_vi x y (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (replicated_imm5 x) y) + (rv_vmseq_vi y x (unmasked) ty)) + +;; IntCC.NotEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x y) + (rv_vmsne_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (splat y)) + (rv_vmsne_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (splat x) y) + (rv_vmsne_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (replicated_imm5 y)) + (rv_vmsne_vi x y (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (replicated_imm5 x) y) + (rv_vmsne_vi y x (unmasked) ty)) + +;; IntCC.UnsignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x y) + (rv_vmsltu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x (splat y)) + (rv_vmsltu_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) (splat x) y) + (rv_vmsgtu_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) (replicated_imm5 x) y) + (rv_vmsgtu_vi y x (unmasked) ty)) + +;; IntCC.SignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x y) + (rv_vmslt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x 
(splat y)) + (rv_vmslt_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) (splat x) y) + (rv_vmsgt_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) (replicated_imm5 x) y) + (rv_vmsgt_vi y x (unmasked) ty)) + +;; IntCC.UnsignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x y) + (rv_vmsleu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (splat y)) + (rv_vmsleu_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (replicated_imm5 y)) + (rv_vmsleu_vi x y (unmasked) ty)) + +;; IntCC.SignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x y) + (rv_vmsle_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (splat y)) + (rv_vmsle_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (replicated_imm5 y)) + (rv_vmsle_vi x y (unmasked) ty)) + +;; IntCC.UnsignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x y) + (rv_vmsgtu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (splat y)) + (rv_vmsgtu_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) (splat x) y) + (rv_vmsltu_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (replicated_imm5 y)) + (rv_vmsgtu_vi x y (unmasked) ty)) + +;; IntCC.SignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x y) + (rv_vmsgt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (splat y)) + (rv_vmsgt_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) (splat x) y) + (rv_vmslt_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (replicated_imm5 y)) + (rv_vmsgt_vi x y (unmasked) ty)) + +;; IntCC.UnsignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) x y) + (rv_vmsgeu_vv x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) (splat x) y) + (rv_vmsleu_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) (replicated_imm5 x) y) + (rv_vmsleu_vi y x (unmasked) ty)) + +;; IntCC.SignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) x y) + (rv_vmsge_vv x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) (splat x) y) + (rv_vmsle_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) (replicated_imm5 x) y) + (rv_vmsle_vi y x (unmasked) ty)) + + + +;; Builds a vector mask corresponding to the FloatCC operation. 
+(decl gen_fcmp_mask (Type FloatCC Value Value) VReg)
+
+;; FloatCC.Equal
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x y)
+  (rv_vmfeq_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x (splat y))
+  (rv_vmfeq_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) (splat x) y)
+  (rv_vmfeq_vf y x (unmasked) ty))
+
+;; FloatCC.NotEqual
+;; Note: This is UnorderedNotEqual. It is the only unordered comparison that is not named as such.
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x y)
+  (rv_vmfne_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x (splat y))
+  (rv_vmfne_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) (splat x) y)
+  (rv_vmfne_vf y x (unmasked) ty))
+
+;; FloatCC.LessThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x y)
+  (rv_vmflt_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x (splat y))
+  (rv_vmflt_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) (splat x) y)
+  (rv_vmfgt_vf y x (unmasked) ty))
+
+;; FloatCC.LessThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x y)
+  (rv_vmfle_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x (splat y))
+  (rv_vmfle_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) (splat x) y)
+  (rv_vmfge_vf y x (unmasked) ty))
+
+;; FloatCC.GreaterThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x y)
+  (rv_vmfgt_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x (splat y))
+  (rv_vmfgt_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) (splat x) y)
+  (rv_vmflt_vf y x (unmasked) ty))
+
+;; FloatCC.GreaterThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x y)
+  (rv_vmfge_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x (splat y))
+  (rv_vmfge_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) (splat x) y)
+  (rv_vmfle_vf y x (unmasked) ty))
+
+;; FloatCC.Ordered
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Ordered) x y)
+  (rv_vmand_mm
+    (gen_fcmp_mask ty (FloatCC.Equal) x x)
+    (gen_fcmp_mask ty (FloatCC.Equal) y y)
+    ty))
+
+;; FloatCC.Unordered
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Unordered) x y)
+  (rv_vmor_mm
+    (gen_fcmp_mask ty (FloatCC.NotEqual) x x)
+    (gen_fcmp_mask ty (FloatCC.NotEqual) y y)
+    ty))
+
+;; FloatCC.OrderedNotEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.OrderedNotEqual) x y)
+  (rv_vmor_mm
+    (gen_fcmp_mask ty (FloatCC.LessThan) x y)
+    (gen_fcmp_mask ty (FloatCC.LessThan) y x)
+    ty))
+
+;; FloatCC.UnorderedOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrEqual) x y)
+  (rv_vmnor_mm
+    (gen_fcmp_mask ty (FloatCC.LessThan) x y)
+    (gen_fcmp_mask ty (FloatCC.LessThan) y x)
+    ty))
+
+;; FloatCC.UnorderedOrGreaterThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThan) x y)
+  (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThanOrEqual) x y) ty))
+
+;; FloatCC.UnorderedOrGreaterThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThanOrEqual) x y)
+  (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThan) x y) ty))
+
+;; FloatCC.UnorderedOrLessThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThan) x y)
+  (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThanOrEqual) x y) ty))
+
+;; FloatCC.UnorderedOrLessThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThanOrEqual) x y)
+  (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThan) x y) ty))
diff --git a/cranelift/codegen/src/isa/zkasm/lower.isle b/cranelift/codegen/src/isa/zkasm/lower.isle
new file mode 100644
index 000000000000..ad9bd74abc68
--- /dev/null
+++ b/cranelift/codegen/src/isa/zkasm/lower.isle
@@ -0,0 +1,1877 @@
+;; zkasm instruction selection and CLIF-to-MachInst lowering.
+
+;; The main lowering constructor term: takes a clif `Inst` and returns the
+;; register(s) within which the lowered instruction's result values live.
+(decl partial lower (Inst) InstOutput)
+
+;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty (iconst (u64_from_imm64 n))))
+  (imm ty n))
+
+;; ;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type (ty_vec_fits_in_register ty) (vconst n)))
+  (gen_constant ty (const_to_vconst n)))
+
+;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (f32const (u32_from_ieee32 n)))
+  (imm $F32 n))
+
+;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (f64const (u64_from_ieee64 n)))
+  (imm $F64 n))
+
+;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty (null)))
+  (imm ty 0))
+
+
+;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Base case, simply adding things in registers.
+(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x y)))
+  (rv_add x y))
+
+(rule 1 (lower (iadd (imm32_from_value x) (imm32_from_value y)))
+  (zk_add x y))
+
+;; Fused Multiply Accumulate Rules `vmacc`
+;;
+;; I don't think we can use `vmadd`/`vmnsub` here, since they modify the
+;; multiplication register instead of the addition one. The pattern they
+;; would match is otherwise exactly the same.
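+;;
+;; Note (per the RISC-V V spec): `vmacc.vv vd, vs1, vs2` computes
+;; vd[i] = +(vs1[i] * vs2[i]) + vd[i], so in the rules below the addend operand
+;; of the `iadd` is routed into the accumulator (`vd`) slot of the helper while
+;; the two factors of the `imul` become `vs2`/`vs1`.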
+ +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul y z)))) + (rv_vmacc_vv x y z (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul y (splat z))))) + (rv_vmacc_vx x y z (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (imul (splat y) z)))) + (rv_vmacc_vx x z y (unmasked) ty)) + +(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul x y) z))) + (rv_vmacc_vv z x y (unmasked) ty)) + +(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul x (splat y)) z))) + (rv_vmacc_vx z x y (unmasked) ty)) + +(rule 15 (lower (has_type (ty_vec_fits_in_register ty) (iadd (imul (splat x) y) z))) + (rv_vmacc_vx z y x (unmasked) ty)) + +;; Fused Multiply Subtract Rules `vnmsac` + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul y z))))) + (rv_vnmsac_vv x y z (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul y (splat z)))))) + (rv_vnmsac_vx x y z (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (ineg (imul (splat y) z))))) + (rv_vnmsac_vx x z y (unmasked) ty)) + +(rule 13 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul x y)) z))) + (rv_vnmsac_vv z x y (unmasked) ty)) + +(rule 14 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul x (splat y))) z))) + (rv_vnmsac_vx z x y (unmasked) ty)) + +(rule 15 (lower (has_type (ty_vec_fits_in_register ty) (iadd (ineg (imul (splat x) y)) z))) + (rv_vnmsac_vx z y x (unmasked) ty)) + +;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;; +(rule + (lower (has_type (fits_in_64 ty) (uadd_overflow_trap x y tc))) + (let ((res ValueRegs (lower_uadd_overflow x y ty)) + (_ InstOutput (gen_trapif (value_regs_get res 1) tc))) + (value_regs_get res 0))) + + +;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Base case, simply subtracting things in registers. 
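+;; For the narrow cases below, `subw` computes the 32-bit difference and
+;; sign-extends it into the 64-bit register; the bits above the type width of an
+;; I8/I16 result are a don't-care for consumers. A rough scalar sketch of the
+;; $I32 case (illustrative only):
+;;
+;;   r = ((x as i32).wrapping_sub(y as i32)) as i64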
+ +(rule (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y))) + (rv_sub x y)) + +(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (isub x y))) + (rv_subw x y)) + +(rule 2 (lower (has_type $I128 (isub x y))) + (i128_sub x y)) + +;; SIMD Vectors +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (isub x y))) + (rv_vsub_vv x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (isub x (splat y)))) + (rv_vsub_vx x y (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (isub x (splat (sextend y @ (value_type sext_ty)))))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) sext_ty)) + (rv_vwsub_wx x y (unmasked) (vstate_mf2 half_ty))) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (isub x (splat (uextend y @ (value_type uext_ty)))))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) uext_ty)) + (rv_vwsubu_wx x y (unmasked) (vstate_mf2 half_ty))) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (isub (splat x) y))) + (rv_vrsub_vx y x (unmasked) ty)) + +(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (isub (replicated_imm5 x) y))) + (rv_vrsub_vi y x (unmasked) ty)) + + +;; Signed Widening Low Subtractions + +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (swiden_low y @ (value_type in_ty))))) + (rv_vwsub_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_low x @ (value_type in_ty)) + (swiden_low y)))) + (rv_vwsub_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_low x @ (value_type in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwsub_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Signed Widening High Subtractions +;; These are the same as the low widenings, but we first slide down the inputs. + +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (swiden_high y @ (value_type in_ty))))) + (rv_vwsub_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_high x @ (value_type in_ty)) + (swiden_high y)))) + (rv_vwsub_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_high x @ (value_type in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwsub_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening Low Subtractions + +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (uwiden_low y @ (value_type in_ty))))) + (rv_vwsubu_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_low x @ (value_type in_ty)) + (uwiden_low y)))) + (rv_vwsubu_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_low x @ (value_type in_ty)) + (splat (uextend y @ (value_type uext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwsubu_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening High Subtractions +;; These are the same as the low widenings, but we first slide down the inputs. 
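+;; Concretely (an illustrative sketch): for I16X8 inputs, `uwiden_high` refers to
+;; lanes 4..7. `gen_slidedown_half` slides a vector down by half its lane count so
+;; those lanes land in positions 0..3, and the widening subtract then produces one
+;; 32-bit lane per input pair, roughly
+;;
+;;   out[i] = (x[i + 4] as u32).wrapping_sub(y[i + 4] as u32)    for i in 0..4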
+ +(rule 5 (lower (has_type (ty_vec_fits_in_register _) (isub x (uwiden_high y @ (value_type in_ty))))) + (rv_vwsubu_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_high x @ (value_type in_ty)) + (uwiden_high y)))) + (rv_vwsubu_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_high x @ (value_type in_ty)) + (splat (uextend y @ (value_type uext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwsubu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Signed Widening Mixed High/Low Subtractions + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_low x @ (value_type in_ty)) + (swiden_high y)))) + (rv_vwsub_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (swiden_high x @ (value_type in_ty)) + (swiden_low y)))) + (rv_vwsub_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening Mixed High/Low Subtractions + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_low x @ (value_type in_ty)) + (uwiden_high y)))) + (rv_vwsubu_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register _) (isub (uwiden_high x @ (value_type in_ty)) + (uwiden_low y)))) + (rv_vwsubu_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + + +;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_int ty) (ineg val))) + (neg ty val)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (ineg x))) + (rv_vneg_v x (unmasked) ty)) + + +;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y))) + (rv_mul x y)) + +(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (imul x y))) + (rv_mulw x y)) + +;; for I128 +(rule 2 (lower (has_type $I128 (imul x y))) + (let + ((x_regs ValueRegs x) + (x_lo XReg (value_regs_get x_regs 0)) + (x_hi XReg (value_regs_get x_regs 1)) + + ;; Get the high/low registers for `y`. 
+ (y_regs ValueRegs y) + (y_lo XReg (value_regs_get y_regs 0)) + (y_hi XReg (value_regs_get y_regs 1)) + + ;; 128bit mul formula: + ;; dst_lo = x_lo * y_lo + ;; dst_hi = mulhu(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo) + ;; + ;; We can convert the above formula into the following + ;; mulhu dst_hi, x_lo, y_lo + ;; madd dst_hi, x_lo, y_hi, dst_hi + ;; madd dst_hi, x_hi, y_lo, dst_hi + ;; madd dst_lo, x_lo, y_lo, zero + (dst_hi1 XReg (rv_mulhu x_lo y_lo)) + (dst_hi2 XReg (madd x_lo y_hi dst_hi1)) + (dst_hi XReg (madd x_hi y_lo dst_hi2)) + (dst_lo XReg (madd x_lo y_lo (zero_reg)))) + (value_regs dst_lo dst_hi))) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (imul x y))) + (rv_vmul_vv x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (imul (splat x) y))) + (rv_vmul_vx y x (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (imul x (splat y)))) + (rv_vmul_vx x y (unmasked) ty)) + + +(rule 6 (lower (imul (imm32_from_value x) (imm32_from_value y))) + (zk_mul x y)) + + +;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y))) + (lower_smlhi ty (sext x ty $I64) (sext y ty $I64))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x y))) + (rv_vmulh_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (smulhi (splat x) y))) + (rv_vmulh_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x (splat y)))) + (rv_vmulh_vx x y (unmasked) ty)) + +;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (umulhi x y))) + (lower_umlhi ty (zext x ty $I64) (zext y ty $I64))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x y))) + (rv_vmulhu_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (umulhi (splat x) y))) + (rv_vmulhu_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x (splat y)))) + (rv_vmulhu_vx x y (unmasked) ty)) + +;;;; Rules for `div` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule -1 (lower (has_type (fits_in_32 ty) (udiv x y))) + (let + ((y2 XReg (zext y ty $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_divuw (zext x ty $I64) y2))) + +(rule -1 (lower (has_type (fits_in_32 ty) (sdiv x y))) + (let + ((a XReg (sext x ty $I64)) + (b XReg (sext y ty $I64)) + (_ InstOutput (gen_div_overflow a b ty)) + (_ InstOutput (gen_div_by_zero b))) + (rv_divw a b))) + +(rule (lower (has_type $I64 (sdiv x y))) + (let + ((_ InstOutput (gen_div_overflow x y $I64)) + (_ InstOutput (gen_div_by_zero y)) ) + (rv_div x y))) + +(rule (lower (has_type $I64 (udiv x y))) + (let + ((_ InstOutput (gen_div_by_zero y))) + (rv_divu x y))) + +;;;; Rules for `rem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule -1 (lower (has_type (fits_in_16 ty) (urem x y))) + (let + ((y2 XReg (zext y ty $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remuw (zext x ty $I64) y2))) + +(rule -1 (lower (has_type (fits_in_16 ty) (srem x y))) + (let + ((y2 XReg (sext y ty $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remw (sext x ty $I64) y2))) + +(rule (lower (has_type $I32 (srem x y))) + (let + ((y2 XReg (sext y $I32 $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remw x y2))) + +(rule (lower (has_type $I32 (urem x y))) + (let + ((y2 XReg (zext y $I32 $I64)) + (_ InstOutput (gen_div_by_zero y2))) + (rv_remuw 
x y2))) + +(rule (lower (has_type $I64 (srem x y))) + (let + ((_ InstOutput (gen_div_by_zero y))) + (rv_rem x y))) + +(rule (lower (has_type $I64 (urem x y))) + (let + ((_ InstOutput (gen_div_by_zero y))) + (rv_remu x y))) + +;;;; Rules for `and` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int ty) (band x y))) + (gen_and ty x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (band x (imm12_from_value y)))) + (rv_andi x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (band (imm12_from_value x) y))) + (rv_andi y x)) + +(rule 3 (lower (has_type (ty_scalar_float ty) (band x y))) + (lower_float_binary (AluOPRRR.And) x y ty)) + +;; Specialized lowerings for `(band x (bnot y))` which is additionally produced +;; by Cranelift's `band_not` instruction that is legalized into the simpler +;; forms early on. + +(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (band x (bnot y)))) + (if-let $true (has_zbb)) + (rv_andn x y)) + +(rule 5 (lower (has_type (fits_in_64 (ty_int ty)) (band (bnot y) x))) + (if-let $true (has_zbb)) + (rv_andn x y)) + +(rule 6 (lower (has_type $I128 (band x (bnot y)))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_andn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_andn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 7 (lower (has_type $I128 (band (bnot y) x))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_andn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_andn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (band x y))) + (rv_vand_vv x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (band x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vand_vx x y (unmasked) ty)) + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (band (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vand_vx y x (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (band x (replicated_imm5 y)))) + (rv_vand_vi x y (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (band (replicated_imm5 x) y))) + (rv_vand_vi y x (unmasked) ty)) + + +;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int ty) (bor x y))) + (gen_or ty x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (imm12_from_value y)))) + (rv_ori x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bor (imm12_from_value x) y))) + (rv_ori y x)) + +(rule 3 (lower (has_type (ty_scalar_float ty) (bor x y))) + (lower_float_binary (AluOPRRR.Or) x y ty)) + +;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced +;; by Cranelift's `bor_not` instruction that is legalized into the simpler +;; forms early on. 
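+;; With `Zbb`, `orn rd, rs1, rs2` computes rs1 | !rs2 in a single instruction, so
+;; the rules below boil down to (a rough scalar sketch):
+;;
+;;   r = x | !y
+;;
+;; and for $I128 the same operation is simply applied to the low and high halves.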
+ +(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (bnot y)))) + (if-let $true (has_zbb)) + (rv_orn x y)) + +(rule 5 (lower (has_type (fits_in_64 (ty_int ty)) (bor (bnot y) x))) + (if-let $true (has_zbb)) + (rv_orn x y)) + +(rule 6 (lower (has_type $I128 (bor x (bnot y)))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_orn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_orn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 7 (lower (has_type $I128 (bor (bnot y) x))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_orn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_orn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (bor x y))) + (rv_vor_vv x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (bor x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vor_vx x y (unmasked) ty)) + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (bor (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vor_vx y x (unmasked) ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (bor x (replicated_imm5 y)))) + (rv_vor_vi x y (unmasked) ty)) + +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (bor (replicated_imm5 x) y))) + (rv_vor_vi y x (unmasked) ty)) + + +;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y))) + (rv_xor x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x (imm12_from_value y)))) + (rv_xori x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bxor (imm12_from_value x) y))) + (rv_xori y x)) + +(rule 3 (lower (has_type $I128 (bxor x y))) + (lower_b128_binary (AluOPRRR.Xor) x y)) + +(rule 4 (lower (has_type (ty_scalar_float ty) (bxor x y))) + (lower_float_binary (AluOPRRR.Xor) x y ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (bxor x y))) + (rv_vxor_vv x y (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (bxor x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vxor_vx x y (unmasked) ty)) + +(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (bxor (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vxor_vx y x (unmasked) ty)) + +(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (bxor x (replicated_imm5 y)))) + (rv_vxor_vi x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (bxor (replicated_imm5 x) y))) + (rv_vxor_vi y x (unmasked) ty)) + + +;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar ty) (bnot x))) + (gen_bnot ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (bnot x))) + (rv_vnot_v x (unmasked) ty)) + +;;;; Rules for `bit_reverse` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 (ty_int ty)) (bitrev x))) + (lower_bit_reverse x ty)) + +(rule 1 (lower (has_type $I128 (bitrev x))) + (let ((val ValueRegs x) + (lo_rev XReg (lower_bit_reverse (value_regs_get val 0) $I64)) + (hi_rev XReg (lower_bit_reverse (value_regs_get val 1) $I64))) + (value_regs hi_rev lo_rev))) + +;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bswap x))) + (gen_bswap ty x)) + +(rule 2 (lower (has_type $I128 (bswap x))) + (value_regs + (gen_bswap $I64 
(value_regs_get x 1)) + (gen_bswap $I64 (value_regs_get x 0)))) + + +;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (ctz x))) + (lower_ctz ty x)) + +(rule 1 (lower (has_type $I128 (ctz x))) + (lower_ctz_128 x)) + +;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (clz x))) + (lower_clz ty x)) + +(rule 1 (lower (has_type $I128 (clz x))) + (lower_clz_i128 x)) + +;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (cls x))) + (lower_cls ty x)) + +(rule 1 (lower (has_type $I128 (cls x))) + (lower_cls_i128 x)) + +;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type out_ty (uextend val @ (value_type in_ty)))) + (extend val (ExtendOp.Zero) in_ty out_ty)) + +;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type out_ty (sextend val @ (value_type in_ty)))) + (extend val (ExtendOp.Signed) in_ty out_ty)) + +;; The instructions below are present in RV64I and sign-extend the result to 64 bits. + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (iadd x y))))) + (rv_addw x y)) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (isub x y))))) + (rv_subw x y)) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ishl x y))))) + (rv_sllw x (value_regs_get y 0))) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ushr x y))))) + (rv_srlw x (value_regs_get y 0))) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (sshr x y))))) + (rv_sraw x (value_regs_get y 0))) + + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (iadd x (imm12_from_value y)))))) + (rv_addiw x y)) + +(rule 3 (lower (has_type $I64 (sextend (has_type $I32 (iadd (imm12_from_value x) y))))) + (rv_addiw y x)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ishl x (imm12_from_value y)))))) + (rv_slliw x y)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ushr x (imm12_from_value y)))))) + (rv_srliw x y)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (sshr x (imm12_from_value y)))))) + (rv_sraiw x y)) + +;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (popcnt x))) + (lower_popcnt x ty)) + +(rule 1 (lower (has_type $I128 (popcnt x))) + (lower_popcnt_i128 x)) + +;; Popcount using multiply. +;; This is popcount64c() from +;; http://en.wikipedia.org/wiki/Hamming_weight +;; +;; Here's the C version for 32 bits: +;; x = x - ((x>> 1) & 0x55555555); +;; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); +;; x = ((x + (x >> 4)) & 0x0F0F0F0F); +;; return (x * 0x01010101) >> 24; // Here 24 is the type width - 8. +;; +;; TODO: LLVM generates a much better implementation for I8X16. See: https://godbolt.org/z/qr6vf9Gr3 +;; For the other types it seems to be largely the same. 
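+;; For reference, a 64-bit Rust rendering of the same bit trick (illustrative only;
+;; the vector rule below applies it per lane, using lane-width masks and a final
+;; shift of lane_bits - 8):
+;;
+;;   fn popcount64(mut x: u64) -> u64 {
+;;       x -= (x >> 1) & 0x5555_5555_5555_5555;
+;;       x = (x & 0x3333_3333_3333_3333) + ((x >> 2) & 0x3333_3333_3333_3333);
+;;       x = (x + (x >> 4)) & 0x0f0f_0f0f_0f0f_0f0f;
+;;       x.wrapping_mul(0x0101_0101_0101_0101) >> 56 // 56 = 64 - 8
+;;   }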
+(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (popcnt x))) + (if-let one (u64_to_uimm5 1)) + (if-let two (u64_to_uimm5 2)) + (if-let four (u64_to_uimm5 4)) + + (let (;; x = x - ((x >> 1) & 0x55555555); + (mask_55 XReg (imm (lane_type ty) (u64_and 0x5555555555555555 (ty_mask (lane_type ty))))) + (count2_shr VReg (rv_vsrl_vi x one (unmasked) ty)) + (count2_and VReg (rv_vand_vx count2_shr mask_55 (unmasked) ty)) + (count2 VReg (rv_vsub_vv x count2_and (unmasked) ty)) + + ;; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + (mask_33 XReg (imm (lane_type ty) (u64_and 0x3333333333333333 (ty_mask (lane_type ty))))) + (count4_shr VReg (rv_vsrl_vi count2 two (unmasked) ty)) + (count4_and VReg (rv_vand_vx count4_shr mask_33 (unmasked) ty)) + (count4_lhs VReg (rv_vand_vx count2 mask_33 (unmasked) ty)) + (count4 VReg (rv_vadd_vv count4_lhs count4_and (unmasked) ty)) + + ;; x = (x + (x >> 4)) & 0x0F0F0F0F; + (mask_0f XReg (imm (lane_type ty) (u64_and 0x0f0f0f0f0f0f0f0f (ty_mask (lane_type ty))))) + (count8_shr VReg (rv_vsrl_vi count4 four (unmasked) ty)) + (count8_add VReg (rv_vadd_vv count4 count8_shr (unmasked) ty)) + (count8 VReg (rv_vand_vx count8_add mask_0f (unmasked) ty)) + + ;; (x * 0x01010101) >> (type width - 8) + (mask_01 XReg (imm (lane_type ty) (u64_and 0x0101010101010101 (ty_mask (lane_type ty))))) + (mul VReg (rv_vmul_vx count8 mask_01 (unmasked) ty)) + (shift XReg (imm $I64 (u64_sub (ty_bits (lane_type ty)) 8))) + (res VReg (rv_vsrl_vx mul shift (unmasked) ty))) + res)) + +;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount +(rule 0 (lower (has_type (ty_int (ty_8_or_16 ty)) (ishl x y))) + (if-let mask (u64_to_imm12 (shift_mask ty))) + (rv_sllw x (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `sll` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (ishl x y))) + (rv_sllw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (ishl x y))) + (rv_sll x (value_regs_get y 0))) + +;; If the shift amount is known, we can mask it and encode it in the instruction. +(rule 2 (lower (has_type (int_fits_in_32 ty) (ishl x (maybe_uextend (imm12_from_value y))))) + (rv_slliw x (imm12_and y (shift_mask ty)))) + +;; We technically don't need to mask the shift amount here. The instruction +;; does the right thing. But it's neater when pretty printing it. +(rule 3 (lower (has_type ty @ $I64 (ishl x (maybe_uextend (imm12_from_value y))))) + (rv_slli x (imm12_and y (shift_mask ty)))) + +;; With `Zba` we have a shift that zero extends the LHS argument. +(rule 4 (lower (has_type $I64 (ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y))))) + (if-let $true (has_zba)) + (rv_slliuw x y)) + +;; I128 cases +(rule 4 (lower (has_type $I128 (ishl x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; + (low XReg (rv_sll (value_regs_get x 0) shamt)) + ;; high part. 
+ (high_part1 XReg (rv_srl (value_regs_get x 0) len_sub_shamt)) + (high_part2 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part1)) + ;; + (high_part3 XReg (rv_sll (value_regs_get x 1) shamt)) + (high XReg (rv_or high_part2 high_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high)))) + +;; SIMD Cases +;; We don't need to mask anything since it is done by the instruction according to SEW. + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (ishl x y))) + (rv_vsll_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (ishl x (maybe_uextend (uimm5_from_value y))))) + (rv_vsll_vi x y (unmasked) ty)) + +;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be +;; zero extended. +(rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x y))) + (if-let mask (u64_to_imm12 (shift_mask ty))) + (rv_srlw (zext x ty $I64) (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `srl` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (ushr x y))) + (rv_srlw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (ushr x y))) + (rv_srl x (value_regs_get y 0))) + +;; When the RHS is known we can just encode it in the instruction. +(rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srliw (zext x ty $I64) (imm12_and y (shift_mask ty)))) + +(rule 3 (lower (has_type $I32 (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srliw x y)) + +(rule 3 (lower (has_type $I64 (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srli x y)) + +(rule 3 (lower (has_type $I128 (ushr x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; low part. + (low_part1 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + (low_part2 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part1)) + ;; + (low_part3 XReg (rv_srl (value_regs_get x 0) shamt)) + (low XReg (rv_or low_part2 low_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + ;; + (high XReg (rv_srl (value_regs_get x 1) shamt)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) high)))) + +;; SIMD Cases +;; We don't need to mask or extend anything since it is done by the instruction according to SEW. + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (ushr x y))) + (rv_vsrl_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (ushr x (maybe_uextend (uimm5_from_value y))))) + (rv_vsrl_vi x y (unmasked) ty)) + +;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be +;; zero extended. 
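+;; (As with the other shifts, the shift amount is taken modulo the type width,
+;; which is what `shift_mask` implements: e.g. shifting an i16 by 20 really
+;; shifts by 20 & 15 = 4.)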
+(rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x y))) + (if-let mask (u64_to_imm12 (shift_mask ty))) + (rv_sraw (sext x ty $I64) (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `sra` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (sshr x y))) + (rv_sraw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (sshr x y))) + (rv_sra x (value_regs_get y 0))) + +;; When the RHS is known we can just encode it in the instruction. +(rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_sraiw (sext x ty $I64) (imm12_and y (shift_mask ty)))) + +(rule 3 (lower (has_type $I32 (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_sraiw x y)) + +(rule 3 (lower (has_type $I64 (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_srai x y)) + +(rule 3 (lower (has_type $I128 (sshr x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; low part. + (low_part1 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + (low_part2 XReg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part1)) + ;; + (low_part3 XReg (rv_srl (value_regs_get x 0) shamt)) + (low XReg (rv_or low_part2 low_part3)) + ;; + (const64 XReg (load_u64_constant 64)) + ;; + (high XReg (rv_sra (value_regs_get x 1) shamt)) + ;; + (const_neg_1 XReg (load_imm12 -1)) + ;; + (high_replacement XReg (gen_select_reg (IntCC.SignedLessThan) (value_regs_get x 1) (zero_reg) const_neg_1 (zero_reg))) + (const64 XReg (load_u64_constant 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (value_regs + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high_replacement high)))) + +;; SIMD Cases +;; We don't need to mask or extend anything since it is done by the instruction according to SEW. 
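+;; (The vector shift instructions themselves use only the low log2(SEW) bits of
+;; the scalar shift amount, e.g. the low 5 bits for 32-bit lanes, so a shift of
+;; 33 on an I32X4 value behaves like a shift of 1.)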
+ +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (sshr x y))) + (rv_vsra_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (sshr x (maybe_uextend (uimm5_from_value y))))) + (rv_vsra_vi x y (unmasked) ty)) + + +;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (rotl x y))) + (lower_rotl ty (zext x ty $I64) (value_regs_get y 0))) + +(rule 1 (lower (has_type $I128 (rotl x y))) + (lower_i128_rotl x y)) + +;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (rotr x y))) + (lower_rotr ty (zext x ty $I64) (value_regs_get y 0))) + +(rule 1 (lower (has_type $I128 (rotr x y))) + (lower_i128_rotr x y)) + + +;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fabs x))) + (rv_fabs ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fabs x))) + (rv_vfabs_v x (unmasked) ty)) + +;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fneg x))) + (rv_fneg ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fneg x))) + (rv_vfneg_v x (unmasked) ty)) + +;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fcopysign x y))) + (rv_fsgnj ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fcopysign x y))) + (rv_vfsgnj_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fcopysign x (splat y)))) + (rv_vfsgnj_vf x y (unmasked) ty)) + +;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fma x y z))) + (rv_fmadd ty x y z)) + +;; (fma x y z) computes x * y + z +;; vfmacc computes vd[i] = +(vs1[i] * vs2[i]) + vd[i] +;; We need to reverse the order of the arguments + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fma x y z))) + (rv_vfmacc_vv z y x (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y z))) + (rv_vfmacc_vf z y x (unmasked) ty)) + +;; vfmsac computes vd[i] = +(vs1[i] * vs2[i]) - vd[i] + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fma x y (fneg z)))) + (rv_vfmsac_vv z y x (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y (fneg z)))) + (rv_vfmsac_vf z y x (unmasked) ty)) + +;; vfnmacc computes vd[i] = -(vs1[i] * vs2[i]) - vd[i] + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y (fneg z)))) + (rv_vfnmacc_vv z y x (unmasked) ty)) + +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y (fneg z)))) + (rv_vfnmacc_vf z y x (unmasked) ty)) + +;; vfnmsac computes vd[i] = -(vs1[i] * vs2[i]) + vd[i] + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y z))) + (rv_vfnmsac_vv z y x (unmasked) ty)) + +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y z))) + (rv_vfnmsac_vf z y x (unmasked) ty)) + + +;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (sqrt x))) + (rv_fsqrt ty x)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sqrt x))) + (rv_vfsqrt_v x (unmasked) ty)) + +;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule -1 + ;; + (lower + (has_type (valid_atomic_transaction 
ty) (atomic_rmw flags op addr x))) + (gen_atomic (get_atomic_rmw_op ty op) addr x (atomic_amo))) + +;;; for I8 and I16 +(rule 1 + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags op addr x))) + (gen_atomic_rmw_loop op ty addr x)) + +;;;special for I8 and I16 max min etc. +;;;because I need uextend or sextend the value. +(rule 2 + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $true) addr x))) + (gen_atomic_rmw_loop op ty addr (sext x ty $I64))) + + +(rule 2 + ;; + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $false) addr x))) + ;; + (gen_atomic_rmw_loop op ty addr (zext x ty $I64))) + +;;;;; Rules for `AtomicRmwOp.Sub` +(rule + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr x))) + (let + ((tmp WritableReg (temp_writable_reg ty)) + (x2 Reg (rv_neg x))) + (gen_atomic (get_atomic_rmw_op ty (AtomicRmwOp.Add)) addr x2 (atomic_amo)))) + +(decl gen_atomic_rmw_loop (AtomicRmwOp Type XReg XReg) XReg) +(rule + (gen_atomic_rmw_loop op ty addr x) + (let + ((dst WritableXReg (temp_writable_xreg)) + (t0 WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AtomicRmwLoop (gen_atomic_offset addr ty) op dst ty (gen_atomic_p addr ty) x t0)))) + (writable_reg_to_reg dst))) + +;;;;; Rules for `AtomicRmwOp.Nand` +(rule + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Nand) addr x))) + (gen_atomic_rmw_loop (AtomicRmwOp.Nand) ty addr x)) + +(decl is_atomic_rmw_max_etc (AtomicRmwOp bool) AtomicRmwOp) +(extern extractor is_atomic_rmw_max_etc is_atomic_rmw_max_etc) + +;;;;; Rules for `atomic load`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type (valid_atomic_transaction ty) (atomic_load flags p))) + (gen_atomic_load p ty)) + + +;;;;; Rules for `atomic store`;;;;;;;;;;;;;;;;; +(rule + (lower (atomic_store flags src @ (value_type (valid_atomic_transaction ty)) p)) + (gen_atomic_store p ty src)) + +(decl gen_atomic_offset (XReg Type) XReg) +(rule 1 (gen_atomic_offset p (fits_in_16 ty)) + (rv_slli (rv_andi p (imm12_const 3)) (imm12_const 3))) + +(rule (gen_atomic_offset p _) + (zero_reg)) + +(decl gen_atomic_p (XReg Type) XReg) +(rule 1 (gen_atomic_p p (fits_in_16 ty)) + (rv_andi p (imm12_const -4))) + +(rule (gen_atomic_p p _) + p) + + +;;;;; Rules for `atomic cas`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type (valid_atomic_transaction ty) (atomic_cas flags p e x))) + (let + ((t0 WritableReg (temp_writable_reg ty)) + (dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.AtomicCas (gen_atomic_offset p ty) t0 dst (zext e ty $I64) (gen_atomic_p p ty) x ty)))) + (writable_reg_to_reg dst))) + +;;;;; Rules for `ireduce`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type ty (ireduce x))) + (value_regs_get x 0)) + +;;;;; Rules for `fpromote`;;;;;;;;;;;;;;;;; +(rule (lower (fpromote x)) + (rv_fcvtds x)) + +;;;;; Rules for `fdemote`;;;;;;;;;;;;;;;;;; +(rule (lower (fdemote x)) + (rv_fcvtsd x)) + + +;;;;; Rules for for float arithmetic + + +;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fadd x y))) + (rv_fadd ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fadd x y))) + (rv_vfadd_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fadd x (splat y)))) + (rv_vfadd_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fadd (splat x) y))) + (rv_vfadd_vf y x 
(unmasked) ty)) + + +;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fsub x y))) + (rv_fsub ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fsub x y))) + (rv_vfsub_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fsub x (splat y)))) + (rv_vfsub_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fsub (splat x) y))) + (rv_vfrsub_vf y x (unmasked) ty)) + +;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fmul x y))) + (rv_fmul ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmul x y))) + (rv_vfmul_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fmul x (splat y)))) + (rv_vfmul_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fmul (splat x) y))) + (rv_vfmul_vf y x (unmasked) ty)) + + +;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fdiv x y))) + (rv_fdiv ty x y)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x y))) + (rv_vfdiv_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x (splat y)))) + (rv_vfdiv_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fdiv (splat x) y))) + (rv_vfrdiv_vf y x (unmasked) ty)) + +;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fmin x y))) + (gen_float_select (FloatSelectOP.Min) x y ty)) + +;; vfmin does almost the right thing, but it does not handle NaN's correctly. +;; We should return a NaN if any of the inputs is a NaN, but vfmin returns the +;; number input instead. +;; +;; TODO: We can improve this by using a masked `fmin` instruction that modifies +;; the canonical nan register. That way we could avoid the `vmerge.vv` instruction. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmin x y))) + (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) + (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) + (vec_nan VReg (rv_vmv_vx nan ty)) + (min VReg (rv_vfmin_vv x y (unmasked) ty))) + (rv_vmerge_vvm vec_nan min is_not_nan ty))) + +;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fmax x y))) + (gen_float_select (FloatSelectOP.Max) x y ty)) + +;; vfmax does almost the right thing, but it does not handle NaN's correctly. +;; We should return a NaN if any of the inputs is a NaN, but vfmax returns the +;; number input instead. +;; +;; TODO: We can improve this by using a masked `fmax` instruction that modifies +;; the canonical nan register. That way we could avoid the `vmerge.vv` instruction. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmax x y))) + (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) + (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) + (vec_nan VReg (rv_vmv_vx nan ty)) + (max VReg (rv_vfmax_vv x y (unmasked) ty))) + (rv_vmerge_vvm vec_nan max is_not_nan ty))) + +;;;;; Rules for `stack_addr`;;;;;;;;; +(rule + (lower (stack_addr ss offset)) + (gen_stack_addr ss offset)) + +;;;;; Rules for `is_null`;;;;;;;;; + +;; Null references are represented by the constant value `0`. 
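+;; A rough scalar sketch of the two reference checks below (illustrative only):
+;;
+;;   is_null(v)    <=> v == 0     (lowered to `seqz v`)
+;;   is_invalid(v) <=> v == -1    (lowered to `seqz (addi v, 1)`)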
+(rule (lower (is_null v)) + (rv_seqz v)) + +;;;;; Rules for `is_invalid`;;;;;;;;; + +;; Invalid references are represented by the constant value `-1`. +(rule (lower (is_invalid v)) + (rv_seqz (rv_addi v (imm12_const 1)))) + +;;;;; Rules for `select`;;;;;;;;; +(rule + (lower (has_type ty (select c @ (value_type cty) x y))) + (gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c (ExtendOp.Zero))) x y)) + +(rule 1 + (lower (has_type (fits_in_64 ty) (select (icmp cc a b @ (value_type (fits_in_64 in_ty))) x y))) + (let ((a XReg (truthy_to_reg in_ty (normalize_cmp_value in_ty a (intcc_to_extend_op cc)))) + (b XReg (truthy_to_reg in_ty (normalize_cmp_value in_ty b (intcc_to_extend_op cc))))) + (gen_select_reg cc a b x y))) + +;;;;; Rules for `bitselect`;;;;;;;;; + +;; Do a (c & x) | (~c & y) operation. +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (bitselect c x y))) + (let ((tmp_x XReg (rv_and c x)) + (c_inverse XReg (rv_not c)) + (tmp_y XReg (rv_and c_inverse y))) + (rv_or tmp_x tmp_y))) + +;; For vectors, we also do the same operation. +;; We can technically use any type in the bitwise operations, but prefer +;; using the type of the inputs so that we avoid emitting unnecessary +;; `vsetvl` instructions. It's likeley that the vector unit is already +;; configured for that type. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (bitselect c x y))) + (let ((tmp_x VReg (rv_vand_vv c x (unmasked) ty)) + (c_inverse VReg (rv_vnot_v c (unmasked) ty)) + (tmp_y VReg (rv_vand_vv c_inverse y (unmasked) ty))) + (rv_vor_vv tmp_x tmp_y (unmasked) ty))) + +;; Special case for bitselects with cmp's as an input. +;; +;; This allows us to skip the mask expansion step and use the more efficient +;; vmerge.vvm instruction. +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (icmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b) x y))) + (let ((mask VReg (gen_icmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (fcmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b) x y))) + (let ((mask VReg (gen_fcmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (bitcast _ (fcmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b)) x y))) + (let ((mask VReg (gen_fcmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (bitselect (bitcast _ (icmp cc a @ (value_type (ty_vec_fits_in_register cmp_ty)) b)) x y))) + (let ((mask VReg (gen_icmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask ty))) + + +;;;;; Rules for `isplit`;;;;;;;;; +(rule + (lower (isplit x)) + (let + ((t1 XReg (value_regs_get x 0)) + (t2 XReg (value_regs_get x 1))) + (output_pair t1 t2))) + +;;;;; Rules for `iconcat`;;;;;;;;; +(rule + (lower (has_type $I128 (iconcat x y))) + (let + ((t1 XReg x) + (t2 XReg y)) + (value_regs t1 t2))) + + +;;;;; Rules for `smax`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) (smax x y))) + (gen_int_select ty (IntSelectOP.Smax) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smax x y))) + (rv_vmax_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (smax x (splat y)))) + (rv_vmax_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (smax (splat x) y))) + (rv_vmax_vx y x (unmasked) ty)) + +;;;;; Rules for `smin`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) 
(smin x y))) + (gen_int_select ty (IntSelectOP.Smin) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smin x y))) + (rv_vmin_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (smin x (splat y)))) + (rv_vmin_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (smin (splat x) y))) + (rv_vmin_vx y x (unmasked) ty)) + +;;;;; Rules for `umax`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) (umax x y))) + (gen_int_select ty (IntSelectOP.Umax) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umax x y))) + (rv_vmaxu_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (umax x (splat y)))) + (rv_vmaxu_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (umax (splat x) y))) + (rv_vmaxu_vx y x (unmasked) ty)) + +;;;;; Rules for `umin`;;;;;;;;; + +(rule 0 (lower (has_type (ty_int ty) (umin x y))) + (gen_int_select ty (IntSelectOP.Umin) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umin x y))) + (rv_vminu_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (umin x (splat y)))) + (rv_vminu_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (umin (splat x) y))) + (rv_vminu_vx y x (unmasked) ty)) + + +;;;;; Rules for `debugtrap`;;;;;;;;; +(rule + (lower (debugtrap)) + (side_effect (SideEffectNoResult.Inst (MInst.EBreak)))) + +;;;;; Rules for `fence`;;;;;;;;; +(rule + (lower (fence)) + (side_effect (SideEffectNoResult.Inst (MInst.Fence 15 15)))) + +;;;;; Rules for `trap`;;;;;;;;; +(rule + (lower (trap code)) + (udf code)) + +;;;;; Rules for `resumable_trap`;;;;;;;;; +(rule + (lower (resumable_trap code)) + (udf code)) + +;;;;; Rules for `uload8`;;;;;;;;; +(rule + (lower (uload8 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $false 8) flags $I64)) +;;;;; Rules for `sload8`;;;;;;;;; +(rule + (lower (sload8 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $true 8) flags $I64)) +;;;;; Rules for `uload16`;;;;;;;;; +(rule + (lower (uload16 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $false 16) flags $I64)) + +;;;;; Rules for `iload16`;;;;;;;;; +(rule + (lower (sload16 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $true 16) flags $I64)) + +;;;;; Rules for `uload32`;;;;;;;;; +(rule + (lower (uload32 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $false 32) flags $I64)) + +;;;;; Rules for `iload32`;;;;;;;;; +(rule + (lower (sload32 flags p @ (value_type (ty_addr64 _)) offset)) + (gen_load p offset (int_load_op $true 32) flags $I64)) + +(rule + (lower (has_type ty (load flags p @ (value_type (ty_addr32 _)) offset))) + (gen_load p offset (load_op ty) flags ty) +) +;;;; for I128 +(rule 1 + (lower (has_type $I128 (load flags p @ (value_type (ty_addr64 _)) offset))) + (gen_load_128 p offset flags)) + +(rule 2 + (lower (has_type (ty_vec_fits_in_register ty) (load flags p @ (value_type (ty_addr64 _)) offset))) + (let ((eew VecElementWidth (element_width_from_type ty))) + (vec_load eew (VecAMode.UnitStride (gen_amode p offset $I64)) flags (unmasked) ty))) + +;;;;; Rules for Load + Extend Combos ;;;;;;;;; + +;; These rules cover the special loads that load a 64bit value and do some 
sort of extension. +;; We don't have any special instructions to do this, so just load the 64 bits as a vector, and +;; do a SEW/2 extension. This only reads half width elements from the source vector register +;; extends it, and writes the back the full register. + +(decl gen_load64_extend (Type ExtendOp MemFlags XReg Offset32) VReg) + +(rule (gen_load64_extend ty (ExtendOp.Signed) flags addr offset) + (let ((eew VecElementWidth (element_width_from_type $I64)) + (load_state VState (vstate_from_type $I64)) + (loaded VReg (vec_load eew (VecAMode.UnitStride (gen_amode addr offset $I64)) flags (unmasked) load_state))) + (rv_vsext_vf2 loaded (unmasked) ty))) + +(rule (gen_load64_extend ty (ExtendOp.Zero) flags addr offset) + (let ((eew VecElementWidth (element_width_from_type $I64)) + (load_state VState (vstate_from_type $I64)) + (loaded VReg (vec_load eew (VecAMode.UnitStride (gen_amode addr offset $I64)) flags (unmasked) load_state))) + (rv_vzext_vf2 loaded (unmasked) ty))) + +;;;;; Rules for `uload8x8`;;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I16X8) (uload8x8 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags addr offset)) + +;;;;; Rules for `uload16x4`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I32X4) (uload16x4 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags addr offset)) + +;;;;; Rules for `uload32x2`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I64X2) (uload32x2 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags addr offset)) + +;;;;; Rules for `sload8x8`;;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I16X8) (sload8x8 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags addr offset)) + +;;;;; Rules for `sload16x4`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I32X4) (sload16x4 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags addr offset)) + +;;;;; Rules for `sload32x2`;;;;;;;;; +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I64X2) (sload32x2 flags addr @ (value_type (ty_addr64 _)) offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags addr offset)) + +;;;;; Rules for `istore8`;;;;;;;;; +(rule + (lower (istore8 flags x p @ (value_type (ty_addr64 _)) offset)) + (gen_store p offset (StoreOP.Sb) flags x)) +;;;;; Rules for `istore16`;;;;;;;;; +(rule + (lower (istore16 flags x p @ (value_type (ty_addr64 _)) offset)) + (gen_store p offset (StoreOP.Sh) flags x)) + +;;;;; Rules for `istore32`;;;;;;;;; +(rule + (lower (istore32 flags x p @ (value_type (ty_addr64 _)) offset)) + (gen_store p offset (StoreOP.Sw) flags x)) + +;;;;; Rules for `store`;;;;;;;;; +(rule + (lower (store flags x @ (value_type ty) p @ (value_type (ty_addr32 _)) offset)) + (gen_store p offset (store_op ty) flags x)) + +;;; special for I128 +(rule 1 + (lower (store flags x @ (value_type $I128 ) p @ (value_type (ty_addr64 _)) offset)) + (gen_store_128 p offset flags x)) + +(rule 2 + (lower (store flags x @ (value_type (ty_vec_fits_in_register ty)) p @ (value_type (ty_addr64 _)) offset)) + (let ((eew VecElementWidth (element_width_from_type ty))) + (vec_store eew (VecAMode.UnitStride (gen_amode p offset $I64)) x flags (unmasked) ty))) + +(decl gen_icmp (IntCC ValueRegs ValueRegs Type) XReg) +(rule + (gen_icmp cc x y ty) + (let + ((result WritableXReg (temp_writable_xreg)) + (_ Unit (emit 
(MInst.Icmp cc result x y ty)))) + result)) + +;;;;; Rules for `icmp`;;;;;;;;; +(rule 0 (lower (icmp cc x @ (value_type (ty_int ty)) y)) + (lower_icmp cc x y ty)) + +(rule 1 (lower (icmp cc x @ (value_type (ty_vec_fits_in_register ty)) y)) + (gen_expand_mask ty (gen_icmp_mask ty cc x y))) + + +;;;;; Rules for `fcmp`;;;;;;;;; +(rule 0 (lower (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) + (cmp_value (emit_fcmp cc ty x y))) + +(rule 1 (lower (fcmp cc x @ (value_type (ty_vec_fits_in_register ty)) y)) + (gen_expand_mask ty (gen_fcmp_mask ty cc x y))) + +;;;;; Rules for `func_addr`;;;;;;;;; +(rule + (lower (func_addr (func_ref_data _ name _))) + (load_ext_name name 0)) + +;;;;; Rules for `fcvt_to_uint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_uint v @ (value_type from)))) + (gen_fcvt_int $false v $false from to)) + +;;;;; Rules for `fcvt_to_sint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_sint v @ (value_type from)))) + (gen_fcvt_int $false v $true from to)) + +;;;;; Rules for `fcvt_to_sint_sat`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_sint_sat v @ (value_type from)))) + (gen_fcvt_int $true v $true from to)) + +;;;;; Rules for `fcvt_to_uint_sat`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_uint_sat v @ (value_type from)))) + (gen_fcvt_int $true v $false from to)) + +;;;;; Rules for `fcvt_from_sint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_from_sint v @ (value_type from_ty)))) + (let ((float_op FpuOPRR (int_convert_2_float_op from_ty $true to)) + (value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Signed)))) + (fpu_rr float_op to value))) + +;;;;; Rules for `fcvt_from_uint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_from_uint v @ (value_type from_ty)))) + (let ((float_op FpuOPRR (int_convert_2_float_op from_ty $false to)) + (value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Zero)))) + (fpu_rr float_op to value))) + +;;;;; Rules for `symbol_value`;;;;;;;;; +(rule + (lower (symbol_value (symbol_value_data name _ offset))) + (load_ext_name name offset) +) +;;;;; Rules for `bitcast`;;;;;;;;; +(rule + (lower (has_type out_ty (bitcast _ v @ (value_type in_ty)))) + (gen_bitcast v in_ty out_ty)) + +;;;;; Rules for `ceil`;;;;;;;;; +(rule + (lower (has_type ty (ceil x))) + (gen_float_round (FloatRoundOP.Ceil) x ty) +) + +;;;;; Rules for `floor`;;;;;;;;; +(rule + (lower (has_type ty (floor x))) + (gen_float_round (FloatRoundOP.Floor) x ty)) +;;;;; Rules for `trunc`;;;;;;;;; +(rule + (lower (has_type ty (trunc x))) + (gen_float_round (FloatRoundOP.Trunc) x ty)) + +;;;;; Rules for `nearest`;;;;;;;;; +(rule + (lower (has_type ty (nearest x))) + (gen_float_round (FloatRoundOP.Nearest) x ty)) + + +;;;;; Rules for `select_spectre_guard`;;;;;;;;; + +;; SelectSpectreGuard is equivalent to Select, but we should not use a branch based +;; lowering for it. Instead we use a conditional move based lowering. +;; +;; We don't have cmov's in RISC-V either, but we can emulate those using bitwise +;; operations, which is what we do below. +(rule (lower (has_type ty (select_spectre_guard cmp @ (value_type cmp_ty) x @ (value_type arg_ty) y))) + (let (;; Build a mask that is 0 or -1 depending on the input comparision value. + ;; `lower_bmask` handles normalizing the input. + (mask ValueRegs (lower_bmask arg_ty cmp_ty cmp)) + ;; Using the mask above we can select either `x` or `y` by + ;; performing a bitwise `and` on both sides and then merging them + ;; together. We know that only the bits of one of the sides will be selected. 
+ ;; TODO: We can use `andn` here if we have `Zbb` + (lhs ValueRegs (gen_and arg_ty x mask)) + (rhs ValueRegs (gen_and arg_ty y (gen_bnot arg_ty mask)))) + (gen_or arg_ty lhs rhs))) + +;;;;; Rules for `bmask`;;;;;;;;; +(rule + (lower (has_type oty (bmask x @ (value_type ity)))) + (lower_bmask oty ity x)) + +;; N.B.: the Ret itself is generated by the ABI. +(rule (lower (return args)) + (lower_return args)) + +;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;;; + +(rule (lower (get_frame_pointer)) + (gen_mov_from_preg (fp_reg))) + +(rule (lower (get_stack_pointer)) + (gen_mov_from_preg (sp_reg))) + +(rule (lower (get_return_address)) + (load_ra)) + +;;; Rules for `iabs` ;;;;;;;;;;;;; + +;; I64 and lower +;; Generate the following code: +;; sext.{b,h,w} a0, a0 +;; neg a1, a0 +;; max a0, a0, a1 +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iabs x))) + (let ((extended XReg (sext x ty $I64)) + (negated XReg (rv_neg extended))) + (max $I64 extended negated))) + +;; For vectors we generate the same code, but with vector instructions +;; we can skip the sign extension, since the vector unit will only process +;; Element Sized chunks. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (iabs x))) + (let ((negated VReg (rv_vneg_v x (unmasked) ty))) + (rv_vmax_vv x negated (unmasked) ty))) + +;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (call (func_ref_data sig_ref extname dist) inputs)) + (gen_call sig_ref extname dist inputs)) + +(rule (lower (call_indirect sig_ref val inputs)) + (gen_call_indirect sig_ref val inputs)) + +;;;; Rules for `return_call` and `return_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (return_call (func_ref_data sig_ref extname dist) args)) + (gen_return_call sig_ref extname dist args)) + +(rule (lower (return_call_indirect sig_ref callee args)) + (gen_return_call_indirect sig_ref callee args)) + + +;;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (extractlane x @ (value_type ty) (u8_from_uimm8 idx))) + (gen_extractlane ty x idx)) + +;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We can insert a lane by using a masked splat from an X register. +;; Build a mask that is only enabled in the lane we want to insert. +;; Then use a masked splat (vmerge) to insert the value. +(rule 0 (lower (insertlane vec @ (value_type (ty_vec_fits_in_register ty)) + val @ (value_type (ty_int _)) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vmerge_vxm vec val mask ty))) + +;; Similar to above, but using the float variants of the instructions. +(rule 1 (lower (insertlane vec @ (value_type (ty_vec_fits_in_register ty)) + val @ (value_type (ty_scalar_float _)) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vfmerge_vfm vec val mask ty))) + +;; If we are inserting from an Imm5 const we can use the immediate +;; variant of vmerge. 
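+;; For example: inserting into lane 3 builds the one-hot mask 1 << 3 = 0b1000, so
+;; the masked merge replaces only lane 3 of `vec` and passes every other lane
+;; through unchanged.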
+(rule 2 (lower (insertlane vec @ (value_type (ty_vec_fits_in_register ty)) + (iconst (u64_from_imm64 (imm5_from_u64 imm))) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vmerge_vim vec imm mask ty))) + +;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type ty (splat n @ (value_type (ty_scalar_float _))))) + (rv_vfmv_vf n ty)) + +(rule 1 (lower (has_type ty (splat n @ (value_type (ty_int_ref_scalar_64 _))))) + (rv_vmv_vx n ty)) + +(rule 2 (lower (has_type ty (splat (iconst (u64_from_imm64 (imm5_from_u64 imm)))))) + (rv_vmv_vi imm ty)) + +;; TODO: We can splat out more patterns by using for example a vmv.v.i i8x16 for +;; a i64x2 const with a compatible bit pattern. The AArch64 Backend does something +;; similar in its splat rules. +;; TODO: Look through bitcasts when splatting out registers. We can use +;; `vmv.v.x` in a `(splat.f32x4 (bitcast.f32 val))`. And vice versa for integers. + +;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x y))) + (rv_vsaddu_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x (splat y)))) + (rv_vsaddu_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat (splat x) y))) + (rv_vsaddu_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x (replicated_imm5 y)))) + (rv_vsaddu_vi x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat (replicated_imm5 x) y))) + (rv_vsaddu_vi y x (unmasked) ty)) + +;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x y))) + (rv_vsadd_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x (splat y)))) + (rv_vsadd_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat (splat x) y))) + (rv_vsadd_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x (replicated_imm5 y)))) + (rv_vsadd_vi x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat (replicated_imm5 x) y))) + (rv_vsadd_vi y x (unmasked) ty)) + +;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (usub_sat x y))) + (rv_vssubu_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (usub_sat x (splat y)))) + (rv_vssubu_vx x y (unmasked) ty)) + +;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (ssub_sat x y))) + (rv_vssub_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (ssub_sat x (splat y)))) + (rv_vssub_vx x y (unmasked) ty)) + +;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Here we do a Vector Reduce operation. Get the unsigned minimum value of any +;; lane in the vector. The fixed input to the reduce operation is a 1. +;; This way, if any lane is 0, the result will be 0. Otherwise, the result will +;; be a 1. +;; The reduce operation leaves the result in the lowest lane, we then move it +;; into the destination X register. 
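+;; For example, with an input of [3, 0, 7, 1] the reduction computes umin(1, 3, 0, 7, 1) = 0, while +;; for an all-non-zero input the seeded 1 keeps the unsigned minimum at exactly 1.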
+(rule (lower (vall_true x @ (value_type (ty_vec_fits_in_register ty)))) + (if-let one (imm5_from_i8 1)) + ;; We don't need to broadcast the immediate into all lanes, only into lane 0. + ;; I did it this way since it uses one less instruction than with a vmv.s.x. + (let ((fixed VReg (rv_vmv_vi one ty)) + (min VReg (rv_vredminu_vs x fixed (unmasked) ty))) + (rv_vmv_xs min ty))) + + +;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Here we do a Vector Reduce operation. Get the unsigned maximum value of the +;; input vector register. Move the max to an X register, and do a `snez` on it +;; to ensure its either 1 or 0. +(rule (lower (vany_true x @ (value_type (ty_vec_fits_in_register ty)))) + (let ((max VReg (rv_vredmaxu_vs x x (unmasked) ty)) + (x_max XReg (rv_vmv_xs max ty))) + (rv_snez x_max))) + + +;;;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; To check if the MSB of a lane is set, we do a `vmslt` with zero, this sets +;; the mask bit to 1 if the value is negative (MSB 1) and 0 if not. We can then +;; just move that mask to an X Register. +;; +;; We must ensure that the move to the X register has a SEW with enough bits +;; to hold the full mask. Additionally, in some cases (e.g. i64x2) we are going +;; to read some tail bits. These are undefined, so we need to further mask them +;; off. +(rule (lower (vhigh_bits x @ (value_type (ty_vec_fits_in_register ty)))) + (let ((mask VReg (rv_vmslt_vx x (zero_reg) (unmasked) ty)) + ;; Here we only need I64X1, but emit an AVL of 2 since it + ;; saves one vector state change in the case of I64X2. + ;; + ;; TODO: For types that have more lanes than element bits, we can + ;; use the original type as a VState and avoid a state change. + (x_mask XReg (rv_vmv_xs mask (vstate_from_type $I64X2)))) + (gen_andi x_mask (ty_lane_mask ty)))) + +;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (swizzle x y))) + (rv_vrgather_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (swizzle x (splat y)))) + (rv_vrgather_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (swizzle x (replicated_uimm5 y)))) + (rv_vrgather_vi x y (unmasked) ty)) + +;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Use a vrgather to load all 0-15 lanes from x. And then modify the mask to load all +;; 16-31 lanes from y. Finally, use a vor to combine the two vectors. +;; +;; vrgather will insert a 0 for lanes that are out of bounds, so we can let it load +;; negative and out of bounds indexes. +(rule (lower (has_type (ty_vec_fits_in_register ty @ $I8X16) (shuffle x y (vconst_from_immediate mask)))) + (if-let neg16 (imm5_from_i8 -16)) + (let ((x_mask VReg (gen_constant ty mask)) + (x_lanes VReg (rv_vrgather_vv x x_mask (unmasked) ty)) + (y_mask VReg (rv_vadd_vi x_mask neg16 (unmasked) ty)) + (y_lanes VReg (rv_vrgather_vv y y_mask (unmasked) ty))) + (rv_vor_vv x_lanes y_lanes (unmasked) ty))) + +;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Slide down half the vector, and do a signed extension. 
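+;; For example, a `swiden_high` from i8x16 to i16x8 slides the upper 8 lanes down into lanes 0..7 and +;; then sign-extends each of them to 16 bits.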
+(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_high x @ (value_type in_ty)))) + (rv_vsext_vf2 (gen_slidedown_half in_ty x) (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_high (swiden_high x @ (value_type in_ty))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vsext_vf4 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_high (swiden_high (swiden_high x @ (value_type in_ty)))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vsext_vf8 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Slide down half the vector, and do a zero extension. +(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_high x @ (value_type in_ty)))) + (rv_vzext_vf2 (gen_slidedown_half in_ty x) (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_high (uwiden_high x @ (value_type in_ty))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vzext_vf4 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_high (uwiden_high (uwiden_high x @ (value_type in_ty)))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vzext_vf8 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +;;;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_low x))) + (rv_vsext_vf2 x (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_low (swiden_low x)))) + (rv_vsext_vf4 x (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (swiden_low (swiden_low (swiden_low x))))) + (rv_vsext_vf8 x (unmasked) out_ty)) + +;;;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_low x))) + (rv_vzext_vf2 x (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_low (uwiden_low x)))) + (rv_vzext_vf4 x (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register out_ty) (uwiden_low (uwiden_low (uwiden_low x))))) + (rv_vzext_vf8 x (unmasked) out_ty)) + +;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We don't have a dedicated instruction for this, rearrange the register elements +;; and use a vadd. +;; +;; We do this by building two masks, one for the even elements and one for the odd +;; elements. Using vcompress we can extract the elements and group them together. +;; +;; This is likely not the optimal way of doing this. LLVM does this using a bunch +;; of vrgathers (See: https://godbolt.org/z/jq8Wj8WG4), that doesen't seem to be +;; too much better than this. +;; +;; However V8 does something better. They use 2 vcompresses using LMUL2, that means +;; that they can do the whole thing in 3 instructions (2 vcompress + vadd). We don't +;; support LMUL > 1, so we can't do that. 
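+;; As an illustration for i32x4 inputs x = [x0, x1, x2, x3] and y = [y0, y1, y2, y3]: the odd mask +;; compresses out [x0, x2] and [y0, y2], the even mask compresses out [x1, x3] and [y1, y3], and +;; after the two slideups the final vadd produces [x0+x1, x2+x3, y0+y1, y2+y3].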
+(rule (lower (has_type (ty_vec_fits_in_register ty) (iadd_pairwise x y))) + (if-let half_size (u64_to_uimm5 (u64_udiv (ty_lane_count ty) 2))) + (let ((odd_mask VReg (gen_vec_mask 0x5555555555555555)) + (lhs_lo VReg (rv_vcompress_vm x odd_mask ty)) + (lhs_hi VReg (rv_vcompress_vm y odd_mask ty)) + (lhs VReg (rv_vslideup_vvi lhs_lo lhs_hi half_size (unmasked) ty)) + + (even_mask VReg (gen_vec_mask 0xAAAAAAAAAAAAAAAA)) + (rhs_lo VReg (rv_vcompress_vm x even_mask ty)) + (rhs_hi VReg (rv_vcompress_vm y even_mask ty)) + (rhs VReg (rv_vslideup_vvi rhs_lo rhs_hi half_size (unmasked) ty))) + (rv_vadd_vv lhs rhs (unmasked) ty))) + +;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `avg_round` computes the unsigned average with rounding: a := (x + y + 1) // 2 +;; +;; See Section "2–5 Average of Two Integers" of the Hacker's Delight book +;; +;; The floor average of two integers without overflow can be computed as: +;; t = (x & y) + ((x ^ y) >> 1) +;; +;; The right shift should be a logical shift if the integers are unsigned. +;; +;; We are however interested in the ceiling average (x + y + 1). For that +;; we use a special rounding mode in the right shift instruction. +;; +;; For the right shift instruction we use `vssrl` which is a Scaling Shift +;; Right Logical instruction using the `vxrm` fixed-point rouding mode. The +;; default rounding mode is `rnu` (round-to-nearest-up (add +0.5 LSB)). +;; Which is coincidentally the rounding mode we want for `avg_round`. +(rule (lower (has_type (ty_vec_fits_in_register ty) (avg_round x y))) + (if-let one (u64_to_uimm5 1)) + (let ((lhs VReg (rv_vand_vv x y (unmasked) ty)) + (xor VReg (rv_vxor_vv x y (unmasked) ty)) + (rhs VReg (rv_vssrl_vi xor one (unmasked) ty))) + (rv_vadd_vv lhs rhs (unmasked) ty))) + +;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (scalar_to_vector x))) + (if (ty_vector_not_float ty)) + (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) + (mask VReg (gen_vec_mask 1))) + (rv_vmerge_vxm zero x mask ty))) + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (scalar_to_vector x))) + (if (ty_vector_float ty)) + (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) + (elem VReg (rv_vfmv_sf x ty)) + (mask VReg (gen_vec_mask 1))) + (rv_vmerge_vvm zero elem mask ty))) + +;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (sqmul_round_sat x y))) + (rv_vsmul_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sqmul_round_sat x (splat y)))) + (rv_vsmul_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (sqmul_round_sat (splat x) y))) + (rv_vsmul_vx y x (unmasked) ty)) + +;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_vec_fits_in_register out_ty) (snarrow x @ (value_type in_ty) y))) + (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_clip VReg (rv_vnclip_wi x zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclip_wi y zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) + +;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_vec_fits_in_register out_ty) (uunarrow x @ (value_type in_ty) y))) + (if-let 
lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_clip VReg (rv_vnclipu_wi x zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclipu_wi y zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) + +;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We don't have an instruction that saturates a signed source into an unsigned destination. +;; To correct for this we just remove negative values using `vmax` and then use the normal +;; unsigned to unsigned narrowing instruction. + +(rule (lower (has_type (ty_vec_fits_in_register out_ty) (unarrow x @ (value_type in_ty) y))) + (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_pos VReg (rv_vmax_vx x (zero_reg) (unmasked) in_ty)) + (y_pos VReg (rv_vmax_vx y (zero_reg) (unmasked) in_ty)) + (x_clip VReg (rv_vnclipu_wi x_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclipu_wi y_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) diff --git a/cranelift/codegen/src/isa/zkasm/lower.rs b/cranelift/codegen/src/isa/zkasm/lower.rs new file mode 100644 index 000000000000..384fba864596 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/lower.rs @@ -0,0 +1,33 @@ +//! Lowering rules for zkasm. +use crate::ir::Inst as IRInst; +use crate::isa::zkasm::inst::*; +use crate::isa::zkasm::Riscv64Backend; +use crate::machinst::lower::*; +use crate::machinst::*; +pub mod isle; + +//============================================================================= +// Lowering-backend trait implementation. + +impl LowerBackend for Riscv64Backend { + type MInst = Inst; + + fn lower(&self, ctx: &mut Lower, ir_inst: IRInst) -> Option { + isle::lower(ctx, self, ir_inst) + } + + fn lower_branch( + &self, + ctx: &mut Lower, + ir_inst: IRInst, + targets: &[MachLabel], + ) -> Option<()> { + isle::lower_branch(ctx, self, ir_inst, targets) + } + + fn maybe_pinned_reg(&self) -> Option { + // The pinned register is a register kept out of register allocation so that it + // can hold a dedicated value; the zkasm backend does not support this yet. + None + } +} diff --git a/cranelift/codegen/src/isa/zkasm/lower/isle.rs b/cranelift/codegen/src/isa/zkasm/lower/isle.rs new file mode 100644 index 000000000000..cd8cab87b477 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/lower/isle.rs @@ -0,0 +1,623 @@ +//! ISLE integration glue code for zkasm lowering. + +// Pull in the ISLE generated code. +#[allow(unused)] +pub mod generated_code; +use generated_code::{Context, ExtendOp, MInst}; + +// Types that the generated ISLE code uses via `use super::*`.
+use self::generated_code::{VecAluOpRR, VecLmul}; +use super::{writable_zero_reg, zero_reg}; +use crate::isa::zkasm::abi::Riscv64ABICallSite; +use crate::isa::zkasm::lower::args::{FReg, VReg, WritableFReg, WritableVReg, WritableXReg, XReg}; +use crate::isa::zkasm::Riscv64Backend; +use crate::machinst::Reg; +use crate::machinst::{isle::*, MachInst, SmallInstVec}; +use crate::machinst::{VCodeConstant, VCodeConstantData}; +use crate::{ + ir::{ + immediates::*, types::*, AtomicRmwOp, BlockCall, ExternalName, Inst, InstructionData, + MemFlags, StackSlot, TrapCode, Value, ValueList, + }, + isa::zkasm::inst::*, + machinst::{ArgPair, InstOutput, Lower}, +}; +use crate::{isa, isle_common_prelude_methods, isle_lower_prelude_methods}; +use regalloc2::PReg; +use std::boxed::Box; +use std::convert::TryFrom; +use std::vec::Vec; + +type BoxCallInfo = Box; +type BoxCallIndInfo = Box; +type BoxReturnCallInfo = Box; +type BoxExternalName = Box; +type VecMachLabel = Vec; +type VecArgPair = Vec; +use crate::machinst::valueregs; + +pub(crate) struct RV64IsleContext<'a, 'b, I, B> +where + I: VCodeInst, + B: LowerBackend, +{ + pub lower_ctx: &'a mut Lower<'b, I>, + pub backend: &'a B, + /// Precalucated value for the minimum vector register size. Will be 0 if + /// vectors are not supported. + min_vec_reg_size: u64, +} + +impl<'a, 'b> RV64IsleContext<'a, 'b, MInst, Riscv64Backend> { + isle_prelude_method_helpers!(Riscv64ABICallSite); + + fn new(lower_ctx: &'a mut Lower<'b, MInst>, backend: &'a Riscv64Backend) -> Self { + Self { + lower_ctx, + backend, + min_vec_reg_size: backend.isa_flags.min_vec_reg_size(), + } + } + + #[inline] + fn emit_list(&mut self, list: &SmallInstVec) { + for i in list { + self.lower_ctx.emit(i.clone()); + } + } +} + +impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> { + isle_lower_prelude_methods!(); + isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICallSite); + + fn gen_return_call( + &mut self, + callee_sig: SigRef, + callee: ExternalName, + distance: RelocDistance, + args: ValueSlice, + ) -> InstOutput { + let caller_conv = isa::CallConv::Tail; + debug_assert_eq!( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + caller_conv, + "Can only do `return_call`s from within a `tail` calling convention function" + ); + + let call_site = Riscv64ABICallSite::from_func( + self.lower_ctx.sigs(), + callee_sig, + &callee, + distance, + caller_conv, + self.backend.flags().clone(), + ); + call_site.emit_return_call(self.lower_ctx, args); + + InstOutput::new() + } + + fn gen_return_call_indirect( + &mut self, + callee_sig: SigRef, + callee: Value, + args: ValueSlice, + ) -> InstOutput { + let caller_conv = isa::CallConv::Tail; + debug_assert_eq!( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + caller_conv, + "Can only do `return_call`s from within a `tail` calling convention function" + ); + + let callee = self.put_in_reg(callee); + + let call_site = Riscv64ABICallSite::from_ptr( + self.lower_ctx.sigs(), + callee_sig, + callee, + Opcode::ReturnCallIndirect, + caller_conv, + self.backend.flags().clone(), + ); + call_site.emit_return_call(self.lower_ctx, args); + + InstOutput::new() + } + + fn vreg_new(&mut self, r: Reg) -> VReg { + VReg::new(r).unwrap() + } + fn writable_vreg_new(&mut self, r: WritableReg) -> WritableVReg { + r.map(|wr| VReg::new(wr).unwrap()) + } + fn writable_vreg_to_vreg(&mut self, arg0: WritableVReg) -> VReg { + arg0.to_reg() + } + fn writable_vreg_to_writable_reg(&mut self, arg0: WritableVReg) -> WritableReg { + 
arg0.map(|vr| vr.to_reg()) + } + fn vreg_to_reg(&mut self, arg0: VReg) -> Reg { + *arg0 + } + fn xreg_new(&mut self, r: Reg) -> XReg { + XReg::new(r).unwrap() + } + fn writable_xreg_new(&mut self, r: WritableReg) -> WritableXReg { + r.map(|wr| XReg::new(wr).unwrap()) + } + fn writable_xreg_to_xreg(&mut self, arg0: WritableXReg) -> XReg { + arg0.to_reg() + } + fn writable_xreg_to_writable_reg(&mut self, arg0: WritableXReg) -> WritableReg { + arg0.map(|xr| xr.to_reg()) + } + fn xreg_to_reg(&mut self, arg0: XReg) -> Reg { + *arg0 + } + fn freg_new(&mut self, r: Reg) -> FReg { + FReg::new(r).unwrap() + } + fn writable_freg_new(&mut self, r: WritableReg) -> WritableFReg { + r.map(|wr| FReg::new(wr).unwrap()) + } + fn writable_freg_to_freg(&mut self, arg0: WritableFReg) -> FReg { + arg0.to_reg() + } + fn writable_freg_to_writable_reg(&mut self, arg0: WritableFReg) -> WritableReg { + arg0.map(|fr| fr.to_reg()) + } + fn freg_to_reg(&mut self, arg0: FReg) -> Reg { + *arg0 + } + + fn vec_writable_to_regs(&mut self, val: &VecWritableReg) -> ValueRegs { + match val.len() { + 1 => ValueRegs::one(val[0].to_reg()), + 2 => ValueRegs::two(val[0].to_reg(), val[1].to_reg()), + _ => unreachable!(), + } + } + fn intcc_to_extend_op(&mut self, cc: &IntCC) -> ExtendOp { + use IntCC::*; + match *cc { + Equal + | NotEqual + | UnsignedLessThan + | UnsignedGreaterThanOrEqual + | UnsignedGreaterThan + | UnsignedLessThanOrEqual => ExtendOp::Zero, + + SignedLessThan + | SignedGreaterThanOrEqual + | SignedGreaterThan + | SignedLessThanOrEqual => ExtendOp::Signed, + } + } + fn lower_cond_br( + &mut self, + cc: &IntCC, + a: ValueRegs, + targets: &VecMachLabel, + ty: Type, + ) -> Unit { + MInst::lower_br_icmp( + *cc, + a, + self.int_zero_reg(ty), + BranchTarget::Label(targets[0]), + BranchTarget::Label(targets[1]), + ty, + ) + .iter() + .for_each(|i| self.emit(i)); + } + fn lower_br_icmp( + &mut self, + cc: &IntCC, + a: ValueRegs, + b: ValueRegs, + targets: &VecMachLabel, + ty: Type, + ) -> Unit { + let test = generated_code::constructor_lower_icmp(self, cc, a, b, ty); + self.emit(&MInst::CondBr { + taken: BranchTarget::Label(targets[0]), + not_taken: BranchTarget::Label(targets[1]), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: test, + rs2: zero_reg(), + }, + }); + } + fn load_ra(&mut self) -> Reg { + if self.backend.flags.preserve_frame_pointers() { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::Load { + rd: tmp, + op: LoadOP::Ld, + flags: MemFlags::trusted(), + from: AMode::FPOffset(8, I64), + }); + tmp.to_reg() + } else { + link_reg() + } + } + fn int_zero_reg(&mut self, ty: Type) -> ValueRegs { + assert!(ty.is_int(), "{:?}", ty); + if ty.bits() == 128 { + ValueRegs::two(self.zero_reg(), self.zero_reg()) + } else { + ValueRegs::one(self.zero_reg()) + } + } + + fn vec_label_get(&mut self, val: &VecMachLabel, x: u8) -> MachLabel { + val[x as usize] + } + + fn label_to_br_target(&mut self, label: MachLabel) -> BranchTarget { + BranchTarget::Label(label) + } + + fn vec_writable_clone(&mut self, v: &VecWritableReg) -> VecWritableReg { + v.clone() + } + + fn imm12_and(&mut self, imm: Imm12, x: u64) -> Imm12 { + Imm12::from_bits(imm.as_i16() & (x as i16)) + } + + fn alloc_vec_writable(&mut self, ty: Type) -> VecWritableReg { + if ty.is_int() || ty == R32 || ty == R64 { + if ty.bits() <= 64 { + vec![self.temp_writable_reg(I64)] + } else { + vec![self.temp_writable_reg(I64), self.temp_writable_reg(I64)] + } + } else if ty.is_float() || ty.is_vector() { + vec![self.temp_writable_reg(ty)] + } else { + 
unimplemented!("ty:{:?}", ty) + } + } + + fn imm(&mut self, ty: Type, val: u64) -> Reg { + let tmp = self.temp_writable_reg(ty); + let alloc_tmp = &mut |ty| self.temp_writable_reg(ty); + let insts = match ty { + F32 => MInst::load_fp_constant32(tmp, val as u32, alloc_tmp), + F64 => MInst::load_fp_constant64(tmp, val, alloc_tmp), + _ => MInst::load_constant_u64(tmp, val, alloc_tmp), + }; + self.emit_list(&insts); + tmp.to_reg() + } + #[inline] + fn emit(&mut self, arg0: &MInst) -> Unit { + self.lower_ctx.emit(arg0.clone()); + } + #[inline] + fn imm12_from_u64(&mut self, arg0: u64) -> Option { + Imm12::maybe_from_u64(arg0) + } + + #[inline] + fn imm32_from_u64(&mut self, arg0: u64) -> Option { + Imm32::maybe_from_u64(arg0) + } + #[inline] + fn imm5_from_u64(&mut self, arg0: u64) -> Option { + Imm5::maybe_from_i8(i8::try_from(arg0 as i64).ok()?) + } + #[inline] + fn imm5_from_i8(&mut self, arg0: i8) -> Option { + Imm5::maybe_from_i8(arg0) + } + #[inline] + fn uimm5_bitcast_to_imm5(&mut self, arg0: UImm5) -> Imm5 { + Imm5::from_bits(arg0.bits() as u8) + } + #[inline] + fn uimm5_from_u8(&mut self, arg0: u8) -> Option { + UImm5::maybe_from_u8(arg0) + } + #[inline] + fn uimm5_from_u64(&mut self, arg0: u64) -> Option { + arg0.try_into().ok().and_then(UImm5::maybe_from_u8) + } + #[inline] + fn writable_zero_reg(&mut self) -> WritableReg { + writable_zero_reg() + } + #[inline] + fn neg_imm12(&mut self, arg0: Imm12) -> Imm12 { + -arg0 + } + #[inline] + fn zero_reg(&mut self) -> Reg { + zero_reg() + } + #[inline] + fn imm_from_bits(&mut self, val: u64) -> Imm12 { + Imm12::maybe_from_u64(val).unwrap() + } + #[inline] + fn imm_from_neg_bits(&mut self, val: i64) -> Imm12 { + Imm12::maybe_from_u64(val as u64).unwrap() + } + + fn gen_default_frm(&mut self) -> OptionFloatRoundingMode { + None + } + fn gen_select_reg(&mut self, cc: &IntCC, a: XReg, b: XReg, rs1: Reg, rs2: Reg) -> Reg { + let rd = self.temp_writable_reg(MInst::canonical_type_for_rc(rs1.class())); + self.emit(&MInst::SelectReg { + rd, + rs1, + rs2, + condition: IntegerCompare { + kind: *cc, + rs1: a.to_reg(), + rs2: b.to_reg(), + }, + }); + rd.to_reg() + } + fn load_u64_constant(&mut self, val: u64) -> Reg { + let rd = self.temp_writable_reg(I64); + MInst::load_constant_u64(rd, val, &mut |ty| self.temp_writable_reg(ty)) + .iter() + .for_each(|i| self.emit(i)); + rd.to_reg() + } + fn u8_as_i32(&mut self, x: u8) -> i32 { + x as i32 + } + + fn imm12_const(&mut self, val: i32) -> Imm12 { + if let Some(res) = Imm12::maybe_from_u64(val as u64) { + res + } else { + panic!("Unable to make an Imm12 value from {}", val) + } + } + fn imm12_const_add(&mut self, val: i32, add: i32) -> Imm12 { + Imm12::maybe_from_u64((val + add) as u64).unwrap() + } + + // + fn gen_shamt(&mut self, ty: Type, shamt: XReg) -> ValueRegs { + let ty_bits = if ty.bits() > 64 { 64 } else { ty.bits() }; + let shamt = { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: tmp, + rs: shamt.to_reg(), + imm12: Imm12::from_bits((ty_bits - 1) as i16), + }); + tmp.to_reg() + }; + let len_sub_shamt = { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::load_imm12(tmp, Imm12::from_bits(ty_bits as i16))); + let len_sub_shamt = self.temp_writable_reg(I64); + self.emit(&MInst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: len_sub_shamt, + rs1: tmp.to_reg(), + rs2: shamt, + }); + len_sub_shamt.to_reg() + }; + ValueRegs::two(shamt, len_sub_shamt) + } + + fn has_v(&mut self) -> bool { + self.backend.isa_flags.has_v() + } + + fn 
has_zbkb(&mut self) -> bool { + self.backend.isa_flags.has_zbkb() + } + + fn has_zba(&mut self) -> bool { + self.backend.isa_flags.has_zba() + } + + fn has_zbb(&mut self) -> bool { + self.backend.isa_flags.has_zbb() + } + + fn has_zbc(&mut self) -> bool { + self.backend.isa_flags.has_zbc() + } + + fn has_zbs(&mut self) -> bool { + self.backend.isa_flags.has_zbs() + } + + fn offset32_imm(&mut self, offset: i32) -> Offset32 { + Offset32::new(offset) + } + fn default_memflags(&mut self) -> MemFlags { + MemFlags::new() + } + + fn pack_float_rounding_mode(&mut self, f: &FRM) -> OptionFloatRoundingMode { + Some(*f) + } + + fn int_convert_2_float_op(&mut self, from: Type, is_signed: bool, to: Type) -> FpuOPRR { + FpuOPRR::int_convert_2_float_op(from, is_signed, to) + } + + fn gen_amode(&mut self, base: Reg, offset: Offset32, ty: Type) -> AMode { + AMode::RegOffset(base, i64::from(offset), ty) + } + + fn gen_const_amode(&mut self, c: VCodeConstant) -> AMode { + AMode::Const(c) + } + + fn valid_atomic_transaction(&mut self, ty: Type) -> Option { + if ty.is_int() && ty.bits() <= 64 { + Some(ty) + } else { + None + } + } + fn is_atomic_rmw_max_etc(&mut self, op: &AtomicRmwOp) -> Option<(AtomicRmwOp, bool)> { + let op = *op; + match op { + crate::ir::AtomicRmwOp::Umin => Some((op, false)), + crate::ir::AtomicRmwOp::Umax => Some((op, false)), + crate::ir::AtomicRmwOp::Smin => Some((op, true)), + crate::ir::AtomicRmwOp::Smax => Some((op, true)), + _ => None, + } + } + fn load_op(&mut self, ty: Type) -> LoadOP { + LoadOP::from_type(ty) + } + fn store_op(&mut self, ty: Type) -> StoreOP { + StoreOP::from_type(ty) + } + fn load_ext_name(&mut self, name: ExternalName, offset: i64) -> Reg { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::LoadExtName { + rd: tmp, + name: Box::new(name), + offset, + }); + tmp.to_reg() + } + + fn offset32_add(&mut self, a: Offset32, adden: i64) -> Offset32 { + a.try_add_i64(adden).expect("offset exceed range.") + } + + fn gen_stack_addr(&mut self, slot: StackSlot, offset: Offset32) -> Reg { + let result = self.temp_writable_reg(I64); + let i = self + .lower_ctx + .abi() + .sized_stackslot_addr(slot, i64::from(offset) as u32, result); + self.emit(&i); + result.to_reg() + } + fn atomic_amo(&mut self) -> AMO { + AMO::SeqCst + } + + fn lower_br_table(&mut self, index: Reg, targets: &VecMachLabel) -> Unit { + let tmp1 = self.temp_writable_reg(I64); + let tmp2 = self.temp_writable_reg(I64); + let targets: Vec = targets + .into_iter() + .copied() + .map(BranchTarget::Label) + .collect(); + self.emit(&MInst::BrTable { + index, + tmp1, + tmp2, + targets, + }); + } + + fn fp_reg(&mut self) -> PReg { + px_reg(8) + } + + fn sp_reg(&mut self) -> PReg { + px_reg(2) + } + + fn shift_int_to_most_significant(&mut self, v: XReg, ty: Type) -> XReg { + assert!(ty.is_int() && ty.bits() <= 64); + if ty == I64 { + return v; + } + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: v.to_reg(), + imm12: Imm12::from_bits((64 - ty.bits()) as i16), + }); + + self.xreg_new(tmp.to_reg()) + } + + #[inline] + fn int_compare(&mut self, kind: &IntCC, rs1: XReg, rs2: XReg) -> IntegerCompare { + IntegerCompare { + kind: *kind, + rs1: rs1.to_reg(), + rs2: rs2.to_reg(), + } + } + + #[inline] + fn vstate_from_type(&mut self, ty: Type) -> VState { + VState::from_type(ty) + } + + #[inline] + fn vstate_mf2(&mut self, vs: VState) -> VState { + VState { + vtype: VType { + lmul: VecLmul::LmulF2, + ..vs.vtype + }, + ..vs + } + } + + fn 
min_vec_reg_size(&mut self) -> u64 { + self.min_vec_reg_size + } + + #[inline] + fn ty_vec_fits_in_register(&mut self, ty: Type) -> Option { + if ty.is_vector() && (ty.bits() as u64) <= self.min_vec_reg_size() { + Some(ty) + } else { + None + } + } + + fn vec_alu_rr_dst_type(&mut self, op: &VecAluOpRR) -> Type { + MInst::canonical_type_for_rc(op.dst_regclass()) + } +} + +/// The main entry point for lowering with ISLE. +pub(crate) fn lower( + lower_ctx: &mut Lower, + backend: &Riscv64Backend, + inst: Inst, +) -> Option { + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. + let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); + generated_code::constructor_lower(&mut isle_ctx, inst) +} + +/// The main entry point for branch lowering with ISLE. +pub(crate) fn lower_branch( + lower_ctx: &mut Lower, + backend: &Riscv64Backend, + branch: Inst, + targets: &[MachLabel], +) -> Option<()> { + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. + let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); + generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec()) +} diff --git a/cranelift/codegen/src/isa/zkasm/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/zkasm/lower/isle/generated_code.rs new file mode 100644 index 000000000000..955a0a2b1171 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/lower/isle/generated_code.rs @@ -0,0 +1,9 @@ +// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of +// the generated ISLE source below because we include!() it. We must include!() it because its path +// depends on an environment variable; and also because of this, we can't do the `#[path = "..."] +// mod generated_code;` trick either. +#![allow(dead_code, unreachable_code, unreachable_patterns)] +#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)] +#![allow(irrefutable_let_patterns)] + +include!(concat!(env!("ISLE_DIR"), "/isle_zkasm.rs")); diff --git a/cranelift/codegen/src/isa/zkasm/mod.rs b/cranelift/codegen/src/isa/zkasm/mod.rs new file mode 100644 index 000000000000..7e19f7578d2e --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/mod.rs @@ -0,0 +1,228 @@ +//! risc-v 64-bit Instruction Set Architecture. + +use crate::dominator_tree::DominatorTree; +use crate::ir; +use crate::ir::{Function, Type}; +use crate::isa::zkasm::settings as riscv_settings; +use crate::isa::{Builder as IsaBuilder, FunctionAlignment, TargetIsa}; +use crate::machinst::{ + compile, CompiledCode, CompiledCodeStencil, MachInst, MachTextSectionBuilder, Reg, SigSet, + TextSectionBuilder, VCode, +}; +use crate::result::CodegenResult; +use crate::settings as shared_settings; +use alloc::{boxed::Box, vec::Vec}; +use core::fmt; +use cranelift_control::ControlPlane; +use regalloc2::MachineEnv; +use target_lexicon::{Architecture, Triple}; +mod abi; +pub(crate) mod inst; +mod lower; +mod settings; +#[cfg(feature = "unwind")] +use crate::isa::unwind::systemv; + +use inst::crate_reg_eviroment; + +use self::inst::EmitInfo; + +/// An zkasm backend. +pub struct Riscv64Backend { + triple: Triple, + flags: shared_settings::Flags, + isa_flags: riscv_settings::Flags, + mach_env: MachineEnv, +} + +impl Riscv64Backend { + /// Create a new zkasm backend with the given (shared) flags. 
+ pub fn new_with_flags( + triple: Triple, + flags: shared_settings::Flags, + isa_flags: riscv_settings::Flags, + ) -> Riscv64Backend { + let mach_env = crate_reg_eviroment(&flags); + Riscv64Backend { + triple, + flags, + isa_flags, + mach_env, + } + } + + /// This performs lowering to VCode, register-allocates the code, computes block layout and + /// finalizes branches. The result is ready for binary emission. + fn compile_vcode( + &self, + func: &Function, + domtree: &DominatorTree, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult<(VCode, regalloc2::Output)> { + let emit_info = EmitInfo::new(self.flags.clone(), self.isa_flags.clone()); + let sigs = SigSet::new::(func, &self.flags)?; + let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?; + compile::compile::(func, domtree, self, abi, emit_info, sigs, ctrl_plane) + } +} + +impl TargetIsa for Riscv64Backend { + fn compile_function( + &self, + func: &Function, + domtree: &DominatorTree, + want_disasm: bool, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult { + let (vcode, regalloc_result) = self.compile_vcode(func, domtree, ctrl_plane)?; + + let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); + let emit_result = vcode.emit(®alloc_result, want_disasm, &self.flags, ctrl_plane); + let frame_size = emit_result.frame_size; + let value_labels_ranges = emit_result.value_labels_ranges; + let buffer = emit_result.buffer; + let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; + let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; + + if let Some(disasm) = emit_result.disasm.as_ref() { + log::debug!("disassembly:\n{}", disasm); + } + + Ok(CompiledCodeStencil { + buffer, + frame_size, + vcode: emit_result.disasm, + value_labels_ranges, + sized_stackslot_offsets, + dynamic_stackslot_offsets, + bb_starts: emit_result.bb_offsets, + bb_edges: emit_result.bb_edges, + }) + } + + fn name(&self) -> &'static str { + "zkasm" + } + fn dynamic_vector_bytes(&self, _dynamic_ty: ir::Type) -> u32 { + 16 + } + + fn triple(&self) -> &Triple { + &self.triple + } + + fn flags(&self) -> &shared_settings::Flags { + &self.flags + } + + fn machine_env(&self) -> &MachineEnv { + &self.mach_env + } + + fn isa_flags(&self) -> Vec { + self.isa_flags.iter().collect() + } + + #[cfg(feature = "unwind")] + fn emit_unwind_info( + &self, + result: &CompiledCode, + kind: crate::machinst::UnwindInfoKind, + ) -> CodegenResult> { + use crate::isa::unwind::UnwindInfo; + use crate::machinst::UnwindInfoKind; + Ok(match kind { + UnwindInfoKind::SystemV => { + let mapper = self::inst::unwind::systemv::RegisterMapper; + Some(UnwindInfo::SystemV( + crate::isa::unwind::systemv::create_unwind_info_from_insts( + &result.buffer.unwind_info[..], + result.buffer.data().len(), + &mapper, + )?, + )) + } + UnwindInfoKind::Windows => None, + _ => None, + }) + } + + #[cfg(feature = "unwind")] + fn create_systemv_cie(&self) -> Option { + Some(inst::unwind::systemv::create_cie()) + } + + fn text_section_builder(&self, num_funcs: usize) -> Box { + Box::new(MachTextSectionBuilder::::new(num_funcs)) + } + + #[cfg(feature = "unwind")] + fn map_regalloc_reg_to_dwarf(&self, reg: Reg) -> Result { + inst::unwind::systemv::map_reg(reg).map(|reg| reg.0) + } + + fn function_alignment(&self) -> FunctionAlignment { + inst::Inst::function_alignment() + } + + #[cfg(feature = "disas")] + fn to_capstone(&self) -> Result { + use capstone::prelude::*; + let mut cs = Capstone::new() + .riscv() + .mode(arch::riscv::ArchMode::RiscV64) + .build()?; + // 
Similar to AArch64, RISC-V uses inline constants rather than a separate + // constant pool. We want to skip dissasembly over inline constants instead + // of stopping on invalid bytes. + cs.set_skipdata(true)?; + Ok(cs) + } + + fn has_native_fma(&self) -> bool { + true + } + + fn has_x86_blendv_lowering(&self, _: Type) -> bool { + false + } + + fn has_x86_pshufb_lowering(&self) -> bool { + false + } + + fn has_x86_pmulhrsw_lowering(&self) -> bool { + false + } + + fn has_x86_pmaddubsw_lowering(&self) -> bool { + false + } +} + +impl fmt::Display for Riscv64Backend { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("MachBackend") + .field("name", &self.name()) + .field("triple", &self.triple()) + .field("flags", &format!("{}", self.flags())) + .finish() + } +} + +/// Create a new `isa::Builder`. +pub fn isa_builder(triple: Triple) -> IsaBuilder { + match triple.architecture { + Architecture::Sparc => {} + _ => unreachable!(), + } + IsaBuilder { + triple, + setup: riscv_settings::builder(), + constructor: |triple, shared_flags, builder| { + let isa_flags = riscv_settings::Flags::new(&shared_flags, builder); + let backend = Riscv64Backend::new_with_flags(triple, shared_flags, isa_flags); + Ok(backend.wrapped()) + }, + } +} diff --git a/cranelift/codegen/src/isa/zkasm/settings.rs b/cranelift/codegen/src/isa/zkasm/settings.rs new file mode 100644 index 000000000000..a91e91e61938 --- /dev/null +++ b/cranelift/codegen/src/isa/zkasm/settings.rs @@ -0,0 +1,8 @@ +//! zkasm Settings. + +use crate::settings::{self, detail, Builder, Value}; +use core::fmt; + +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +include!(concat!(env!("OUT_DIR"), "/settings-zkasm.rs")); diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index b537d9e10cb0..53fac6435810 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -507,6 +507,14 @@ macro_rules! isle_common_prelude_methods { } } + #[inline] + fn ty_addr32(&mut self, ty: Type) -> Option { + match ty { + I32 | R32 => Some(ty), + _ => None, + } + } + #[inline] fn u64_from_imm64(&mut self, imm: Imm64) -> u64 { imm.bits() as u64 diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index e08384c30ab2..1eda843fe50d 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -185,6 +185,7 @@ pub trait MachInst: Clone + Debug { /// block, if any. Note that the return value must not be subject to /// register allocation. 
fn gen_block_start( + _block_index: usize, _is_indirect_branch_target: bool, _is_forward_edge_cfi_enabled: bool, ) -> Option { diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 59c7328c3aa8..d5ebee420d2f 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -896,6 +896,7 @@ impl VCode { } if let Some(block_start) = I::gen_block_start( + block.index(), self.block_order.is_indirect_branch_target(block), is_forward_edge_cfi_enabled, ) { diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index dd3c186a6747..188f423c41e7 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -461,6 +461,9 @@ (decl ty_addr64 (Type) Type) (extern extractor ty_addr64 ty_addr64) +(decl ty_addr32 (Type) Type) +(extern extractor ty_addr32 ty_addr32) + ;; A pure constructor that matches everything except vectors with size 32X2. (decl pure partial not_vec32x2 (Type) Type) (extern constructor not_vec32x2 not_vec32x2) diff --git a/cranelift/data/add.zkasm b/cranelift/data/add.zkasm new file mode 100644 index 000000000000..9bc8f465b6b3 --- /dev/null +++ b/cranelift/data/add.zkasm @@ -0,0 +1,19 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 2 + 3 => A + 0 + 5 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/add_func.zkasm b/cranelift/data/add_func.zkasm new file mode 100644 index 000000000000..cddf7c328475 --- /dev/null +++ b/cranelift/data/add_func.zkasm @@ -0,0 +1,26 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 2 => A + 0 + 3 => B + zkPC + 2 => RR + :JMP(function_2) + 0 + 5 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +function_2: + $ => A :ADD + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/add_memory.zkasm b/cranelift/data/add_memory.zkasm new file mode 100644 index 000000000000..2040a74e15d8 --- /dev/null +++ b/cranelift/data/add_memory.zkasm @@ -0,0 +1,39 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 0 => B + 0 + 2 => D + $ => A :MLOAD(CTX) + $ => E :ADD + D :MSTORE(E) + 0 + 8 => B + 0 + 3 => E + $ => A :MLOAD(CTX) + $ => A :ADD + E :MSTORE(A) + 0 + 0 => B + $ => A :MLOAD(CTX) + $ => A :ADD + $ => A :MLOAD(A) + A => E + 0 + 8 => B + $ => A :MLOAD(CTX) + $ => A :ADD + $ => B :MLOAD(A) + E => A + $ => A :ADD + 0 + 5 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/counter.zkasm b/cranelift/data/counter.zkasm new file mode 100644 index 000000000000..a69561b7ebb0 --- /dev/null +++ b/cranelift/data/counter.zkasm @@ -0,0 +1,28 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 0 => A + :JMP(L1_1) +L1_1: + 0 + 1 => B + $ => A :ADD + 0 + 10 => B + $ => B :EQ + B :JMPNZ(L1_3) + :JMP(L1_1) +L1_3: + 0 + 10 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/fibonacci.zkasm 
b/cranelift/data/fibonacci.zkasm new file mode 100644 index 000000000000..3f6d73266c71 --- /dev/null +++ b/cranelift/data/fibonacci.zkasm @@ -0,0 +1,42 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + SP + 1 => SP + 0 + 0 => A + A => D + 0 + 0 => A + 0 + 1 => B + B :MSTORE(SP) + :JMP(L1_1) +L1_1: + $ => A :ADD + A => C + B :MSTORE(SP) + 0 + 1 => B + D => A + $ => A :ADD + 0 + 10 => B + $ => E :EQ + E :JMPNZ(L1_3) + C => B + A => D + $ => A :MLOAD(SP) + :JMP(L1_1) +L1_3: + 0 + 89 => B + C => A + B :ASSERT + SP - 1 => SP + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/fibonacci_recursive.zkasm b/cranelift/data/fibonacci_recursive.zkasm new file mode 100644 index 000000000000..90e0da518126 --- /dev/null +++ b/cranelift/data/fibonacci_recursive.zkasm @@ -0,0 +1,67 @@ +start: + zkPC + 2 => RR + :JMP(function_2) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + SP + 2 => SP + A :MSTORE(SP) + 0 + 0 => B + $ => E :EQ + E :JMPNZ(L1_5) + 0 + 1 => B + $ => A :MLOAD(SP) + $ => B :SUB + B => A + 0 + 0 => B + $ => A :EQ + A :JMPNZ(L1_3) + 0 + 1 => B + $ => A :MLOAD(SP) + $ => B :SUB + A :MSTORE(SP) + B => A + zkPC + 2 => RR + :JMP(function_1) + A :MSTORE(SP + 8) + 0 + 2 => B + $ => A :MLOAD(SP) + $ => A :SUB + zkPC + 2 => RR + :JMP(function_1) + A => B + $ => A :MLOAD(SP + 8) + $ => A :ADD + :JMP(L1_4) +L1_3: + 0 + 1 => A + :JMP(L1_4) +L1_4: + :JMP(L1_6) +L1_5: + 0 + 0 => A + :JMP(L1_6) +L1_6: + SP - 2 => SP + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +function_2: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 11 => A + zkPC + 2 => RR + :JMP(function_1) + 0 + 89 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/gen.sh b/cranelift/data/gen.sh new file mode 100755 index 000000000000..a6af61f42a61 --- /dev/null +++ b/cranelift/data/gen.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +cargo build +for name in add counter add_func add_memory fibonacci locals locals_simple fibonacci_recursive mul +do + echo $name; + ../target/debug/clif-util wasm --target sparc-unknown-unknown ../../zkwasm/data/$name.wat > data/$name.zkasm +done diff --git a/cranelift/data/locals.zkasm b/cranelift/data/locals.zkasm new file mode 100644 index 000000000000..9bc8f465b6b3 --- /dev/null +++ b/cranelift/data/locals.zkasm @@ -0,0 +1,19 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 2 + 3 => A + 0 + 5 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/data/locals_simple.zkasm b/cranelift/data/locals_simple.zkasm new file mode 100644 index 000000000000..dcbe3fb15b7a --- /dev/null +++ b/cranelift/data/locals_simple.zkasm @@ -0,0 +1,19 @@ +start: + zkPC + 2 => RR + :JMP(function_1) + :JMP(finalizeExecution) + +function_1: + SP + 1 => SP + RR :MSTORE(SP) + 0 + 2 => A + 0 + 2 => B + B :ASSERT + $ => RR :MLOAD(SP) + SP - 1 => SP + :JMP(RR) + +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) + diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index 3dd3ae2ccd2b..c0c00d4388da 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -10,6 +10,7 @@ use crate::disasm::print_all; use anyhow::{Context as _, 
Result}; use clap::Parser; +use cranelift_codegen::ir::ExternalName; use cranelift_codegen::print_errors::{pretty_error, pretty_verifier_error}; use cranelift_codegen::settings::FlagsOrIsa; use cranelift_codegen::timing; @@ -17,6 +18,7 @@ use cranelift_codegen::Context; use cranelift_entity::EntityRef; use cranelift_reader::parse_sets_and_triple; use cranelift_wasm::{translate_module, DummyEnvironment, FuncIndex}; +use std::collections::HashMap; use std::io::Read; use std::path::Path; use std::path::PathBuf; @@ -235,6 +237,15 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - vprintln!(options.verbose, ""); } + println!("start:"); + let start_func = dummy_environ + .info + .start_func + .expect("Must have a start function"); + println!(" zkPC + 2 => RR"); + println!(" :JMP(function_{})", start_func.index()); + println!(" :JMP(finalizeExecution)"); + let num_func_imports = dummy_environ.get_num_func_imports(); let mut total_module_code_size = 0; let mut context = Context::new(); @@ -243,6 +254,7 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - let mut saved_size = None; let func_index = num_func_imports + def_index.index(); + println!("\nfunction_{}:", func_index); let mut mem = vec![]; let (relocs, traps, stack_maps) = if options.check_translation { if let Err(errors) = context.verify(fisa) { @@ -254,6 +266,82 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - .compile_and_emit(isa, &mut mem, &mut Default::default()) .map_err(|err| anyhow::anyhow!("{}", pretty_error(&err.func, err.inner)))?; let code_info = compiled_code.code_info(); + let mut code_buffer = compiled_code.code_buffer().to_vec(); + let mut delta = 0i32; + for reloc in compiled_code.buffer.relocs() { + let start = (reloc.offset as i32 + delta) as usize; + let mut pos = start; + while code_buffer[pos] != b'\n' { + pos += 1; + delta -= 1; + } + + let code = if let ExternalName::User(name) = reloc.name { + let name = &func.params.user_named_funcs()[name]; + if name.index == 0 { + b" B :ASSERT".to_vec() + } else { + format!(" zkPC + 2 => RR\n :JMP(function_{})", name.index) + .as_bytes() + .to_vec() + } + } else { + b" UNKNOWN".to_vec() + }; + delta += code.len() as i32; + + code_buffer.splice(start..pos, code); + } + + if let Ok(code) = std::str::from_utf8(&code_buffer) { + let mut label_definition: HashMap = HashMap::new(); + let mut label_uses: HashMap> = HashMap::new(); + let mut lines = Vec::new(); + for (index, line) in code.lines().enumerate() { + let mut line = line.to_string(); + if line.starts_with(&"label_") { + let label_index: usize = line[6..line.len() - 1] + .parse() + .expect("Failed to parse label index"); + line = format!("L{func_index}_{label_index}:"); + label_definition.insert(label_index, index); + } else if line.contains(&"label_") { + let pos = line.find(&"label_").unwrap(); + let pos_end = pos + line[pos..].find(&")").unwrap(); + let label_index: usize = line[pos + 6..pos_end] + .parse() + .expect("Failed to parse label index"); + line.replace_range(pos..pos_end, &format!("L{func_index}_{label_index}")); + label_uses.entry(label_index).or_default().push(index); + } + lines.push(line); + } + + let mut lines_to_delete = Vec::new(); + for (label, label_line) in label_definition { + match label_uses.entry(label) { + std::collections::hash_map::Entry::Occupied(uses) => { + if uses.get().len() == 1 { + let use_line = uses.get()[0]; + if use_line + 1 == label_line { + lines_to_delete.push(use_line); + 
lines_to_delete.push(label_line); + } + } + } + std::collections::hash_map::Entry::Vacant(_) => { + lines_to_delete.push(label_line); + } + } + } + lines_to_delete.sort(); + lines_to_delete.reverse(); + for index in lines_to_delete { + lines.remove(index); + } + + println!("{}", lines.join("\n")); + } if options.print_size { println!( @@ -310,6 +398,13 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - context.clear(); } + let postamble = " +finalizeExecution: + ${beforeLast()} :JMPN(finalizeExecution) + :JMP(start) +"; + println!("{postamble}"); + if !options.check_translation && options.print_size { println!("Total module code size: {} bytes", total_module_code_size); let total_bytecode_size: usize = dummy_environ.func_bytecode_sizes.iter().sum();