diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 69413862fe09..20f501105ea1 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -284,13 +284,7 @@ (tmp WritableReg) (rs Reg) (ty Type)) - ;; Byte-reverse register - (Rev8 - (rs Reg) - (step WritableReg) - (tmp WritableReg) - (rd WritableReg)) - ;; + (Brev8 (rs Reg) (ty Type) @@ -2035,34 +2029,6 @@ (rule 0 (gen_or (fits_in_64 _) x y) (rv_or (value_regs_get x 0) (value_regs_get y 0))) -(decl lower_bit_reverse (Reg Type) Reg) - -(rule - (lower_bit_reverse r $I8) - (gen_brev8 r $I8)) - -(rule - (lower_bit_reverse r $I16) - (let - ((tmp XReg (gen_brev8 r $I16)) - (tmp2 XReg (gen_rev8 tmp)) - (result XReg (rv_srli tmp2 (imm12_const 48)))) - result)) - -(rule - (lower_bit_reverse r $I32) - (let - ((tmp XReg (gen_brev8 r $I32)) - (tmp2 XReg (gen_rev8 tmp)) - (result XReg (rv_srli tmp2 (imm12_const 32)))) - result)) - -(rule - (lower_bit_reverse r $I64) - (let - ((tmp XReg (gen_rev8 r))) - (gen_brev8 tmp $I64))) - (decl lower_ctz (Type Reg) Reg) (rule (lower_ctz ty x) @@ -2728,23 +2694,6 @@ (rule (float_int_of_same_size $F64) $I64) -(decl gen_rev8 (XReg) XReg) -(rule 1 - (gen_rev8 rs) - (if-let $true (has_zbb)) - (rv_rev8 rs)) - -(rule - (gen_rev8 rs) - (if-let $false (has_zbb)) - (let - ((rd WritableXReg (temp_writable_xreg)) - (tmp WritableXReg (temp_writable_xreg)) - (step WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Rev8 rs step tmp rd)))) - (writable_reg_to_reg rd))) - - (decl gen_brev8 (Reg Type) Reg) (rule 1 (gen_brev8 rs _) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 1a9fc210a27a..e85c466e4359 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -269,7 +269,6 @@ impl Inst { | Inst::DummyUse { .. } | Inst::FloatRound { .. } | Inst::Popcnt { .. } - | Inst::Rev8 { .. } | Inst::Cltz { .. } | Inst::Brev8 { .. } | Inst::StackProbeLoop { .. } => None, @@ -2535,71 +2534,6 @@ impl Inst { } sink.bind_label(label_done, &mut state.ctrl_plane); } - &Inst::Rev8 { rs, rd, tmp, step } => { - // init. - Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); - Inst::gen_move(tmp, rs, I64).emit(&[], sink, emit_info, state); - // load 56 to step. - Inst::load_imm12(step, Imm12::from_i16(56)).emit(&[], sink, emit_info, state); - let label_done = sink.get_label(); - let label_loop = sink.get_label(); - sink.bind_label(label_loop, &mut state.ctrl_plane); - Inst::CondBr { - taken: CondBrTarget::Label(label_done), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::SignedLessThan, - rs1: step.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Andi, - rd: writable_spilltmp_reg(), - rs: tmp.to_reg(), - imm12: Imm12::from_i16(255), - } - .emit(&[], sink, emit_info, state); - Inst::AluRRR { - alu_op: AluOPRRR::Sll, - rd: writable_spilltmp_reg(), - rs1: spilltmp_reg(), - rs2: step.to_reg(), - } - .emit(&[], sink, emit_info, state); - - Inst::AluRRR { - alu_op: AluOPRRR::Or, - rd: rd, - rs1: rd.to_reg(), - rs2: spilltmp_reg(), - } - .emit(&[], sink, emit_info, state); - - { - // reset step - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: step, - rs: step.to_reg(), - imm12: Imm12::from_i16(-8), - } - .emit(&[], sink, emit_info, state); - //reset tmp. 
- Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_i16(8), - } - .emit(&[], sink, emit_info, state); - // loop. - Inst::gen_jump(label_loop).emit(&[], sink, emit_info, state); - } - - sink.bind_label(label_done, &mut state.ctrl_plane); - } &Inst::Cltz { sum, tmp, @@ -3447,14 +3381,6 @@ impl Inst { sum: allocs.next_writable(sum), ty, }, - - Inst::Rev8 { rs, rd, tmp, step } => Inst::Rev8 { - rs: allocs.next(rs), - tmp: allocs.next_writable(tmp), - step: allocs.next_writable(step), - rd: allocs.next_writable(rd), - }, - Inst::Cltz { sum, tmp, diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 2b519e8b581c..5b588943c022 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -610,12 +610,6 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_early_def(step); collector.reg_early_def(sum); } - &Inst::Rev8 { rs, rd, tmp, step } => { - collector.reg_use(rs); - collector.reg_early_def(tmp); - collector.reg_early_def(step); - collector.reg_early_def(rd); - } &Inst::Cltz { sum, step, tmp, rs, .. } => { @@ -1187,13 +1181,6 @@ impl Inst { let sum = format_reg(sum.to_reg(), allocs); format!("popcnt {},{}##ty={} tmp={} step={}", sum, rs, ty, tmp, step) } - &Inst::Rev8 { rs, rd, tmp, step } => { - let rs = format_reg(rs, allocs); - let tmp = format_reg(tmp.to_reg(), allocs); - let step = format_reg(step.to_reg(), allocs); - let rd = format_reg(rd.to_reg(), allocs); - format!("rev8 {},{}##step={} tmp={}", rd, rs, step, tmp) - } &Inst::Cltz { sum, step, diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 9df4639695f8..de00086d8e6c 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -954,14 +954,30 @@ (rv_xnor x y)) ;;;; Rules for `bit_reverse` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (fits_in_64 (ty_int ty)) (bitrev x))) - (lower_bit_reverse x ty)) + +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (bitrev x))) + (gen_bitrev ty x)) (rule 1 (lower (has_type $I128 (bitrev x))) - (let ((val ValueRegs x) - (lo_rev XReg (lower_bit_reverse (value_regs_get val 0) $I64)) - (hi_rev XReg (lower_bit_reverse (value_regs_get val 1) $I64))) - (value_regs hi_rev lo_rev))) + (value_regs + (gen_bitrev $I64 (value_regs_get x 1)) + (gen_bitrev $I64 (value_regs_get x 0)))) + + +;; Constructs a sequence of instructions that reverse all bits in `x` up to +;; the given type width. 
+(decl gen_bitrev (Type XReg) XReg) + +(rule 0 (gen_bitrev (ty_16_or_32 (ty_int ty)) x) + (if-let shift_amt (u64_to_imm12 (u64_sub 64 (ty_bits ty)))) + (rv_srli (gen_bitrev $I64 x) shift_amt)) + +(rule 1 (gen_bitrev $I8 x) + (gen_brev8 x $I8)) + +(rule 1 (gen_bitrev $I64 x) + (gen_brev8 (gen_bswap $I64 x) $I64)) + ;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -973,22 +989,8 @@ (gen_bswap $I64 (value_regs_get x 1)) (gen_bswap $I64 (value_regs_get x 0)))) -(rule 3 (lower (has_type (fits_in_32 ty) (bswap x))) - (if-let $true (has_zbb)) - (if-let shift_amt (u64_to_imm12 (u64_sub 64 (ty_bits ty)))) - (rv_srli (rv_rev8 x) shift_amt)) - -(rule 4 (lower (has_type $I64 (bswap x))) - (if-let $true (has_zbb)) - (rv_rev8 x)) - -(rule 5 (lower (has_type $I128 (bswap x))) - (if-let $true (has_zbb)) - (value_regs - (rv_rev8 (value_regs_get x 1)) - (rv_rev8 (value_regs_get x 0)))) - -;; Helper to generate a fallback byte-swap sequence when `zbb` isn't available. +;; Builds a sequence of instructions that swaps the bytes in `x` up to the given +;; type width. (decl gen_bswap (Type XReg) XReg) ;; This is only here to make the rule below work. bswap.i8 isn't valid @@ -1012,6 +1014,15 @@ (bot XReg (rv_srli bot_shifted_left shift))) (rv_or top bot))) +(rule 2 (gen_bswap (ty_16_or_32 (ty_int ty)) x) + (if-let $true (has_zbb)) + (if-let shift_amt (u64_to_imm12 (u64_sub 64 (ty_bits ty)))) + (rv_srli (rv_rev8 x) shift_amt)) + +(rule 3 (gen_bswap $I64 x) + (if-let $true (has_zbb)) + (rv_rev8 x)) + ;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (fits_in_64 ty) (ctz x))) (lower_ctz ty x)) diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index bd4db5b19405..88b6abdb8eff 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -382,6 +382,15 @@ macro_rules! isle_common_prelude_methods { } } + #[inline] + fn ty_16_or_32(&mut self, ty: Type) -> Option { + if ty.bits() == 16 || ty.bits() == 32 { + Some(ty) + } else { + None + } + } + #[inline] fn int_fits_in_32(&mut self, ty: Type) -> Option { match ty { diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 997af7a84cb9..eb03dd24a7b5 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -431,6 +431,10 @@ (decl ty_8_or_16 (Type) Type) (extern extractor ty_8_or_16 ty_8_or_16) +;; An extractor that matches 16- and 32-bit types only. +(decl ty_16_or_32 (Type) Type) +(extern extractor ty_16_or_32 ty_16_or_32) + ;; An extractor that matches int types that fit in 32 bits. 
(decl int_fits_in_32 (Type) Type) (extern extractor int_fits_in_32 int_fits_in_32) diff --git a/cranelift/filetests/filetests/isa/riscv64/bitops.clif b/cranelift/filetests/filetests/isa/riscv64/bitops.clif index 38c19c78578d..768f58302116 100644 --- a/cranelift/filetests/filetests/isa/riscv64/bitops.clif +++ b/cranelift/filetests/filetests/isa/riscv64/bitops.clif @@ -2,289 +2,6 @@ test compile precise-output set unwind_info=false target riscv64 -function %a(i8) -> i8 { -block0(v0: i8): - v1 = bitrev v0 - return v1 -} - -; VCode: -; block0: -; mv a1,a0 -; brev8 a0,a1##tmp=a2 tmp2=a3 step=a4 ty=i8 -; ret -; -; Disassembled: -; block0: ; offset 0x0 -; mv a1, a0 -; mv a0, zero -; addi a4, zero, 8 -; addi a2, zero, 1 -; slli a2, a2, 7 -; addi a3, zero, 1 -; slli a3, a3, 0 -; blez a4, 0x34 -; and t5, a2, a1 -; beq zero, t5, 8 -; or a0, a0, a3 -; addi a4, a4, -1 -; srli a2, a2, 1 -; addi t5, zero, 8 -; rem t5, a4, t5 -; bnez t5, 0xc -; srli a3, a3, 0xf -; j -0x28 -; slli a3, a3, 1 -; j -0x30 -; ret - -function %a(i16) -> i16 { -block0(v0: i16): - v1 = bitrev v0 - return v1 -} - -; VCode: -; block0: -; brev8 a5,a0##tmp=a2 tmp2=a3 step=a4 ty=i16 -; rev8 a1,a5##step=a3 tmp=a2 -; srli a0,a1,48 -; ret -; -; Disassembled: -; block0: ; offset 0x0 -; mv a5, zero -; addi a4, zero, 0x10 -; addi a2, zero, 1 -; slli a2, a2, 0xf -; addi a3, zero, 1 -; slli a3, a3, 8 -; blez a4, 0x34 -; and t5, a2, a0 -; beq zero, t5, 8 -; or a5, a5, a3 -; addi a4, a4, -1 -; srli a2, a2, 1 -; addi t5, zero, 8 -; rem t5, a4, t5 -; bnez t5, 0xc -; srli a3, a3, 0xf -; j -0x28 -; slli a3, a3, 1 -; j -0x30 -; mv a1, zero -; mv a2, a5 -; addi a3, zero, 0x38 -; bltz a3, 0x1c -; andi t6, a2, 0xff -; sll t6, t6, a3 -; or a1, a1, t6 -; addi a3, a3, -8 -; srli a2, a2, 8 -; j -0x18 -; srli a0, a1, 0x30 -; ret - -function %a(i32) -> i32 { -block0(v0: i32): - v1 = bitrev v0 - return v1 -} - -; VCode: -; block0: -; brev8 a5,a0##tmp=a2 tmp2=a3 step=a4 ty=i32 -; rev8 a1,a5##step=a3 tmp=a2 -; srli a0,a1,32 -; ret -; -; Disassembled: -; block0: ; offset 0x0 -; mv a5, zero -; addi a4, zero, 0x20 -; addi a2, zero, 1 -; slli a2, a2, 0x1f -; addi a3, zero, 1 -; slli a3, a3, 0x18 -; blez a4, 0x34 -; and t5, a2, a0 -; beq zero, t5, 8 -; or a5, a5, a3 -; addi a4, a4, -1 -; srli a2, a2, 1 -; addi t5, zero, 8 -; rem t5, a4, t5 -; bnez t5, 0xc -; srli a3, a3, 0xf -; j -0x28 -; slli a3, a3, 1 -; j -0x30 -; mv a1, zero -; mv a2, a5 -; addi a3, zero, 0x38 -; bltz a3, 0x1c -; andi t6, a2, 0xff -; sll t6, t6, a3 -; or a1, a1, t6 -; addi a3, a3, -8 -; srli a2, a2, 8 -; j -0x18 -; srli a0, a1, 0x20 -; ret - -function %a(i64) -> i64 { -block0(v0: i64): - v1 = bitrev v0 - return v1 -} - -; VCode: -; block0: -; rev8 a5,a0##step=a4 tmp=a3 -; brev8 a0,a5##tmp=a3 tmp2=a1 step=a2 ty=i64 -; ret -; -; Disassembled: -; block0: ; offset 0x0 -; mv a5, zero -; mv a3, a0 -; addi a4, zero, 0x38 -; bltz a4, 0x1c -; andi t6, a3, 0xff -; sll t6, t6, a4 -; or a5, a5, t6 -; addi a4, a4, -8 -; srli a3, a3, 8 -; j -0x18 -; mv a0, zero -; addi a2, zero, 0x40 -; addi a3, zero, 1 -; slli a3, a3, 0x3f -; addi a1, zero, 1 -; slli a1, a1, 0x38 -; blez a2, 0x34 -; and t5, a3, a5 -; beq zero, t5, 8 -; or a0, a0, a1 -; addi a2, a2, -1 -; srli a3, a3, 1 -; addi t5, zero, 8 -; rem t5, a2, t5 -; bnez t5, 0xc -; srli a1, a1, 0xf -; j -0x28 -; slli a1, a1, 1 -; j -0x30 -; ret - -function %a(i128) -> i128 { -block0(v0: i128): - v1 = bitrev v0 - return v1 -} - -; VCode: -; addi sp,sp,-16 -; sd ra,8(sp) -; sd fp,0(sp) -; mv fp,sp -; sd s6,-8(sp) -; sd s11,-16(sp) -; addi sp,sp,-16 -; block0: -; mv 
s11,a1 -; mv a1,a0 -; rev8 a0,a1##step=a5 tmp=a4 -; brev8 a1,a0##tmp=a4 tmp2=a2 step=a3 ty=i64 -; mv a4,s11 -; rev8 s6,a4##step=a2 tmp=a3 -; brev8 a0,s6##tmp=a4 tmp2=a5 step=a2 ty=i64 -; addi sp,sp,16 -; ld s6,-8(sp) -; ld s11,-16(sp) -; ld ra,8(sp) -; ld fp,0(sp) -; addi sp,sp,16 -; ret -; -; Disassembled: -; block0: ; offset 0x0 -; addi sp, sp, -0x10 -; sd ra, 8(sp) -; sd s0, 0(sp) -; mv s0, sp -; sd s6, -8(sp) -; sd s11, -0x10(sp) -; addi sp, sp, -0x10 -; block1: ; offset 0x1c -; mv s11, a1 -; mv a1, a0 -; mv a0, zero -; mv a4, a1 -; addi a5, zero, 0x38 -; bltz a5, 0x1c -; andi t6, a4, 0xff -; sll t6, t6, a5 -; or a0, a0, t6 -; addi a5, a5, -8 -; srli a4, a4, 8 -; j -0x18 -; mv a1, zero -; addi a3, zero, 0x40 -; addi a4, zero, 1 -; slli a4, a4, 0x3f -; addi a2, zero, 1 -; slli a2, a2, 0x38 -; blez a3, 0x34 -; and t5, a4, a0 -; beq zero, t5, 8 -; or a1, a1, a2 -; addi a3, a3, -1 -; srli a4, a4, 1 -; addi t5, zero, 8 -; rem t5, a3, t5 -; bnez t5, 0xc -; srli a2, a2, 0xf -; j -0x28 -; slli a2, a2, 1 -; j -0x30 -; mv a4, s11 -; mv s6, zero -; mv a3, a4 -; addi a2, zero, 0x38 -; bltz a2, 0x1c -; andi t6, a3, 0xff -; sll t6, t6, a2 -; or s6, s6, t6 -; addi a2, a2, -8 -; srli a3, a3, 8 -; j -0x18 -; mv a0, zero -; addi a2, zero, 0x40 -; addi a4, zero, 1 -; slli a4, a4, 0x3f -; addi a5, zero, 1 -; slli a5, a5, 0x38 -; blez a2, 0x34 -; and t5, a4, s6 -; beq zero, t5, 8 -; or a0, a0, a5 -; addi a2, a2, -1 -; srli a4, a4, 1 -; addi t5, zero, 8 -; rem t5, a2, t5 -; bnez t5, 0xc -; srli a5, a5, 0xf -; j -0x28 -; slli a5, a5, 1 -; j -0x30 -; addi sp, sp, 0x10 -; ld s6, -8(sp) -; ld s11, -0x10(sp) -; ld ra, 8(sp) -; ld s0, 0(sp) -; addi sp, sp, 0x10 -; ret function %b(i8) -> i8 { block0(v0: i8): diff --git a/cranelift/filetests/filetests/isa/riscv64/bitrev-zbb-zbkb.clif b/cranelift/filetests/filetests/isa/riscv64/bitrev-zbb-zbkb.clif new file mode 100644 index 000000000000..84d16bfea213 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/bitrev-zbb-zbkb.clif @@ -0,0 +1,104 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_zbkb has_zbb + +function %bitrev_i8(i8) -> i8 { +block0(v0: i8): + v1 = bitrev v0 + return v1 +} + +; VCode: +; block0: +; brev8 a0,a0 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x13, 0x55, 0x75, 0x68 +; ret + +function %bitrev_i16(i16) -> i16 { +block0(v0: i16): + v1 = bitrev v0 + return v1 +} + +; VCode: +; block0: +; rev8 a2,a0 +; brev8 a4,a2 +; srli a0,a4,48 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x13, 0x56, 0x85, 0x6b +; .byte 0x13, 0x57, 0x76, 0x68 +; srli a0, a4, 0x30 +; ret + +function %bitrev_i32(i32) -> i32 { +block0(v0: i32): + v1 = bitrev v0 + return v1 +} + +; VCode: +; block0: +; rev8 a2,a0 +; brev8 a4,a2 +; srli a0,a4,32 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x13, 0x56, 0x85, 0x6b +; .byte 0x13, 0x57, 0x76, 0x68 +; srli a0, a4, 0x20 +; ret + +function %bitrev_i64(i64) -> i64 { +block0(v0: i64): + v1 = bitrev v0 + return v1 +} + +; VCode: +; block0: +; rev8 a2,a0 +; brev8 a0,a2 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x13, 0x56, 0x85, 0x6b +; .byte 0x13, 0x55, 0x76, 0x68 +; ret + +function %bitrev_i128(i128) -> i128 { +block0(v0: i128): + v1 = bitrev v0 + return v1 +} + +; VCode: +; block0: +; mv a2,a0 +; rev8 a3,a1 +; brev8 a0,a3 +; mv a3,a2 +; rev8 a1,a3 +; brev8 a1,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; mv a2, a0 +; .byte 0x93, 0xd6, 0x85, 0x6b +; .byte 0x13, 0xd5, 0x76, 0x68 +; mv a3, a2 +; .byte 0x93, 0xd5, 0x86, 0x6b +; .byte 0x93, 
0xd5, 0x75, 0x68 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/bitrev.clif b/cranelift/filetests/filetests/isa/riscv64/bitrev.clif new file mode 100644 index 000000000000..968b94bb3e26 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/bitrev.clif @@ -0,0 +1,577 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %bitrev_i8(i8) -> i8 { +block0(v0: i8): + v1 = bitrev v0 + return v1 +} + +; VCode: +; block0: +; mv a1,a0 +; brev8 a0,a1##tmp=a2 tmp2=a3 step=a4 ty=i8 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; mv a1, a0 +; mv a0, zero +; addi a4, zero, 8 +; addi a2, zero, 1 +; slli a2, a2, 7 +; addi a3, zero, 1 +; slli a3, a3, 0 +; blez a4, 0x34 +; and t5, a2, a1 +; beq zero, t5, 8 +; or a0, a0, a3 +; addi a4, a4, -1 +; srli a2, a2, 1 +; addi t5, zero, 8 +; rem t5, a4, t5 +; bnez t5, 0xc +; srli a3, a3, 0xf +; j -0x28 +; slli a3, a3, 1 +; j -0x30 +; ret + +function %bitrev_i16(i16) -> i16 { +block0(v0: i16): + v1 = bitrev v0 + return v1 +} + +; VCode: +; block0: +; slli a2,a0,8 +; srli a4,a0,8 +; slli a1,a4,56 +; srli a3,a1,56 +; or a4,a2,a3 +; slli a1,a4,16 +; srli a2,a0,16 +; slli a4,a2,8 +; srli a2,a2,8 +; slli a2,a2,56 +; srli a5,a2,56 +; or a2,a4,a5 +; slli a2,a2,48 +; srli a4,a2,48 +; or a1,a1,a4 +; slli a2,a1,32 +; srli a5,a0,32 +; slli a0,a5,8 +; srli a3,a5,8 +; slli a4,a3,56 +; srli a1,a4,56 +; or a3,a0,a1 +; slli a4,a3,16 +; srli a0,a5,16 +; slli a3,a0,8 +; srli a5,a0,8 +; slli a0,a5,56 +; srli a5,a0,56 +; or a5,a3,a5 +; slli a0,a5,48 +; srli a3,a0,48 +; or a4,a4,a3 +; slli a0,a4,32 +; srli a3,a0,32 +; or a4,a2,a3 +; brev8 a3,a4##tmp=a0 tmp2=a1 step=a2 ty=i64 +; srli a0,a3,48 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; slli a2, a0, 8 +; srli a4, a0, 8 +; slli a1, a4, 0x38 +; srli a3, a1, 0x38 +; or a4, a2, a3 +; slli a1, a4, 0x10 +; srli a2, a0, 0x10 +; slli a4, a2, 8 +; srli a2, a2, 8 +; slli a2, a2, 0x38 +; srli a5, a2, 0x38 +; or a2, a4, a5 +; slli a2, a2, 0x30 +; srli a4, a2, 0x30 +; or a1, a1, a4 +; slli a2, a1, 0x20 +; srli a5, a0, 0x20 +; slli a0, a5, 8 +; srli a3, a5, 8 +; slli a4, a3, 0x38 +; srli a1, a4, 0x38 +; or a3, a0, a1 +; slli a4, a3, 0x10 +; srli a0, a5, 0x10 +; slli a3, a0, 8 +; srli a5, a0, 8 +; slli a0, a5, 0x38 +; srli a5, a0, 0x38 +; or a5, a3, a5 +; slli a0, a5, 0x30 +; srli a3, a0, 0x30 +; or a4, a4, a3 +; slli a0, a4, 0x20 +; srli a3, a0, 0x20 +; or a4, a2, a3 +; mv a3, zero +; addi a2, zero, 0x40 +; addi a0, zero, 1 +; slli a0, a0, 0x3f +; addi a1, zero, 1 +; slli a1, a1, 0x38 +; blez a2, 0x34 +; and t5, a0, a4 +; beq zero, t5, 8 +; or a3, a3, a1 +; addi a2, a2, -1 +; srli a0, a0, 1 +; addi t5, zero, 8 +; rem t5, a2, t5 +; bnez t5, 0xc +; srli a1, a1, 0xf +; j -0x28 +; slli a1, a1, 1 +; j -0x30 +; srli a0, a3, 0x30 +; ret + +function %bitrev_i32(i32) -> i32 { +block0(v0: i32): + v1 = bitrev v0 + return v1 +} + +; VCode: +; block0: +; slli a2,a0,8 +; srli a4,a0,8 +; slli a1,a4,56 +; srli a3,a1,56 +; or a4,a2,a3 +; slli a1,a4,16 +; srli a2,a0,16 +; slli a4,a2,8 +; srli a2,a2,8 +; slli a2,a2,56 +; srli a5,a2,56 +; or a2,a4,a5 +; slli a2,a2,48 +; srli a4,a2,48 +; or a1,a1,a4 +; slli a2,a1,32 +; srli a5,a0,32 +; slli a0,a5,8 +; srli a3,a5,8 +; slli a4,a3,56 +; srli a1,a4,56 +; or a3,a0,a1 +; slli a4,a3,16 +; srli a0,a5,16 +; slli a3,a0,8 +; srli a5,a0,8 +; slli a0,a5,56 +; srli a5,a0,56 +; or a5,a3,a5 +; slli a0,a5,48 +; srli a3,a0,48 +; or a4,a4,a3 +; slli a0,a4,32 +; srli a3,a0,32 +; or a4,a2,a3 +; brev8 a3,a4##tmp=a0 tmp2=a1 step=a2 ty=i64 +; srli a0,a3,32 +; ret +; +; 
Disassembled: +; block0: ; offset 0x0 +; slli a2, a0, 8 +; srli a4, a0, 8 +; slli a1, a4, 0x38 +; srli a3, a1, 0x38 +; or a4, a2, a3 +; slli a1, a4, 0x10 +; srli a2, a0, 0x10 +; slli a4, a2, 8 +; srli a2, a2, 8 +; slli a2, a2, 0x38 +; srli a5, a2, 0x38 +; or a2, a4, a5 +; slli a2, a2, 0x30 +; srli a4, a2, 0x30 +; or a1, a1, a4 +; slli a2, a1, 0x20 +; srli a5, a0, 0x20 +; slli a0, a5, 8 +; srli a3, a5, 8 +; slli a4, a3, 0x38 +; srli a1, a4, 0x38 +; or a3, a0, a1 +; slli a4, a3, 0x10 +; srli a0, a5, 0x10 +; slli a3, a0, 8 +; srli a5, a0, 8 +; slli a0, a5, 0x38 +; srli a5, a0, 0x38 +; or a5, a3, a5 +; slli a0, a5, 0x30 +; srli a3, a0, 0x30 +; or a4, a4, a3 +; slli a0, a4, 0x20 +; srli a3, a0, 0x20 +; or a4, a2, a3 +; mv a3, zero +; addi a2, zero, 0x40 +; addi a0, zero, 1 +; slli a0, a0, 0x3f +; addi a1, zero, 1 +; slli a1, a1, 0x38 +; blez a2, 0x34 +; and t5, a0, a4 +; beq zero, t5, 8 +; or a3, a3, a1 +; addi a2, a2, -1 +; srli a0, a0, 1 +; addi t5, zero, 8 +; rem t5, a2, t5 +; bnez t5, 0xc +; srli a1, a1, 0xf +; j -0x28 +; slli a1, a1, 1 +; j -0x30 +; srli a0, a3, 0x20 +; ret + +function %bitrev_i64(i64) -> i64 { +block0(v0: i64): + v1 = bitrev v0 + return v1 +} + +; VCode: +; block0: +; slli a2,a0,8 +; srli a4,a0,8 +; slli a1,a4,56 +; srli a3,a1,56 +; or a4,a2,a3 +; slli a1,a4,16 +; srli a2,a0,16 +; slli a4,a2,8 +; srli a2,a2,8 +; slli a2,a2,56 +; srli a5,a2,56 +; or a2,a4,a5 +; slli a2,a2,48 +; srli a4,a2,48 +; or a1,a1,a4 +; slli a2,a1,32 +; srli a5,a0,32 +; slli a0,a5,8 +; srli a3,a5,8 +; slli a4,a3,56 +; srli a1,a4,56 +; or a3,a0,a1 +; slli a4,a3,16 +; srli a0,a5,16 +; slli a3,a0,8 +; srli a5,a0,8 +; slli a0,a5,56 +; srli a5,a0,56 +; or a5,a3,a5 +; slli a0,a5,48 +; srli a3,a0,48 +; or a4,a4,a3 +; slli a0,a4,32 +; srli a3,a0,32 +; or a4,a2,a3 +; brev8 a0,a4##tmp=a3 tmp2=a1 step=a2 ty=i64 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; slli a2, a0, 8 +; srli a4, a0, 8 +; slli a1, a4, 0x38 +; srli a3, a1, 0x38 +; or a4, a2, a3 +; slli a1, a4, 0x10 +; srli a2, a0, 0x10 +; slli a4, a2, 8 +; srli a2, a2, 8 +; slli a2, a2, 0x38 +; srli a5, a2, 0x38 +; or a2, a4, a5 +; slli a2, a2, 0x30 +; srli a4, a2, 0x30 +; or a1, a1, a4 +; slli a2, a1, 0x20 +; srli a5, a0, 0x20 +; slli a0, a5, 8 +; srli a3, a5, 8 +; slli a4, a3, 0x38 +; srli a1, a4, 0x38 +; or a3, a0, a1 +; slli a4, a3, 0x10 +; srli a0, a5, 0x10 +; slli a3, a0, 8 +; srli a5, a0, 8 +; slli a0, a5, 0x38 +; srli a5, a0, 0x38 +; or a5, a3, a5 +; slli a0, a5, 0x30 +; srli a3, a0, 0x30 +; or a4, a4, a3 +; slli a0, a4, 0x20 +; srli a3, a0, 0x20 +; or a4, a2, a3 +; mv a0, zero +; addi a2, zero, 0x40 +; addi a3, zero, 1 +; slli a3, a3, 0x3f +; addi a1, zero, 1 +; slli a1, a1, 0x38 +; blez a2, 0x34 +; and t5, a3, a4 +; beq zero, t5, 8 +; or a0, a0, a1 +; addi a2, a2, -1 +; srli a3, a3, 1 +; addi t5, zero, 8 +; rem t5, a2, t5 +; bnez t5, 0xc +; srli a1, a1, 0xf +; j -0x28 +; slli a1, a1, 1 +; j -0x30 +; ret + +function %bitrev_i128(i128) -> i128 { +block0(v0: i128): + v1 = bitrev v0 + return v1 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; sd s3,-8(sp) +; addi sp,sp,-16 +; block0: +; slli a3,a1,8 +; srli a5,a1,8 +; slli a2,a5,56 +; srli a4,a2,56 +; or a5,a3,a4 +; slli a2,a5,16 +; srli a3,a1,16 +; slli a5,a3,8 +; srli a3,a3,8 +; slli a3,a3,56 +; srli a3,a3,56 +; or a3,a5,a3 +; slli a3,a3,48 +; srli a5,a3,48 +; or a2,a2,a5 +; slli a3,a2,32 +; srli a1,a1,32 +; slli a2,a1,8 +; srli a4,a1,8 +; slli a5,a4,56 +; srli a4,a5,56 +; or a4,a2,a4 +; slli a5,a4,16 +; srli a1,a1,16 +; slli a4,a1,8 +; srli a1,a1,8 +; slli 
a1,a1,56 +; srli a1,a1,56 +; or a1,a4,a1 +; slli a1,a1,48 +; srli a4,a1,48 +; or a5,a5,a4 +; slli a1,a5,32 +; srli a4,a1,32 +; or a5,a3,a4 +; brev8 a4,a5##tmp=a1 tmp2=a2 step=a3 ty=i64 +; mv s3,a4 +; slli a1,a0,8 +; srli a2,a0,8 +; slli a4,a2,56 +; srli a2,a4,56 +; or a2,a1,a2 +; slli a4,a2,16 +; srli a1,a0,16 +; slli a2,a1,8 +; srli a5,a1,8 +; slli a1,a5,56 +; srli a3,a1,56 +; or a5,a2,a3 +; slli a1,a5,48 +; srli a2,a1,48 +; or a4,a4,a2 +; slli a1,a4,32 +; srli a3,a0,32 +; slli a4,a3,8 +; srli a0,a3,8 +; slli a2,a0,56 +; srli a5,a2,56 +; or a0,a4,a5 +; slli a2,a0,16 +; srli a4,a3,16 +; slli a0,a4,8 +; srli a3,a4,8 +; slli a4,a3,56 +; srli a3,a4,56 +; or a3,a0,a3 +; slli a4,a3,48 +; srli a0,a4,48 +; or a2,a2,a0 +; slli a4,a2,32 +; srli a0,a4,32 +; or a2,a1,a0 +; brev8 a1,a2##tmp=a4 tmp2=a5 step=a0 ty=i64 +; mv a0,s3 +; addi sp,sp,16 +; ld s3,-8(sp) +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; sd s3, -8(sp) +; addi sp, sp, -0x10 +; block1: ; offset 0x18 +; slli a3, a1, 8 +; srli a5, a1, 8 +; slli a2, a5, 0x38 +; srli a4, a2, 0x38 +; or a5, a3, a4 +; slli a2, a5, 0x10 +; srli a3, a1, 0x10 +; slli a5, a3, 8 +; srli a3, a3, 8 +; slli a3, a3, 0x38 +; srli a3, a3, 0x38 +; or a3, a5, a3 +; slli a3, a3, 0x30 +; srli a5, a3, 0x30 +; or a2, a2, a5 +; slli a3, a2, 0x20 +; srli a1, a1, 0x20 +; slli a2, a1, 8 +; srli a4, a1, 8 +; slli a5, a4, 0x38 +; srli a4, a5, 0x38 +; or a4, a2, a4 +; slli a5, a4, 0x10 +; srli a1, a1, 0x10 +; slli a4, a1, 8 +; srli a1, a1, 8 +; slli a1, a1, 0x38 +; srli a1, a1, 0x38 +; or a1, a4, a1 +; slli a1, a1, 0x30 +; srli a4, a1, 0x30 +; or a5, a5, a4 +; slli a1, a5, 0x20 +; srli a4, a1, 0x20 +; or a5, a3, a4 +; mv a4, zero +; addi a3, zero, 0x40 +; addi a1, zero, 1 +; slli a1, a1, 0x3f +; addi a2, zero, 1 +; slli a2, a2, 0x38 +; blez a3, 0x34 +; and t5, a1, a5 +; beq zero, t5, 8 +; or a4, a4, a2 +; addi a3, a3, -1 +; srli a1, a1, 1 +; addi t5, zero, 8 +; rem t5, a3, t5 +; bnez t5, 0xc +; srli a2, a2, 0xf +; j -0x28 +; slli a2, a2, 1 +; j -0x30 +; mv s3, a4 +; slli a1, a0, 8 +; srli a2, a0, 8 +; slli a4, a2, 0x38 +; srli a2, a4, 0x38 +; or a2, a1, a2 +; slli a4, a2, 0x10 +; srli a1, a0, 0x10 +; slli a2, a1, 8 +; srli a5, a1, 8 +; slli a1, a5, 0x38 +; srli a3, a1, 0x38 +; or a5, a2, a3 +; slli a1, a5, 0x30 +; srli a2, a1, 0x30 +; or a4, a4, a2 +; slli a1, a4, 0x20 +; srli a3, a0, 0x20 +; slli a4, a3, 8 +; srli a0, a3, 8 +; slli a2, a0, 0x38 +; srli a5, a2, 0x38 +; or a0, a4, a5 +; slli a2, a0, 0x10 +; srli a4, a3, 0x10 +; slli a0, a4, 8 +; srli a3, a4, 8 +; slli a4, a3, 0x38 +; srli a3, a4, 0x38 +; or a3, a0, a3 +; slli a4, a3, 0x30 +; srli a0, a4, 0x30 +; or a2, a2, a0 +; slli a4, a2, 0x20 +; srli a0, a4, 0x20 +; or a2, a1, a0 +; mv a1, zero +; addi a0, zero, 0x40 +; addi a4, zero, 1 +; slli a4, a4, 0x3f +; addi a5, zero, 1 +; slli a5, a5, 0x38 +; blez a0, 0x34 +; and t5, a4, a2 +; beq zero, t5, 8 +; or a1, a1, a5 +; addi a0, a0, -1 +; srli a4, a4, 1 +; addi t5, zero, 8 +; rem t5, a0, t5 +; bnez t5, 0xc +; srli a5, a5, 0xf +; j -0x28 +; slli a5, a5, 1 +; j -0x30 +; mv a0, s3 +; addi sp, sp, 0x10 +; ld s3, -8(sp) +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/bitrev.clif b/cranelift/filetests/filetests/runtests/bitrev.clif index fff70f7b435e..4bcadd553224 100644 --- a/cranelift/filetests/filetests/runtests/bitrev.clif +++ b/cranelift/filetests/filetests/runtests/bitrev.clif @@ -4,6 +4,9 @@ 
target aarch64 target s390x target x86_64 target riscv64 +target riscv64 has_zbb +target riscv64 has_zbkb +target riscv64 has_zbb has_zbkb target riscv64 has_c has_zcb function %bitrev_i8(i8) -> i8 { diff --git a/cranelift/filetests/filetests/runtests/i128-bitrev.clif b/cranelift/filetests/filetests/runtests/i128-bitrev.clif index a3bc89eca2d2..c53cf0ba30e5 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitrev.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitrev.clif @@ -4,6 +4,9 @@ target aarch64 target s390x target x86_64 target riscv64 +target riscv64 has_zbb +target riscv64 has_zbkb +target riscv64 has_zbb has_zbkb target riscv64 has_c has_zcb function %reverse_bits_zero() -> i8 {
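
The new `gen_bitrev` rules in lower.isle compose a bit reverse out of two primitives: a byte swap (`gen_bswap`, which emits `rev8` when Zbb is available and a shift/or sequence otherwise) followed by a per-byte bit reverse (`gen_brev8`, which emits `brev8` when Zbkb is available and falls back to the `Brev8` pseudo-instruction loop otherwise). `$I8` skips the byte swap entirely, `$I16`/`$I32` reuse the 64-bit reverse and shift the result right by `64 - width` via `rv_srli`, and `$I128` applies the 64-bit path to each half with the halves swapped. The sketch below is a minimal plain-Rust model of that strategy, not Cranelift code: the `rev8`, `brev8`, and `bitrev` function names are illustrative stand-ins for the emitted instruction sequences, and `brev8` is modeled with a portable bit trick rather than the Zbkb instruction.

```rust
// Byte swap, i.e. what Zbb's `rev8` (or the gen_bswap shift/or fallback) produces.
fn rev8(x: u64) -> u64 {
    x.swap_bytes()
}

// Reverse the bits inside each byte, i.e. what Zbkb's `brev8` (or the Brev8
// pseudo-instruction fallback) produces. Stopping at the nibble swap keeps the
// byte order unchanged.
fn brev8(x: u64) -> u64 {
    let mut y = x;
    y = ((y & 0x5555_5555_5555_5555) << 1) | ((y >> 1) & 0x5555_5555_5555_5555);
    y = ((y & 0x3333_3333_3333_3333) << 2) | ((y >> 2) & 0x3333_3333_3333_3333);
    y = ((y & 0x0f0f_0f0f_0f0f_0f0f) << 4) | ((y >> 4) & 0x0f0f_0f0f_0f0f_0f0f);
    y
}

// Model of gen_bitrev for the scalar widths handled by the new rules.
fn bitrev(x: u64, width: u32) -> u64 {
    assert!(matches!(width, 8 | 16 | 32 | 64));
    if width == 8 {
        // I8: brev8 alone reverses the low byte; no byte swap is needed.
        brev8(x & 0xff)
    } else {
        // I16/I32/I64: reverse all 64 bits (byte swap, then per-byte reverse),
        // then shift the result down so the reversed value lands in the low
        // `width` bits. The shift amount is 0 for I64, matching the rules.
        brev8(rev8(x)) >> (64 - width)
    }
}

fn main() {
    assert_eq!(bitrev(0b0000_0001, 8), 0b1000_0000);
    assert_eq!(bitrev(0x0000_0001, 32), 0x8000_0000);
    assert_eq!(bitrev(1, 64), 1u64 << 63);
    println!("bit-reverse sketch matches the expected results");
}
```

This is also why the Rev8 pseudo-instruction and its byte-at-a-time emit loop can be deleted: the only remaining consumer of a byte swap is `gen_bswap`, whose non-Zbb fallback is the pure shift/or sequence visible in the new bitrev.clif filetests, while the Zbb/Zbkb filetests show the two-instruction `rev8` + `brev8` form.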