Skip to content

Commit

Permalink
riscv64: Add remaining Zfa Instructions (#8582)
Browse files Browse the repository at this point in the history
* riscv64: Add `fround` instruction

* riscv64: Remove unused load_fp functions

* riscv64: Add support for `fli` instruction

* riscv64: Add negated `fli` rules
  • Loading branch information
afonso360 authored May 8, 2024
1 parent d51b5ae commit d89e2b3
Show file tree
Hide file tree
Showing 14 changed files with 975 additions and 53 deletions.
63 changes: 57 additions & 6 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
(rd WritableReg)
(imm Imm20))

;; Load the `fli` constant `imm`, of float type `ty`, into `rd`.
(Fli
(ty Type)
(imm FliConstant)
(rd WritableReg))

;; An ALU operation with one register sources and a register destination.
(FpuRR
(alu_op FpuOPRR)
Expand Down Expand Up @@ -411,6 +416,8 @@
(QNaN)
))

;; Constant operand of the `fli` instruction. Declared as a primitive, i.e. the
;; type itself is defined outside of ISLE.
(type FliConstant (primitive FliConstant))

(type FpuOPRR (enum
;; RV32F Standard Extension
(FsqrtS)
Expand Down Expand Up @@ -447,8 +454,10 @@
(FcvtWuD)
(FcvtDW)
(FcvtDWU)
;; bitmapip

;; Zfa Extension
(FroundS)
(FroundD)
))

(type LoadOP (enum
Expand Down Expand Up @@ -1549,6 +1558,20 @@
(rule (rv_fmaxm $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FmaxmS) $F32 (FRM.RUP) rs1 rs2))
(rule (rv_fmaxm $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FmaxmD) $F64 (FRM.RUP) rs1 rs2))

;; Emits the Zfa `fround` instruction (`fround.s` for $F32, `fround.d` for
;; $F64) on `src` using the given rounding mode.
(decl rv_fround (Type FRM FReg) FReg)
(rule (rv_fround $F32 rounding src)
      (fpu_rr (FpuOPRR.FroundS) $F32 rounding src))
(rule (rv_fround $F64 rounding src)
      (fpu_rr (FpuOPRR.FroundD) $F64 rounding src))

;; Emits an `fli` instruction that materializes the constant `imm` of type `ty`
;; in a freshly allocated float register, and returns that register.
(decl rv_fli (Type FliConstant) FReg)
(rule (rv_fli ty imm)
      (let ((rd WritableFReg (temp_writable_freg))
            (_ Unit (emit (MInst.Fli ty imm rd))))
        rd))

;; `Zba` Extension Instructions

;; Helper for emitting the `adduw` ("Add Unsigned Word") instruction.
Expand Down Expand Up @@ -1778,6 +1801,14 @@



;; Helper for generating a FliConstant from a u64 constant.
;;
;; Takes the float type and the u64 constant and yields the corresponding
;; `FliConstant`. The constructor is `partial`, so it may fail to match (no
;; `FliConstant` for that constant), and `pure`, so it can be used in `if-let`
;; guards. The implementation lives in external Rust code (presumably
;; `FliConstant::maybe_from_u64` -- TODO confirm).
(decl pure partial fli_constant_from_u64 (Type u64) FliConstant)
(extern constructor fli_constant_from_u64 fli_constant_from_u64)

;; Helper for generating a FliConstant from the negation of a u64 constant.
;;
;; The `imm` lowering uses the result as the value to load with `fli` before
;; applying `fneg`, i.e. it is expected to be the `FliConstant` for the negated
;; float. The constructor is `partial`, so it may fail to match, and `pure`, so
;; it can be used in `if-let` guards. The implementation lives in external Rust
;; code that is not visible here.
(decl pure partial fli_constant_from_negated_u64 (Type u64) FliConstant)
(extern constructor fli_constant_from_negated_u64 fli_constant_from_negated_u64)

;; Helper for generating an i64 from a pair of Imm20 and Imm12 constants
(decl i64_generate_imm (Imm20 Imm12) i64)
(extern extractor i64_generate_imm i64_generate_imm)
Expand All @@ -1795,14 +1826,30 @@
;; TODO: Load floats using `fld` instead of `ld`
(decl imm (Type u64) Reg)

;; Refs get loaded as integers.
(rule 5 (imm $R32 c) (imm $I32 c))
(rule 5 (imm $R64 c) (imm $I64 c))
;; With Zfa enabled, constants that `fli` can encode are loaded with a single
;; `fli` instruction.
(rule 7 (imm (ty_scalar_float ty) c)
      (if-let $true (has_zfa))
      (if-let fli_const (fli_constant_from_u64 ty c))
      (rv_fli ty fli_const))

;; Floats get loaded as integers and then moved into an F register.
;; With Zfa enabled, when the negation of the constant is loadable with `fli`,
;; load that negated constant and flip its sign in a register with `fneg`.
;;
;; This is a win over the integer path: for f64's it saves one instruction,
;; and for f32's it avoids having to allocate an integer register, which
;; reduces integer register pressure.
(rule 6 (imm (ty_scalar_float ty) c)
      (if-let $true (has_zfa))
      (if-let negated (fli_constant_from_negated_u64 ty c))
      (rv_fneg ty (rv_fli ty negated)))

;; Otherwise floats get loaded as integers and then moved into an F register.
(rule 5 (imm $F32 c) (gen_bitcast (imm $I32 c) $I32 $F32))
(rule 5 (imm $F64 c) (gen_bitcast (imm $I64 c) $I64 $F64))

;; Refs get loaded as integers.
(rule 5 (imm $R32 c) (imm $I32 c))
(rule 5 (imm $R64 c) (imm $I64 c))

;; Try to match just an imm12
(rule 4 (imm (ty_int ty) c)
(if-let (i64_generate_imm (imm20_is_zero) imm12) (i64_sextend_u64 ty c))
Expand Down Expand Up @@ -2470,7 +2517,7 @@
(rule (float_round_fcvt $F64 frm rs) (rv_fcvtdl frm (rv_fcvtld frm rs)))

(decl gen_float_round (FRM FReg Type) FReg)
(rule (gen_float_round frm rs ty)
(rule 0 (gen_float_round frm rs ty)
(let (;; if rs is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits
;; in mantissa, the result is the same as src, check for these cases first.
(max FReg (imm ty (float_int_max ty)))
Expand All @@ -2491,6 +2538,10 @@
;; Check if the value cannot be rounded exactly and return the source input if so
(gen_select_freg (cmp_eqz exact) corrected_nan rounded)))

;; When Zfa is available, rounding is done by the dedicated `fround`
;; instruction. This rule has priority 1 so that it is preferred over the
;; priority 0 rule whenever its guard holds.
(rule 1 (gen_float_round rounding src ty)
      (if-let $true (has_zfa))
      (rv_fround ty rounding src))



Expand Down
130 changes: 119 additions & 11 deletions cranelift/codegen/src/isa/riscv64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,116 @@ impl IntegerCompare {
}
}

/// One of the 32 floating-point constants that the Zfa `fli` instruction can
/// load, stored as its index (`0..=31`) in the instruction's constant table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FliConstant(u8);

impl FliConstant {
    /// Wraps a raw constant-table index.
    ///
    /// `value` must be at most 31; this is only checked in debug builds.
    pub(crate) fn new(value: u8) -> Self {
        debug_assert!(value <= 31, "Invalid FliConstant: {}", value);
        Self(value)
    }

    /// Looks up the `fli` constant equal to the float whose bit pattern is
    /// `imm`, interpreted according to `ty`.
    ///
    /// For `F32` the value is decoded from the low 32 bits of `imm`.
    ///
    /// Returns `None` when the value is not one of the loadable constants, or
    /// when `ty` is neither `F32` nor `F64`.
    pub(crate) fn maybe_from_u64(ty: Type, imm: u64) -> Option<Self> {
        // Convert the value into an F64, this allows us to represent
        // values from both f32 and f64 in the same value.
        let value = match ty {
            F32 => f32::from_bits(imm as u32) as f64,
            F64 => f64::from_bits(imm),
            // No constant table is modelled for other types. Since this is a
            // fallible lookup, report "no match" instead of panicking so the
            // caller can fall back to another way of loading the constant.
            _ => return None,
        };

        Some(match (ty, value) {
            (_, f) if f == -1.0 => Self::new(0),

            // Since f64 can represent all f32 values, f32::min_positive won't be
            // the same as f64::min_positive, so we need to check for both independently
            (F32, f) if f == (f32::MIN_POSITIVE as f64) => Self::new(1),
            (F64, f) if f == f64::MIN_POSITIVE => Self::new(1),

            (_, f) if f == 2.0f64.powi(-16) => Self::new(2),
            (_, f) if f == 2.0f64.powi(-15) => Self::new(3),
            (_, f) if f == 2.0f64.powi(-8) => Self::new(4),
            (_, f) if f == 2.0f64.powi(-7) => Self::new(5),
            (_, f) if f == 0.0625 => Self::new(6),
            (_, f) if f == 0.125 => Self::new(7),
            (_, f) if f == 0.25 => Self::new(8),
            (_, f) if f == 0.3125 => Self::new(9),
            (_, f) if f == 0.375 => Self::new(10),
            (_, f) if f == 0.4375 => Self::new(11),
            (_, f) if f == 0.5 => Self::new(12),
            (_, f) if f == 0.625 => Self::new(13),
            (_, f) if f == 0.75 => Self::new(14),
            (_, f) if f == 0.875 => Self::new(15),
            (_, f) if f == 1.0 => Self::new(16),
            (_, f) if f == 1.25 => Self::new(17),
            (_, f) if f == 1.5 => Self::new(18),
            (_, f) if f == 1.75 => Self::new(19),
            (_, f) if f == 2.0 => Self::new(20),
            (_, f) if f == 2.5 => Self::new(21),
            (_, f) if f == 3.0 => Self::new(22),
            (_, f) if f == 4.0 => Self::new(23),
            (_, f) if f == 8.0 => Self::new(24),
            (_, f) if f == 16.0 => Self::new(25),
            (_, f) if f == 128.0 => Self::new(26),
            (_, f) if f == 256.0 => Self::new(27),
            (_, f) if f == 32768.0 => Self::new(28),
            (_, f) if f == 65536.0 => Self::new(29),
            (_, f) if f == f64::INFINITY => Self::new(30),

            // NaN's are not guaranteed to preserve the sign / payload bits, so we need to check
            // the original bits directly.
            (F32, f) if f.is_nan() && imm == 0x7fc0_0000 => Self::new(31), // Canonical NaN
            (F64, f) if f.is_nan() && imm == 0x7ff8_0000_0000_0000 => Self::new(31), // Canonical NaN
            _ => return None,
        })
    }

    /// Returns the textual form of this constant used when printing the
    /// instruction.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped index is greater than 31.
    pub(crate) fn format(self) -> &'static str {
        // The preferred assembly syntax for entries 1, 30, and 31 is min, inf, and nan, respectively.
        // For entries 0 through 29 (including entry 1), the assembler will accept decimal constants
        // in C-like syntax.
        match self.0 {
            0 => "-1.0",
            1 => "min",
            2 => "2^-16",
            3 => "2^-15",
            4 => "2^-8",
            5 => "2^-7",
            6 => "0.0625",
            7 => "0.125",
            8 => "0.25",
            9 => "0.3125",
            10 => "0.375",
            11 => "0.4375",
            12 => "0.5",
            13 => "0.625",
            14 => "0.75",
            15 => "0.875",
            16 => "1.0",
            17 => "1.25",
            18 => "1.5",
            19 => "1.75",
            20 => "2.0",
            21 => "2.5",
            22 => "3.0",
            23 => "4.0",
            24 => "8.0",
            25 => "16.0",
            26 => "128.0",
            27 => "256.0",
            28 => "32768.0",
            29 => "65536.0",
            30 => "inf",
            31 => "nan",
            _ => panic!("Invalid FliConstant"),
        }
    }

    /// Returns the raw constant-table index.
    pub(crate) fn bits(self) -> u8 {
        self.0
    }
}

impl FpuOPRRRR {
pub(crate) fn op_name(self) -> &'static str {
match self {
Expand Down Expand Up @@ -376,6 +486,8 @@ impl FpuOPRR {
Self::FcvtWuD => "fcvt.wu.d",
Self::FcvtDW => "fcvt.d.w",
Self::FcvtDWU => "fcvt.d.wu",
Self::FroundS => "fround.s",
Self::FroundD => "fround.d",
}
}

Expand All @@ -392,14 +504,6 @@ impl FpuOPRR {
_ => false,
}
}
// move from x register to float register.
pub(crate) fn move_x_to_f_op(ty: Type) -> Self {
match ty {
F32 => Self::FmvWX,
F64 => Self::FmvDX,
_ => unreachable!("ty:{:?}", ty),
}
}

pub(crate) fn op_code(self) -> u32 {
match self {
Expand Down Expand Up @@ -428,7 +532,9 @@ impl FpuOPRR {
| FpuOPRR::FcvtWD
| FpuOPRR::FcvtWuD
| FpuOPRR::FcvtDW
| FpuOPRR::FcvtDWU => 0b1010011,
| FpuOPRR::FcvtDWU
| FpuOPRR::FroundS
| FpuOPRR::FroundD => 0b1010011,
}
}

Expand Down Expand Up @@ -460,6 +566,8 @@ impl FpuOPRR {
FpuOPRR::FcvtDW => 0b00000,
FpuOPRR::FcvtDWU => 0b00001,
FpuOPRR::FsqrtD => 0b00000,
FpuOPRR::FroundS => 0b00100,
FpuOPRR::FroundD => 0b00100,
}
}
pub(crate) fn funct7(self) -> u32 {
Expand All @@ -482,8 +590,8 @@ impl FpuOPRR {
FpuOPRR::FcvtDL => 0b1101001,
FpuOPRR::FcvtDLu => 0b1101001,
FpuOPRR::FmvDX => 0b1111001,
FpuOPRR::FcvtSD => 0b0100000,
FpuOPRR::FcvtDS => 0b0100001,
FpuOPRR::FcvtSD | FpuOPRR::FroundS => 0b0100000,
FpuOPRR::FcvtDS | FpuOPRR::FroundD => 0b0100001,
FpuOPRR::FclassD => 0b1110001,
FpuOPRR::FcvtWD => 0b1100001,
FpuOPRR::FcvtWuD => 0b1100001,
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ impl Inst {
| Inst::Nop4
| Inst::BrTable { .. }
| Inst::Auipc { .. }
| Inst::Fli { .. }
| Inst::Lui { .. }
| Inst::LoadInlineConst { .. }
| Inst::AluRRR { .. }
Expand Down Expand Up @@ -875,6 +876,9 @@ impl Inst {
let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
sink.put4(x);
}
&Inst::Fli { rd, ty, imm } => {
sink.put4(encode_fli(ty, imm, rd));
}
&Inst::LoadInlineConst { rd, ty, imm } => {
let data = &imm.to_le_bytes()[..ty.bytes() as usize];

Expand Down
20 changes: 20 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2084,6 +2084,26 @@ fn test_riscv64_binemit() {
0x22b59553,
));

insns.push(TestUnit::new(
Inst::Fli {
ty: F32,
rd: writable_fa0(),
imm: FliConstant::new(0),
},
"fli.s fa0,-1.0",
0xf0100553,
));

insns.push(TestUnit::new(
Inst::Fli {
ty: F64,
rd: writable_fa0(),
imm: FliConstant::new(13),
},
"fli.d fa0,0.625",
0xf2168553,
));

let (flags, isa_flags) = make_test_flags();
let emit_info = EmitInfo::new(flags, isa_flags);

Expand Down
21 changes: 21 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -652,3 +652,24 @@ pub fn encode_zcbmem_load(op: ZcbMemOp, rd: WritableReg, base: Reg, imm: Uimm2)
pub fn encode_zcbmem_store(op: ZcbMemOp, src: Reg, base: Reg, imm: Uimm2) -> u16 {
encode_zcbmem_bits(op, src, base, imm)
}

/// Encodes an `fli.s` (`F32`) or `fli.d` (`F64`) instruction that loads the
/// constant `imm` into `rd`.
///
/// # Panics
///
/// Panics if `ty` is neither `F32` nor `F64`.
pub fn encode_fli(ty: Type, imm: FliConstant, rd: WritableReg) -> u32 {
    // FLI.{S,D} reuses the encoding of FMV.{W,D}.X: the rs1 field holds the
    // index of the constant to be loaded and the rs2 field is set to 1.
    let op = match ty {
        F32 => FpuOPRR::FmvWX,
        F64 => FpuOPRR::FmvDX,
        _ => unreachable!("fli is only defined for F32 and F64, got {:?}", ty),
    };
    let frm = 0; // FRM is hard coded to 0 in both instructions
    let rs2 = 1; // rs2 set to 1 is what differentiates FLI from FMV

    let mut bits = 0;
    bits |= unsigned_field_width(op.op_code(), 7);
    bits |= reg_to_gpr_num(rd.to_reg()) << 7;
    bits |= unsigned_field_width(frm, 3) << 12;
    // The constant index occupies the rs1 field.
    bits |= unsigned_field_width(imm.bits() as u32, 5) << 15;
    // The rs2 field is 5 bits wide: the next field (funct7) starts at bit 25.
    bits |= unsigned_field_width(rs2, 5) << 20;
    bits |= unsigned_field_width(op.funct7(), 7) << 25;
    bits
}
Loading

0 comments on commit d89e2b3

Please sign in to comment.