Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Add fmsub/fnmsub/fnmadd instruction lowerings #8588

Merged
merged 1 commit into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1372,6 +1372,21 @@
(rule (rv_fmadd $F32 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddS) $F32 frm rs1 rs2 rs3))
(rule (rv_fmadd $F64 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddD) $F64 frm rs1 rs2 rs3))

;; Emits the fused multiply-subtract instruction `fmsub`, selecting the
;; single- or double-precision opcode from the requested float type.
(decl rv_fmsub (Type FRM FReg FReg FReg) FReg)
(rule (rv_fmsub $F32 rounding a b c)
      (fpu_rrrr (FpuOPRRRR.FmsubS) $F32 rounding a b c))
(rule (rv_fmsub $F64 rounding a b c)
      (fpu_rrrr (FpuOPRRRR.FmsubD) $F64 rounding a b c))

;; Emits the negated fused multiply-add instruction `fnmadd`, selecting the
;; single- or double-precision opcode from the requested float type.
(decl rv_fnmadd (Type FRM FReg FReg FReg) FReg)
(rule (rv_fnmadd $F32 rounding a b c)
      (fpu_rrrr (FpuOPRRRR.FnmaddS) $F32 rounding a b c))
(rule (rv_fnmadd $F64 rounding a b c)
      (fpu_rrrr (FpuOPRRRR.FnmaddD) $F64 rounding a b c))

;; Emits the negated fused multiply-subtract instruction `fnmsub`, selecting
;; the single- or double-precision opcode from the requested float type.
(decl rv_fnmsub (Type FRM FReg FReg FReg) FReg)
(rule (rv_fnmsub $F32 rounding a b c)
      (fpu_rrrr (FpuOPRRRR.FnmsubS) $F32 rounding a b c))
(rule (rv_fnmsub $F64 rounding a b c)
      (fpu_rrrr (FpuOPRRRR.FnmsubD) $F64 rounding a b c))

;; Helper for emitting the `fmv.x.w` instruction.
(decl rv_fmvxw (FReg) XReg)
(rule (rv_fmvxw r) (fpu_rr (FpuOPRR.FmvXW) $I32 (FRM.RNE) r))
Expand Down
30 changes: 22 additions & 8 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1508,41 +1508,55 @@
(rv_vfsgnj_vf x y (unmasked) ty))

;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Scalar `fma` lowerings. All four use the RNE rounding mode and pass the
;; un-negated operands straight through; any `fneg` wrapped around the first
;; or third operand is absorbed by choosing a different instruction.
;;
;; Several patterns can match the same input (e.g. `(fma (fneg x) y (fneg z))`
;; also matches the plain `(fma x y z)` pattern), so the rules are given
;; increasing priorities: the more specific the pattern, the higher the number.

;; fmadd: (rs1 * rs2) + rs3
;; Fallback when no operand is a recognised `fneg`.
(rule 0 (lower (has_type (ty_scalar_float ty) (fma x y z)))
(rv_fmadd ty (FRM.RNE) x y z))

;; fmsub: (rs1 * rs2) - rs3
;; Matches a negated addend.
(rule 1 (lower (has_type (ty_scalar_float ty) (fma x y (fneg z))))
(rv_fmsub ty (FRM.RNE) x y z))

;; fnmsub: -(rs1 * rs2) + rs3
;; Matches a negated first multiplicand.
(rule 2 (lower (has_type (ty_scalar_float ty) (fma (fneg x) y z)))
(rv_fnmsub ty (FRM.RNE) x y z))

;; fnmadd: -(rs1 * rs2) - rs3
;; Matches both a negated first multiplicand and a negated addend.
(rule 3 (lower (has_type (ty_scalar_float ty) (fma (fneg x) y (fneg z))))
(rv_fnmadd ty (FRM.RNE) x y z))

;; (fma x y z) computes x * y + z
;; vfmacc computes vd[i] = +(vs1[i] * vs2[i]) + vd[i]
;; The addend `z` takes the role of `vd`, so the helper is called with the
;; arguments in reverse order: `z y x`.

(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fma x y z)))
(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (fma x y z)))
(rv_vfmacc_vv z y x (unmasked) ty))

(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y z)))
(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y z)))
(rv_vfmacc_vf z y x (unmasked) ty))

;; vfmsac computes vd[i] = +(vs1[i] * vs2[i]) - vd[i]

(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fma x y (fneg z))))
(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma x y (fneg z))))
(rv_vfmsac_vv z y x (unmasked) ty))

(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y (fneg z))))
(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (fma (splat x) y (fneg z))))
(rv_vfmsac_vf z y x (unmasked) ty))

;; vfnmacc computes vd[i] = -(vs1[i] * vs2[i]) - vd[i]

(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y (fneg z))))
(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y (fneg z))))
(rv_vfnmacc_vv z y x (unmasked) ty))

(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y (fneg z))))
(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y (fneg z))))
(rv_vfnmacc_vf z y x (unmasked) ty))

;; vfnmsac computes vd[i] = -(vs1[i] * vs2[i]) + vd[i]

(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y z)))
(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg x) y z)))
(rv_vfnmsac_vv z y x (unmasked) ty))

(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y z)))
(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (fma (fneg (splat x)) y z)))
(rv_vfnmsac_vf z y x (unmasked) ty))


Expand Down
32 changes: 0 additions & 32 deletions cranelift/filetests/filetests/isa/riscv64/float.clif
Original file line number Diff line number Diff line change
Expand Up @@ -259,38 +259,6 @@ block0(v0: f64):
; ret


function %f29(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fma v0, v1, v2
return v3
}

; VCode:
; block0:
; fmadd.s fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmadd.s fa0, fa0, fa1, fa2, rne
; ret

function %f30(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fma v0, v1, v2
return v3
}

; VCode:
; block0:
; fmadd.d fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmadd.d fa0, fa0, fa1, fa2, rne
; ret

function %f31(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fcopysign v0, v1
Expand Down
142 changes: 142 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/fma.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
test compile precise-output
set unwind_info=false
target riscv64


function %fmadd_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fma v0, v1, v2
return v3
}

; VCode:
; block0:
; fmadd.s fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmadd.s fa0, fa0, fa1, fa2, rne
; ret

function %fmadd_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fma v0, v1, v2
return v3
}

; VCode:
; block0:
; fmadd.d fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmadd.d fa0, fa0, fa1, fa2, rne
; ret


function %fmsub_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v2
v4 = fma v0, v1, v3
return v4
}

; VCode:
; block0:
; fmsub.s fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmsub.s fa0, fa0, fa1, fa2, rne
; ret

function %fmsub_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v2
v4 = fma v0, v1, v3
return v4
}

; VCode:
; block0:
; fmsub.d fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmsub.d fa0, fa0, fa1, fa2, rne
; ret

function %fnmsub_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v0
v4 = fma v3, v1, v2
return v4
}

; VCode:
; block0:
; fnmsub.s fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fnmsub.s fa0, fa0, fa1, fa2, rne
; ret

function %fnmsub_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v0
v4 = fma v3, v1, v2
return v4
}

; VCode:
; block0:
; fnmsub.d fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fnmsub.d fa0, fa0, fa1, fa2, rne
; ret

function %fnmadd_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v0
v4 = fneg v2
v5 = fma v3, v1, v4
return v5
}

; VCode:
; block0:
; fnmadd.s fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fnmadd.s fa0, fa0, fa1, fa2, rne
; ret

function %fnmadd_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v0
v4 = fneg v2
v5 = fma v3, v1, v4
return v5
}

; VCode:
; block0:
; fnmadd.d fa0,fa0,fa1,fa2,rne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fnmadd.d fa0, fa0, fa1, fa2, rne
; ret

70 changes: 70 additions & 0 deletions cranelift/filetests/filetests/runtests/fma.clif
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,73 @@ block0(v0: f32, v1: f32, v2: f32):
return v4
}
; run: %fma_load_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6




; f32 `fma` with a negated addend: computes v0 * v1 + (-v2).
function %fmsub_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v2
v4 = fma v0, v1, v3
return v4
}
; run: %fmsub_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3af060p14
; run: %fmsub_f32(-0x83.0, 0x2.68091p6, 0x9.88721p1) == -0x1.3b88e6p14
; run: %fmsub_f32(-0x83.0, 0x2.68091p6, -0x9.88721p1) == -0x1.3af060p14


; f64 `fma` with a negated addend: computes v0 * v1 + (-v2).
function %fmsub_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v2
v4 = fma v0, v1, v3
return v4
}
; run: %fmsub_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba5badfd4333p21
; run: %fmsub_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == -0x1.7ba6ebee17417p21
; run: %fmsub_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, -0x9.887218721837p1) == -0x1.7ba5badfd4333p21



; f32 `fma` with a negated first multiplicand: computes (-v0) * v1 + v2.
function %fnmsub_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v0
v4 = fma v3, v1, v2
return v4
}
; run: %fnmsub_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == -0x1.3af060p14
; run: %fnmsub_f32(-0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3b88e6p14
; run: %fnmsub_f32(-0x83.0, 0x2.68091p6, -0x9.88721p1) == 0x1.3af060p14

; f64 `fma` with a negated first multiplicand: computes (-v0) * v1 + v2.
function %fnmsub_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v0
v4 = fma v3, v1, v2
return v4
}
; run: %fnmsub_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == -0x1.7ba5badfd4333p21
; run: %fnmsub_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba6ebee17417p21
; run: %fnmsub_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, -0x9.887218721837p1) == 0x1.7ba5badfd4333p21



; f32 `fma` with both the first multiplicand and the addend negated:
; computes (-v0) * v1 + (-v2).
function %fnmadd_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fneg v0
v4 = fneg v2
v5 = fma v3, v1, v4
return v5
}
; run: %fnmadd_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == -0x1.3b88e6p14
; run: %fnmadd_f32(-0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3af060p14
; run: %fnmadd_f32(-0x83.0, 0x2.68091p6, -0x9.88721p1) == 0x1.3b88e6p14

; f64 `fma` with both the first multiplicand and the addend negated:
; computes (-v0) * v1 + (-v2).
function %fnmadd_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fneg v0
v4 = fneg v2
v5 = fma v3, v1, v4
return v5
}
; run: %fnmadd_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == -0x1.7ba6ebee17417p21
; run: %fnmadd_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba5badfd4333p21
; run: %fnmadd_f64(-0x1.3b88ea148dd4ap14, 0x2.680916809121p6, -0x9.887218721837p1) == 0x1.7ba6ebee17417p21
Loading