8318227: RISC-V: C2 ConvHF2F

Reviewed-by: fyang
jianglizhou · Jan 15, 2024 · b363472 · b363472
1 parent edc0ebb
commit b363472
Show file tree

Hide file tree

Showing 12 changed files with 84 additions and 0 deletions.
diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -815,6 +815,8 @@ enum operand_size { int8, int16, int32, uint32, int64 };
 
   INSN(fsqrt_s,  0b1010011, 0b00000, 0b0101100);
   INSN(fsqrt_d,  0b1010011, 0b00000, 0b0101101);
+  INSN(fcvt_s_h, 0b1010011, 0b00010, 0b0100000);
+  INSN(fcvt_h_s, 0b1010011, 0b00000, 0b0100010);
   INSN(fcvt_s_d, 0b1010011, 0b00001, 0b0100000);
   INSN(fcvt_d_s, 0b1010011, 0b00000, 0b0100001);
 #undef INSN
@@ -1071,6 +1073,7 @@ enum operand_size { int8, int16, int32, uint32, int64 };
     emit(insn);                                      \
   }
 
+  INSN(fmv_h_x,  0b1010011, 0b000, 0b00000, 0b1111010);
   INSN(fmv_w_x,  0b1010011, 0b000, 0b00000, 0b1111000);
   INSN(fmv_d_x,  0b1010011, 0b000, 0b00000, 0b1111001);
 
@@ -1108,8 +1111,10 @@ enum fclass_mask {
     emit(insn);                                           \
   }
 
+  INSN(fclass_h, 0b1010011, 0b001, 0b00000, 0b1110010);
   INSN(fclass_s, 0b1010011, 0b001, 0b00000, 0b1110000);
   INSN(fclass_d, 0b1010011, 0b001, 0b00000, 0b1110001);
+  INSN(fmv_x_h,  0b1010011, 0b000, 0b00000, 0b1110010);
   INSN(fmv_x_w,  0b1010011, 0b000, 0b00000, 0b1110000);
   INSN(fmv_x_d,  0b1010011, 0b000, 0b00000, 0b1110001);
 

diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -1783,6 +1783,52 @@ void C2_MacroAssembler::signum_fp(FloatRegister dst, FloatRegister one, bool is_
   bind(done);
 }
 
+static void float16_to_float_slow_path(C2_MacroAssembler& masm, C2GeneralStub<FloatRegister, Register, Register>& stub) {
+#define __ masm.
+  FloatRegister dst = stub.data<0>();
+  Register src = stub.data<1>();
+  Register tmp = stub.data<2>();
+  __ bind(stub.entry());
+
+  // Out-of-line path of float16_to_float. The instructions below mainly target NaN, as
+  // riscv does not handle NaN well with fcvt, but they are equally correct for +/- Inf.
+
+  // Construct the 32-bit value from the 16-bit one by hand, because the payloads of
+  // non-canonical NaNs must be preserved. Start from an all-ones float exponent (bits 30..23).
+  __ mv(tmp, 0x7f800000);
+  // Move the half fraction into float position (23 - 10 = 13); the sign bit was already set via sign-extension of src. Clobbers t0.
+  __ slli(t0, src, 13);
+  __ orr(tmp, t0, tmp);
+  __ fmv_w_x(dst, tmp);
+
+  __ j(stub.continuation());
+#undef __
+}
+
+// j.l.Float.float16ToFloat: converts the binary16 bits held in src into a float in dst; writes tmp and t0.
+void C2_MacroAssembler::float16_to_float(FloatRegister dst, Register src, Register tmp) {
+  auto stub = C2CodeStub::make<FloatRegister, Register, Register>(dst, src, tmp, 20, float16_to_float_slow_path);
+  // NOTE(review): 20 is presumably the size budget of the slow-path stub - confirm against C2CodeStub::make.
+  // On riscv, NaN needs special processing, as fcvt does not work in that case.
+  // Inf does not need special processing, as fcvt handles it correctly.
+  // We nevertheless let the slow path process NaN and Inf together: both are
+  // rare and both are caught by the same exponent check below, whereas making
+  // the slow path handle only NaN would sacrifice performance for the normal,
+  // i.e. non-NaN and non-Inf, cases.
+
+  // check whether it's a NaN or +/- Inf: 0x7c00 selects the five exponent bits of the half.
+  mv(t0, 0x7c00);
+  andr(tmp, src, t0);
+  // all exponent bits are set: jump to the stub processing the NaN and Inf cases.
+  beq(t0, tmp, stub->entry());
+
+  // neither NaN nor Inf: just use the built-in instructions.
+  fmv_h_x(dst, src);
+  fcvt_s_h(dst, dst);
+
+  bind(stub->continuation());
+}
+
 void C2_MacroAssembler::signum_fp_v(VectorRegister dst, VectorRegister one, BasicType bt, int vlen) {
   vsetvli_helper(bt, vlen);
 

diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
@@ -172,8 +172,11 @@
 
   void signum_fp(FloatRegister dst, FloatRegister one, bool is_double);
 
+  void float16_to_float(FloatRegister dst, Register src, Register tmp);
+
   void signum_fp_v(VectorRegister dst, VectorRegister one, BasicType bt, int vlen);
 
+
   // intrinsic methods implemented by rvv instructions
 
   // compress bits, i.e. j.l.Integer/Long::compress.

diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp
@@ -105,6 +105,7 @@ define_pd_global(intx, InlineSmallCode,          1000);
   product(bool, UseZba, false, "Use Zba instructions")                           \
   product(bool, UseZbb, false, "Use Zbb instructions")                           \
   product(bool, UseZbs, false, "Use Zbs instructions")                           \
+  product(bool, UseZfh, false, "Use Zfh instructions")                           \
   product(bool, UseZacas, false, EXPERIMENTAL, "Use Zacas instructions")         \
   product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions")       \
   product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions")       \

diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
@@ -1932,6 +1932,9 @@ bool Matcher::match_rule_supported(int opcode) {
     case Op_FmaVF:
     case Op_FmaVD:
       return UseFMA;
+
+    case Op_ConvHF2F:
+      return UseZfh;
   }
 
   return true; // Per default match rules are supported.
@@ -8275,6 +8278,20 @@ instruct convD2F_reg(fRegF dst, fRegD src) %{
   ins_pipe(fp_d2f);
 %}
 
+// single <-> half precision
+// NOTE: the format below shows only the fast path; float16_to_float also emits a NaN/Inf check that writes $tmp.
+instruct convHF2F_reg_reg(fRegF dst, iRegINoSp src, iRegINoSp tmp) %{
+  match(Set dst (ConvHF2F src));
+  effect(TEMP tmp);
+  format %{ "fmv.h.x $dst, $src\t# move source from $src to $dst\n\t"
+            "fcvt.s.h $dst, $dst\t# convert half to single precision"
+  %}
+  ins_encode %{
+    __ float16_to_float($dst$$FloatRegister, $src$$Register, $tmp$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // float <-> int
 
 instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{

diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
@@ -81,6 +81,9 @@ void VM_Version::initialize() {
     if (FLAG_IS_DEFAULT(UseZbs)) {
       FLAG_SET_DEFAULT(UseZbs, true);
     }
+    if (FLAG_IS_DEFAULT(UseZfh)) {
+      FLAG_SET_DEFAULT(UseZfh, true);
+    }
     if (FLAG_IS_DEFAULT(UseZic64b)) {
       FLAG_SET_DEFAULT(UseZic64b, true);
     }

diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
@@ -105,6 +105,8 @@ class VM_Version : public Abstract_VM_Version {
   // Zbc Carry-less multiplication
   // Zbs Single-bit instructions
   //
+  // Zfh Half-Precision Floating-Point instructions
+  //
   // Zicsr Control and Status Register (CSR) Instructions
   // Zifencei Instruction-Fetch Fence
   // Zic64b Cache blocks must be 64 bytes in size, naturally aligned in the address space.
@@ -143,6 +145,7 @@ class VM_Version : public Abstract_VM_Version {
   decl(ext_Zbc         , "Zbc"         , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT)              \
   decl(ext_Zbs         , "Zbs"         , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZbs))         \
   decl(ext_Zcb         , "Zcb"         , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT)              \
+  decl(ext_Zfh         , "Zfh"         , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZfh))         \
   decl(ext_Zicsr       , "Zicsr"       , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT)              \
   decl(ext_Zifencei    , "Zifencei"    , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT)              \
   decl(ext_Zic64b      , "Zic64b"      , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZic64b))      \

diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
@@ -242,6 +242,8 @@ void VM_Version::rivos_features() {
 
   ext_Zcb.enable_feature();
 
+  ext_Zfh.enable_feature();
+
   ext_Zicsr.enable_feature();
   ext_Zifencei.enable_feature();
   ext_Zic64b.enable_feature();

diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16Conversion.java b/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16Conversion.java
@@ -26,6 +26,7 @@
  * @bug 8289551 8302976
  * @summary Verify conversion between float and the binary16 format
  * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch=="aarch64"
+ *           | (os.arch == "riscv64" & vm.cpu.features ~= ".*zfh,.*")
  * @requires vm.compiler1.enabled & vm.compiler2.enabled
  * @requires vm.compMode != "Xcomp"
  * @comment default run

diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN.java b/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN.java
@@ -26,6 +26,7 @@
  * @bug 8289551 8302976
  * @summary Verify NaN sign and significand bits are preserved across conversions
  * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch=="aarch64"
+ *           | (os.arch == "riscv64" & vm.cpu.features ~= ".*zfh,.*")
  * @requires vm.compiler1.enabled & vm.compiler2.enabled
  * @requires vm.compMode != "Xcomp"
  * @library /test/lib /

diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/TestAllFloat16ToFloat.java b/test/hotspot/jtreg/compiler/intrinsics/float16/TestAllFloat16ToFloat.java
@@ -26,6 +26,7 @@
  * @bug 8302976
  * @summary Verify conversion between float and the binary16 format
  * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch == "aarch64"
+ *           | (os.arch == "riscv64" & vm.cpu.features ~= ".*zfh,.*")
  * @requires vm.compiler1.enabled & vm.compiler2.enabled
  * @requires vm.compMode != "Xcomp"
  * @comment default run:

diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/TestConstFloat16ToFloat.java b/test/hotspot/jtreg/compiler/intrinsics/float16/TestConstFloat16ToFloat.java
@@ -26,6 +26,7 @@
  * @bug 8302976
  * @summary Verify conversion cons between float and the binary16 format
  * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch=="aarch64"
+ *           | (os.arch == "riscv64" & vm.cpu.features ~= ".*zfh,.*")
  * @requires vm.compiler1.enabled & vm.compiler2.enabled
  * @requires vm.compMode != "Xcomp"
  * @comment default run: