diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index f44840d9f8cc7..4a01a9c2ae455 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -815,6 +815,8 @@ enum operand_size { int8, int16, int32, uint32, int64 }; INSN(fsqrt_s, 0b1010011, 0b00000, 0b0101100); INSN(fsqrt_d, 0b1010011, 0b00000, 0b0101101); + INSN(fcvt_s_h, 0b1010011, 0b00010, 0b0100000); + INSN(fcvt_h_s, 0b1010011, 0b00000, 0b0100010); INSN(fcvt_s_d, 0b1010011, 0b00001, 0b0100000); INSN(fcvt_d_s, 0b1010011, 0b00000, 0b0100001); #undef INSN @@ -1071,6 +1073,7 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } + INSN(fmv_h_x, 0b1010011, 0b000, 0b00000, 0b1111010); INSN(fmv_w_x, 0b1010011, 0b000, 0b00000, 0b1111000); INSN(fmv_d_x, 0b1010011, 0b000, 0b00000, 0b1111001); @@ -1108,8 +1111,10 @@ enum fclass_mask { emit(insn); \ } + INSN(fclass_h, 0b1010011, 0b001, 0b00000, 0b1110010); INSN(fclass_s, 0b1010011, 0b001, 0b00000, 0b1110000); INSN(fclass_d, 0b1010011, 0b001, 0b00000, 0b1110001); + INSN(fmv_x_h, 0b1010011, 0b000, 0b00000, 0b1110010); INSN(fmv_x_w, 0b1010011, 0b000, 0b00000, 0b1110000); INSN(fmv_x_d, 0b1010011, 0b000, 0b00000, 0b1110001); diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index 711eb2100912b..0617c37687ffa 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -1783,6 +1783,52 @@ void C2_MacroAssembler::signum_fp(FloatRegister dst, FloatRegister one, bool is_ bind(done); } +static void float16_to_float_slow_path(C2_MacroAssembler& masm, C2GeneralStub<FloatRegister, Register, Register>& stub) { +#define __ masm. 
+ FloatRegister dst = stub.data<0>(); + Register src = stub.data<1>(); + Register tmp = stub.data<2>(); + __ bind(stub.entry()); + + // The following instructions mainly target NaN, as fcvt on riscv does not handle + // NaN the way we need; the same code also produces the right result for Inf. + + // Construct a 32-bit NaN from the 16-bit NaN; + // the payloads of non-canonical NaNs must be preserved. + __ mv(tmp, 0x7f800000); + // The sign bit was already set via sign-extension if necessary. + __ slli(t0, src, 13); + __ orr(tmp, t0, tmp); + __ fmv_w_x(dst, tmp); + + __ j(stub.continuation()); +#undef __ +} + +// j.l.Float.float16ToFloat: converts the binary16 bits in src to a float in dst; tmp and t0 are clobbered. +void C2_MacroAssembler::float16_to_float(FloatRegister dst, Register src, Register tmp) { + auto stub = C2CodeStub::make<FloatRegister, Register, Register>(dst, src, tmp, 20, float16_to_float_slow_path); + + // On riscv, NaN needs special handling as fcvt does not work in that case. + // On riscv, Inf does not need special handling as fcvt can handle it correctly. + // However, the slow path is used to process both NaN and Inf, + // as both of them are rare cases, and making the slow path handle + // only the NaN case would sacrifice performance for the normal cases, + // i.e. non-NaN and non-Inf cases. + + // Check whether it is a NaN or an Inf of either sign (all exponent bits set). + mv(t0, 0x7c00); + andr(tmp, src, t0); + // Jump to the stub processing NaN and Inf cases. NOTE(review): confirm a short-range beq can always reach the stub. + beq(t0, tmp, stub->entry()); + + // non-NaN or non-Inf cases, just use built-in instructions. 
+ fmv_h_x(dst, src); + fcvt_s_h(dst, dst); + + bind(stub->continuation()); +} + void C2_MacroAssembler::signum_fp_v(VectorRegister dst, VectorRegister one, BasicType bt, int vlen) { vsetvli_helper(bt, vlen); diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp index 4940ce5fe9e94..7309c59110a0b 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp @@ -172,8 +172,11 @@ void signum_fp(FloatRegister dst, FloatRegister one, bool is_double); + void float16_to_float(FloatRegister dst, Register src, Register tmp); + void signum_fp_v(VectorRegister dst, VectorRegister one, BasicType bt, int vlen); + // intrinsic methods implemented by rvv instructions // compress bits, i.e. j.l.Integer/Long::compress. diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp index aa95cebec14cd..01cd2296d9e41 100644 --- a/src/hotspot/cpu/riscv/globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/globals_riscv.hpp @@ -105,6 +105,7 @@ define_pd_global(intx, InlineSmallCode, 1000); product(bool, UseZba, false, "Use Zba instructions") \ product(bool, UseZbb, false, "Use Zbb instructions") \ product(bool, UseZbs, false, "Use Zbs instructions") \ + product(bool, UseZfh, false, "Use Zfh instructions") \ product(bool, UseZacas, false, EXPERIMENTAL, "Use Zacas instructions") \ product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions") \ product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions") \ diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 7e1291f49d74c..550e7947cc5e6 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -1932,6 +1932,9 @@ bool Matcher::match_rule_supported(int opcode) { case Op_FmaVF: case Op_FmaVD: return UseFMA; + + case Op_ConvHF2F: + return UseZfh; } return true; // Per default match rules are supported. 
@@ -8275,6 +8278,20 @@ instruct convD2F_reg(fRegF dst, fRegD src) %{ ins_pipe(fp_d2f); %} +// single <-> half precision (ConvHF2F is only matched when UseZfh is set; the encoding calls float16_to_float, which also emits a NaN/Inf check and a slow-path stub that the format string below does not show) + +instruct convHF2F_reg_reg(fRegF dst, iRegINoSp src, iRegINoSp tmp) %{ + match(Set dst (ConvHF2F src)); + effect(TEMP tmp); + format %{ "fmv.h.x $dst, $src\t# move source from $src to $dst\n\t" + "fcvt.s.h $dst, $dst\t# convert half to single precision" + %} + ins_encode %{ + __ float16_to_float($dst$$FloatRegister, $src$$Register, $tmp$$Register); + %} + ins_pipe(pipe_slow); +%} + // float <-> int instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{ diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp index 9a72b8d75a136..390ba51ee4f34 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp @@ -81,6 +81,9 @@ void VM_Version::initialize() { if (FLAG_IS_DEFAULT(UseZbs)) { FLAG_SET_DEFAULT(UseZbs, true); } + if (FLAG_IS_DEFAULT(UseZfh)) { + FLAG_SET_DEFAULT(UseZfh, true); + } if (FLAG_IS_DEFAULT(UseZic64b)) { FLAG_SET_DEFAULT(UseZic64b, true); } diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp index 1ea853284ff15..de85fb166f834 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp @@ -105,6 +105,8 @@ class VM_Version : public Abstract_VM_Version { // Zbc Carry-less multiplication // Zbs Single-bit instructions // + // Zfh Half-Precision Floating-Point instructions + // // Zicsr Control and Status Register (CSR) Instructions // Zifencei Instruction-Fetch Fence // Zic64b Cache blocks must be 64 bytes in size, naturally aligned in the address space. 
@@ -143,6 +145,7 @@ class VM_Version : public Abstract_VM_Version { decl(ext_Zbc , "Zbc" , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT) \ decl(ext_Zbs , "Zbs" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZbs)) \ decl(ext_Zcb , "Zcb" , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT) \ + decl(ext_Zfh , "Zfh" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZfh)) \ decl(ext_Zicsr , "Zicsr" , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT) \ decl(ext_Zifencei , "Zifencei" , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT) \ decl(ext_Zic64b , "Zic64b" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZic64b)) \ diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp index 354dbd70bb4e1..5230a06e43f63 100644 --- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp @@ -242,6 +242,8 @@ void VM_Version::rivos_features() { ext_Zcb.enable_feature(); + ext_Zfh.enable_feature(); + ext_Zicsr.enable_feature(); ext_Zifencei.enable_feature(); ext_Zic64b.enable_feature(); diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16Conversion.java b/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16Conversion.java index 0541121c127c4..ab4d8ff8bb703 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16Conversion.java +++ b/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16Conversion.java @@ -26,6 +26,7 @@ * @bug 8289551 8302976 * @summary Verify conversion between float and the binary16 format * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch=="aarch64" + * | (os.arch == "riscv64" & vm.cpu.features ~= ".*zfh,.*") * @requires vm.compiler1.enabled & vm.compiler2.enabled * @requires vm.compMode != "Xcomp" * @comment default run diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN.java b/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN.java index 
38060dfb5043c..f549c1ae670a5 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN.java +++ b/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN.java @@ -26,6 +26,7 @@ * @bug 8289551 8302976 * @summary Verify NaN sign and significand bits are preserved across conversions * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch=="aarch64" + * | (os.arch == "riscv64" & vm.cpu.features ~= ".*zfh,.*") * @requires vm.compiler1.enabled & vm.compiler2.enabled * @requires vm.compMode != "Xcomp" * @library /test/lib / diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/TestAllFloat16ToFloat.java b/test/hotspot/jtreg/compiler/intrinsics/float16/TestAllFloat16ToFloat.java index 492901f0046f0..937e17f8fca8b 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/float16/TestAllFloat16ToFloat.java +++ b/test/hotspot/jtreg/compiler/intrinsics/float16/TestAllFloat16ToFloat.java @@ -26,6 +26,7 @@ * @bug 8302976 * @summary Verify conversion between float and the binary16 format * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch == "aarch64" + * | (os.arch == "riscv64" & vm.cpu.features ~= ".*zfh,.*") * @requires vm.compiler1.enabled & vm.compiler2.enabled * @requires vm.compMode != "Xcomp" * @comment default run: diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/TestConstFloat16ToFloat.java b/test/hotspot/jtreg/compiler/intrinsics/float16/TestConstFloat16ToFloat.java index b4ba578c9f260..f58cf618c1c45 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/float16/TestConstFloat16ToFloat.java +++ b/test/hotspot/jtreg/compiler/intrinsics/float16/TestConstFloat16ToFloat.java @@ -26,6 +26,7 @@ * @bug 8302976 * @summary Verify conversion cons between float and the binary16 format * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch=="aarch64" + * | (os.arch == "riscv64" & vm.cpu.features ~= ".*zfh,.*") * @requires 
vm.compiler1.enabled & vm.compiler2.enabled * @requires vm.compMode != "Xcomp" * @comment default run: