IR: Add llvm.frexp intrinsic

Add an intrinsic which returns the two pieces as multiple return values. Alternatively could introduce a pair of intrinsics to separately return the fractional and exponent parts. AMDGPU has native instructions to return the two halves, but could use some generic legalization and optimization handling. For example, we should be able to handle legalization of f16 on older targets, and for bf16. Additionally antique targets need a hardware workaround which would be better handled in the backend rather than in library code where it is now.
Xilinx · Jun 28, 2023 · 003b58f · 003b58f
1 parent ee47699
commit 003b58f
Show file tree

Hide file tree

Showing 31 changed files with 3,184 additions and 22 deletions.
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
@@ -964,6 +964,7 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
                                              inputs. Backend will optimize out denormal scaling if
                                              marked with the :ref:`afn <fastmath_afn>` flag.
 
+  :ref:`llvm.frexp <int_frexp>`              Implemented for half, float and double.
 
   =========================================  ==========================================================
 

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
@@ -14757,6 +14757,62 @@ value is returned. If the result underflows a zero with the same sign
 is returned. If the result overflows, the result is an infinity with
 the same sign.
 
+.. _int_frexp:
+
+'``llvm.frexp.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.frexp`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare { float, i32 }     @llvm.frexp.f32.i32(float %Val)
+      declare { double, i32 }    @llvm.frexp.f64.i32(double %Val)
+      declare { x86_fp80, i32 }  @llvm.frexp.f80.i32(x86_fp80 %Val)
+      declare { fp128, i32 }     @llvm.frexp.f128.i32(fp128 %Val)
+      declare { ppc_fp128, i32 } @llvm.frexp.ppcf128.i32(ppc_fp128 %Val)
+      declare { <2 x float>, <2 x i32> }  @llvm.frexp.v2f32.v2i32(<2 x float> %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.frexp.*``' intrinsics perform the frexp function.
+
+Arguments:
+""""""""""
+
+The argument is a :ref:`floating-point <t_floating>` or
+:ref:`vector <t_vector>` of floating-point values. Returns two values
+in a struct. The first struct field matches the argument type, and the
+second field is an integer or a vector of integer values with the same
+number of elements as the argument.
+
+Semantics:
+""""""""""
+
+This intrinsic splits a floating point value into a normalized
+fractional component and integral exponent.
+
+For a non-zero argument, returns the argument multiplied by some power
+of two such that the absolute value of the returned value is in the
+range [0.5, 1.0), with the same sign as the argument. The second
+result is an integer such that the first result multiplied by 2 raised
+to the power of the second result is the input argument.
+
+If the argument is a zero, returns a zero with the same sign and a 0
+exponent.
+
+If the argument is a NaN, a NaN is returned and the returned exponent
+is unspecified.
+
+If the argument is an infinity, returns an infinity with the same sign
+and an unspecified exponent.
+
 '``llvm.log.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 

diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
@@ -63,6 +63,8 @@ Changes to the LLVM IR
 
 * Introduced new ``llvm.ldexp`` and ``llvm.experimental.constrained.ldexp`` intrinsics.
 
+* Introduced new ``llvm.frexp`` intrinsic.
+
 * The constant expression variants of the following instructions have been
   removed:
 

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1832,6 +1832,13 @@ class MachineIRBuilder {
     return buildInstr(TargetOpcode::G_FLDEXP, {Dst}, {Src0, Src1}, Flags);
   }
 
+  /// Build and insert \p Fract, \p Exp = G_FFREXP \p Src (optional \p Flags)
+  MachineInstrBuilder
+  buildFFrexp(const DstOp &Fract, const DstOp &Exp, const SrcOp &Src,
+              std::optional<unsigned> Flags = std::nullopt) {
+    return buildInstr(TargetOpcode::G_FFREXP, {Fract, Exp}, {Src}, Flags);
+  }
+
   /// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
   MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
                                      const SrcOp &Src1) {

diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -931,6 +931,12 @@ enum NodeType {
   FPOWI,
   /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
   FLDEXP,
+
+  /// FFREXP - frexp, extract fractional and exponent component of a
+  /// floating-point value. Returns the two components as separate return
+  /// values.
+  FFREXP,
+
   FLOG,
   FLOG2,
   FLOG10,

diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -74,6 +74,10 @@ namespace RTLIB {
   /// UNKNOWN_LIBCALL if there is none.
   Libcall getLDEXP(EVT RetVT);
 
+  /// getFREXP - Return the FREXP_* value for the given types, or
+  /// UNKNOWN_LIBCALL if there is none.
+  Libcall getFREXP(EVT RetVT);
+
   /// Return the SYNC_FETCH_AND_* value for the given opcode and type, or
   /// UNKNOWN_LIBCALL if there is none.
   Libcall getSYNC(unsigned Opc, MVT VT);

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
@@ -1046,6 +1046,9 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
   // TODO: int operand should be constrained to same number of elements as the result.
   def int_ldexp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
                                                              llvm_anyint_ty]>;
+
+  // TODO: Should constrain the element counts of both result types to match.
+  def int_frexp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty, llvm_anyint_ty], [LLVMMatchType<0>]>;
 }
 
 def int_minnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],

diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -284,6 +284,11 @@ HANDLE_LIBCALL(LDEXP_F64, "ldexp")
 HANDLE_LIBCALL(LDEXP_F80, "ldexpl")
 HANDLE_LIBCALL(LDEXP_F128, "ldexpl")
 HANDLE_LIBCALL(LDEXP_PPCF128, "ldexpl")
+HANDLE_LIBCALL(FREXP_F32, "frexpf")
+HANDLE_LIBCALL(FREXP_F64, "frexp")
+HANDLE_LIBCALL(FREXP_F80, "frexpl")
+HANDLE_LIBCALL(FREXP_F128, "frexpl")
+HANDLE_LIBCALL(FREXP_PPCF128, "frexpl")
 
 // Floating point environment
 HANDLE_LIBCALL(FEGETENV, "fegetenv")

diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -619,6 +619,9 @@ HANDLE_TARGET_OPCODE(G_FLOG10)
 /// Floating point x * 2^n
 HANDLE_TARGET_OPCODE(G_FLDEXP)
 
+/// Floating point extract fraction and exponent.
+HANDLE_TARGET_OPCODE(G_FFREXP)
+
 /// Generic FP negation.
 HANDLE_TARGET_OPCODE(G_FNEG)
 

diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -930,6 +930,13 @@ def G_FLDEXP : GenericInstruction {
   let hasSideEffects = false;
 }
 
+// Floating point frexp: $dst0 is the fraction, $dst1 the exponent of $src0.
+def G_FFREXP : GenericInstruction {
+  let OutOperandList = (outs type0:$dst0, type1:$dst1);
+  let InOperandList = (ins type0:$src0);
+  let hasSideEffects = false;
+}
+
 // Floating point ceiling of a value.
 def G_FCEIL : GenericInstruction {
   let OutOperandList = (outs type0:$dst);

diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2175,6 +2175,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                             getOrCreateVReg(*CI.getArgOperand(0)),
                             MachineInstr::copyFlagsFromInstruction(CI));
     return true;
+  case Intrinsic::frexp: {
+    ArrayRef<Register> VRegs = getOrCreateVRegs(CI);
+    MIRBuilder.buildFFrexp(VRegs[0], VRegs[1],
+                           getOrCreateVReg(*CI.getArgOperand(0)),
+                           MachineInstr::copyFlagsFromInstruction(CI));
+    return true;
+  }
   case Intrinsic::memcpy_inline:
     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
   case Intrinsic::memcpy:

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2584,6 +2584,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
 
     return UnableToLegalize;
   }
+  case TargetOpcode::G_FFREXP: {
+    Observer.changingInstr(MI);
+
+    if (TypeIdx == 0) {
+      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
+      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+    } else {
+      widenScalarDst(MI, WideTy, 1);
+    }
+
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
   case TargetOpcode::G_INTTOPTR:
     if (TypeIdx != 1)
       return UnableToLegalize;
@@ -4235,6 +4248,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_STRICT_FMUL:
   case G_STRICT_FMA:
   case G_STRICT_FLDEXP:
+  case G_FFREXP:
     return fewerElementsVectorMultiEltType(GMI, NumElts);
   case G_ICMP:
   case G_FCMP: