Skip to content

Commit

Permalink
Implement support for option 'fexcess-precision'.
Browse files Browse the repository at this point in the history
Differential revision: https://reviews.llvm.org/D136176
  • Loading branch information
zahiraam committed Jan 5, 2023
1 parent abcdc26 commit 85d049a
Show file tree
Hide file tree
Showing 15 changed files with 542 additions and 28 deletions.
41 changes: 41 additions & 0 deletions clang/docs/UsersManual.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1746,6 +1746,47 @@ floating point semantic models: precise (the default), strict, and fast.
has no effect because the optimizer is prohibited from making unsafe
transformations.

.. option:: -fexcess-precision=<value>

The C and C++ standards allow floating-point expressions to be computed as if
intermediate results had more precision (and/or a wider range) than the type
of the expression strictly allows. This is called excess precision
arithmetic.
Excess precision arithmetic can improve the accuracy of results (although not
always), and it can make computation significantly faster if the target lacks
direct hardware support for arithmetic in a particular type. However, it can
also undermine strict floating-point reproducibility.

Under the standards, assignments and explicit casts force the operand to be
converted to its formal type, discarding any excess precision. Because data
can only flow between statements via an assignment, this means that the use
of excess precision arithmetic is a reliable local property of a single
statement, and results do not change based on optimization. However, when
excess precision arithmetic is in use, Clang does not guarantee strict
reproducibility, and future compiler releases may recognize more
opportunities to use excess precision arithmetic, e.g. with floating-point
builtins.

Clang does not use excess precision arithmetic for most types or on most
targets. For example, even on pre-SSE X86 targets where ``float`` and
``double`` computations must be performed in the 80-bit X87 format, Clang
rounds all intermediate results correctly for their type. Clang currently
uses excess precision arithmetic by default only for the following types and
targets:

* ``_Float16`` on X86 targets without ``AVX512-FP16``.

The ``-fexcess-precision=<value>`` option can be used to control the use of
excess precision arithmetic. Valid values are:

* ``standard`` - The default. Allow the use of excess precision arithmetic
under the constraints of the C and C++ standards. Has no effect except on
the types and targets listed above.
* ``fast`` - Accepted for GCC compatibility, but currently treated as an
alias for ``standard``.
* ``16`` - Forces ``_Float16`` operations to be emitted without using excess
precision arithmetic. This value is only accepted on X86 targets.

.. _crtfastmath.o:

A note about ``crtfastmath.o``
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/AST/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,8 @@ class QualType {
/// Returns the local fast qualifiers, read from the integer stored in Value.
unsigned getLocalFastQualifiers() const { return Value.getInt(); }
/// Replaces the local fast qualifiers by storing \p Quals as the integer held
/// in Value.
void setLocalFastQualifiers(unsigned Quals) { Value.setInt(Quals); }

/// Returns true if arithmetic on this type should be carried out with excess
/// precision. This is currently only the case for the builtin _Float16 type
/// when the target of \p Ctx has a Float16 type but no legal half type and
/// the Float16ExcessPrecision language option is not FPP_None.
bool UseExcessPrecision(const ASTContext &Ctx);

/// Retrieves a pointer to the underlying (unqualified) type.
///
/// This function requires that the type not be NULL. If the type might be
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Basic/FPOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ OPTION(NoSignedZero, bool, 1, NoHonorInfs)
OPTION(AllowReciprocal, bool, 1, NoSignedZero)
OPTION(AllowApproxFunc, bool, 1, AllowReciprocal)
OPTION(FPEvalMethod, LangOptions::FPEvalMethodKind, 2, AllowApproxFunc)
// Excess-precision mode applied to _Float16 arithmetic, stored as a
// LangOptions::ExcessPrecisionKind. NOTE(review): the third argument (2) is
// presumably the field width in bits and the fourth the preceding option in
// the list, matching the entries above - confirm against the OPTION users.
OPTION(Float16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, FPEvalMethod)
#undef OPTION
1 change: 1 addition & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ COMPATIBLE_LANGOPT(ExpStrictFP, 1, false, "Enable experimental strict floating p
BENIGN_LANGOPT(RoundingMath, 1, false, "Do not assume default floating-point rounding behavior")
BENIGN_ENUM_LANGOPT(FPExceptionMode, FPExceptionModeKind, 2, FPE_Default, "FP Exception Behavior Mode type")
BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "FP type used for floating point arithmetic")
ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for floating point arithmetic")
LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment")
LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility")
LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting")
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/LangOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ class LangOptions : public LangOptionsBase {
FEM_UnsetOnCommandLine = 3
};

/// Selects how excess precision is used for intermediate floating-point
/// results. The enumerators correspond, in order, to the "standard", "fast"
/// and "none" values of the -ffloat16-excess-precision= option; FPP_None
/// disables excess precision.
enum ExcessPrecisionKind { FPP_Standard, FPP_Fast, FPP_None };

/// Possible exception handling behavior.
enum class ExceptionHandlingKind { None, SjLj, WinEH, DwarfCFI, Wasm };

Expand Down
2 changes: 0 additions & 2 deletions clang/include/clang/Basic/TargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -933,8 +933,6 @@ class TargetInfo : public virtual TransferrableTargetInfo,
return true;
}

virtual bool shouldEmitFloat16WithExcessPrecision() const { return false; }

/// Specify if mangling based on address space map should be used or
/// not for language specific address spaces
bool useAddressSpaceMapMangling() const {
Expand Down
18 changes: 16 additions & 2 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1576,8 +1576,22 @@ def exception_model_EQ : Joined<["-"], "exception-model=">,
def fignore_exceptions : Flag<["-"], "fignore-exceptions">, Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Enable support for ignoring exception handling constructs">,
MarshallingInfoFlag<LangOpts<"IgnoreExceptions">>;
def fexcess_precision_EQ : Joined<["-"], "fexcess-precision=">,
Group<clang_ignored_gcc_optimization_f_Group>;
// Driver-level option. RenderFloatingPointOptions validates the value: it
// accepts "standard" and "fast" on every target and, on x86/x86_64 only, the
// GCC-compatible spelling "16", which it forwards to cc1 as
// -ffloat16-excess-precision=none. The spelling "none" is rejected by the
// driver, so the advertised values are "standard", "fast" and "16".
def fexcess_precision_EQ : Joined<["-"], "fexcess-precision=">, Group<f_Group>,
HelpText<"Allows control over excess precision on targets where native "
"support for the precision types is not available. By default, excess "
"precision is used to calculate intermediate results following the "
"rules specified in ISO C99.">,
Values<"standard,fast,16">, NormalizedValuesScope<"LangOptions">,
NormalizedValues<["FPP_Standard", "FPP_Fast", "FPP_None"]>;
// cc1-only counterpart of -fexcess-precision= (CC1Option, NoDriverOption).
// Its value is marshalled into LangOpts.Float16ExcessPrecision, defaulting to
// FPP_Standard; "standard", "fast" and "none" map, in order, to FPP_Standard,
// FPP_Fast and FPP_None.
def ffloat16_excess_precision_EQ : Joined<["-"], "ffloat16-excess-precision=">,
Group<f_Group>, Flags<[CC1Option, NoDriverOption]>,
HelpText<"Allows control over excess precision on targets where native "
"support for Float16 precision types is not available. By default, excess "
"precision is used to calculate intermediate results following the "
"rules specified in ISO C99.">,
Values<"standard,fast,none">, NormalizedValuesScope<"LangOptions">,
NormalizedValues<["FPP_Standard", "FPP_Fast", "FPP_None"]>,
MarshallingInfoEnum<LangOpts<"Float16ExcessPrecision">, "FPP_Standard">;
def : Flag<["-"], "fexpensive-optimizations">, Group<clang_ignored_gcc_optimization_f_Group>;
def : Flag<["-"], "fno-expensive-optimizations">, Group<clang_ignored_gcc_optimization_f_Group>;
def fextdirs_EQ : Joined<["-"], "fextdirs=">, Group<f_Group>;
Expand Down
19 changes: 19 additions & 0 deletions clang/lib/AST/Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1483,6 +1483,25 @@ struct StripObjCKindOfTypeVisitor

} // namespace

bool QualType::UseExcessPrecision(const ASTContext &Ctx) {
const BuiltinType *BT = getTypePtr()->getAs<BuiltinType>();
if (BT) {
switch (BT->getKind()) {
case BuiltinType::Kind::Float16: {
const TargetInfo &TI = Ctx.getTargetInfo();
if (TI.hasFloat16Type() && !TI.hasLegalHalfType() &&
Ctx.getLangOpts().getFloat16ExcessPrecision() !=
Ctx.getLangOpts().ExcessPrecisionKind::FPP_None)
return true;
return false;
}
default:
return false;
}
}
return false;
}

/// Substitute the given type arguments for Objective-C type
/// parameters within the given type, recursively.
QualType QualType::substObjCTypeArgs(ASTContext &ctx,
Expand Down
4 changes: 0 additions & 4 deletions clang/lib/Basic/Targets/X86.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
return false;
}

bool shouldEmitFloat16WithExcessPrecision() const override {
return HasFloat16 && !hasLegalHalfType();
}

void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;

Expand Down
17 changes: 6 additions & 11 deletions clang/lib/CodeGen/CGExprComplex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,18 +275,13 @@ class ComplexExprEmitter
const BinOpInfo &Op);

QualType getPromotionType(QualType Ty) {
if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) {
if (Ty->isRealFloatingType()) {
if (Ty->isFloat16Type())
return CGF.getContext().FloatTy;
} else {
assert(Ty->isAnyComplexType() &&
"Expecting to promote a complex type!");
QualType ElementType = Ty->castAs<ComplexType>()->getElementType();
if (ElementType->isFloat16Type())
return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
}
if (auto *CT = Ty->getAs<ComplexType>()) {
QualType ElementType = CT->getElementType();
if (ElementType.UseExcessPrecision(CGF.getContext()))
return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
}
if (Ty.UseExcessPrecision(CGF.getContext()))
return CGF.getContext().FloatTy;
return QualType();
}

Expand Down
14 changes: 6 additions & 8 deletions clang/lib/CodeGen/CGExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -814,15 +814,13 @@ class ScalarExprEmitter
Value *(ScalarExprEmitter::*F)(const BinOpInfo &));

QualType getPromotionType(QualType Ty) {
if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) {
if (Ty->isAnyComplexType()) {
QualType ElementType = Ty->castAs<ComplexType>()->getElementType();
if (ElementType->isFloat16Type())
return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
}
if (Ty->isFloat16Type())
return CGF.getContext().FloatTy;
if (auto *CT = Ty->getAs<ComplexType>()) {
QualType ElementType = CT->getElementType();
if (ElementType.UseExcessPrecision(CGF.getContext()))
return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
}
if (Ty.UseExcessPrecision(CGF.getContext()))
return CGF.getContext().FloatTy;
return QualType();
}

Expand Down
26 changes: 26 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2705,6 +2705,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
!JA.isOffloading(Action::OFK_HIP))
FPContract = "on";
bool StrictFPModel = false;
StringRef Float16ExcessPrecision = "";

if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) {
CmdArgs.push_back("-mlimit-float-precision");
Expand Down Expand Up @@ -2901,6 +2902,27 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
break;
}

case options::OPT_fexcess_precision_EQ: {
// Validate -fexcess-precision=<value>. On x86/x86_64 an accepted value is
// recorded in Float16ExcessPrecision; on every other architecture only
// "standard" and "fast" are accepted and nothing is recorded. Any other
// value is diagnosed as an unsupported option argument.
StringRef Val = A->getValue();
const llvm::Triple::ArchType Arch = TC.getArch();
if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
if (Val.equals("standard") || Val.equals("fast"))
Float16ExcessPrecision = Val;
// For GCC compatibility, also accept the value "16", which disables
// excess precision; it is recorded as clang's equivalent value
// "none".
else if (Val.equals("16"))
Float16ExcessPrecision = "none";
else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
} else {
if (!(Val.equals("standard") || Val.equals("fast")))
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
}
break;
}
case options::OPT_ffinite_math_only:
HonorINFs = false;
HonorNaNs = false;
Expand Down Expand Up @@ -3071,6 +3093,10 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
if (!FPEvalMethod.empty())
CmdArgs.push_back(Args.MakeArgString("-ffp-eval-method=" + FPEvalMethod));

if (!Float16ExcessPrecision.empty())
CmdArgs.push_back(Args.MakeArgString("-ffloat16-excess-precision=" +
Float16ExcessPrecision));

ParseMRecip(D, Args, CmdArgs);

// -ffast-math enables the __FAST_MATH__ preprocessor macro, but check for the
Expand Down
Loading

0 comments on commit 85d049a

Please sign in to comment.