diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index d4e5b3565ee06be..7dd8ecb5fcc4d9d 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1746,6 +1746,47 @@ floating point semantic models: precise (the default), strict, and fast. has no effect because the optimizer is prohibited from making unsafe transformations. +.. option:: -fexcess-precision=<value> + + The C and C++ standards allow floating-point expressions to be computed as if + intermediate results had more precision (and/or a wider range) than the type + of the expression strictly allows. This is called excess precision + arithmetic. + Excess precision arithmetic can improve the accuracy of results (although not + always), and it can make computation significantly faster if the target lacks + direct hardware support for arithmetic in a particular type. However, it can + also undermine strict floating-point reproducibility. + + Under the standards, assignments and explicit casts force the operand to be + converted to its formal type, discarding any excess precision. Because data + can only flow between statements via an assignment, this means that the use + of excess precision arithmetic is a reliable local property of a single + statement, and results do not change based on optimization. However, when + excess precision arithmetic is in use, Clang does not guarantee strict + reproducibility, and future compiler releases may recognize more + opportunities to use excess precision arithmetic, e.g. with floating-point + builtins. + + Clang does not use excess precision arithmetic for most types or on most + targets. For example, even on pre-SSE X86 targets where ``float`` and + ``double`` computations must be performed in the 80-bit X87 format, Clang + rounds all intermediate results correctly for their type. Clang currently + uses excess precision arithmetic by default only for the following types and + targets: + + * ``_Float16`` on X86 targets without ``AVX512-FP16``. 
+ + The ``-fexcess-precision=`` option can be used to control the use of + excess precision arithmetic. Valid values are: + + * ``standard`` - The default. Allow the use of excess precision arithmetic + under the constraints of the C and C++ standards. Has no effect except on + the types and targets listed above. + * ``fast`` - Accepted for GCC compatibility, but currently treated as an + alias for ``standard``. + * ``16`` - Forces ``_Float16`` operations to be emitted without using excess + precision arithmetic. + .. _crtfastmath.o: A note about ``crtfastmath.o`` diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 8f7a4836381ae3b..af6dbcf3988aee0 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -763,6 +763,8 @@ class QualType { unsigned getLocalFastQualifiers() const { return Value.getInt(); } void setLocalFastQualifiers(unsigned Quals) { Value.setInt(Quals); } + bool UseExcessPrecision(const ASTContext &Ctx); + /// Retrieves a pointer to the underlying (unqualified) type. /// /// This function requires that the type not be NULL. 
If the type might be diff --git a/clang/include/clang/Basic/FPOptions.def b/clang/include/clang/Basic/FPOptions.def index 1dfbbb549c87413..0c687e3c3fa0396 100644 --- a/clang/include/clang/Basic/FPOptions.def +++ b/clang/include/clang/Basic/FPOptions.def @@ -25,4 +25,5 @@ OPTION(NoSignedZero, bool, 1, NoHonorInfs) OPTION(AllowReciprocal, bool, 1, NoSignedZero) OPTION(AllowApproxFunc, bool, 1, AllowReciprocal) OPTION(FPEvalMethod, LangOptions::FPEvalMethodKind, 2, AllowApproxFunc) +OPTION(Float16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, FPEvalMethod) #undef OPTION diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 4ccf84b88385e74..d1cbe430643910a 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -317,6 +317,7 @@ COMPATIBLE_LANGOPT(ExpStrictFP, 1, false, "Enable experimental strict floating p BENIGN_LANGOPT(RoundingMath, 1, false, "Do not assume default floating-point rounding behavior") BENIGN_ENUM_LANGOPT(FPExceptionMode, FPExceptionModeKind, 2, FPE_Default, "FP Exception Behavior Mode type") BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "FP type used for floating point arithmetic") +ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for floating point arithmetic") LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment") LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility") LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting") diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 70b123434f6f5eb..41b0426a2d4b8ae 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -295,6 +295,8 @@ class LangOptions : public LangOptionsBase { FEM_UnsetOnCommandLine = 3 }; + enum ExcessPrecisionKind { FPP_Standard, FPP_Fast, 
FPP_None }; + /// Possible exception handling behavior. enum class ExceptionHandlingKind { None, SjLj, WinEH, DwarfCFI, Wasm }; diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 9e310f6473a67cc..a5aea33d84751bf 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -933,8 +933,6 @@ class TargetInfo : public virtual TransferrableTargetInfo, return true; } - virtual bool shouldEmitFloat16WithExcessPrecision() const { return false; } - /// Specify if mangling based on address space map should be used or /// not for language specific address spaces bool useAddressSpaceMapMangling() const { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index cf204ae6b441ac2..dd299fa8e883050 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1576,8 +1576,22 @@ def exception_model_EQ : Joined<["-"], "exception-model=">, def fignore_exceptions : Flag<["-"], "fignore-exceptions">, Group, Flags<[CC1Option]>, HelpText<"Enable support for ignoring exception handling constructs">, MarshallingInfoFlag>; -def fexcess_precision_EQ : Joined<["-"], "fexcess-precision=">, - Group; +def fexcess_precision_EQ : Joined<["-"], "fexcess-precision=">, Group, + HelpText<"Allows control over excess precision on targets where native " + "support for the precision types is not available. By default, excess " + "precision is used to calculate intermediate results following the " + "rules specified in ISO C99.">, + Values<"standard,fast,none">, NormalizedValuesScope<"LangOptions">, + NormalizedValues<["FPP_Standard", "FPP_Fast", "FPP_None"]>; +def ffloat16_excess_precision_EQ : Joined<["-"], "ffloat16-excess-precision=">, + Group, Flags<[CC1Option, NoDriverOption]>, + HelpText<"Allows control over excess precision on targets where native " + "support for Float16 precision types is not available. 
By default, excess " + "precision is used to calculate intermediate results following the " + "rules specified in ISO C99.">, + Values<"standard,fast,none">, NormalizedValuesScope<"LangOptions">, + NormalizedValues<["FPP_Standard", "FPP_Fast", "FPP_None"]>, + MarshallingInfoEnum, "FPP_Standard">; def : Flag<["-"], "fexpensive-optimizations">, Group; def : Flag<["-"], "fno-expensive-optimizations">, Group; def fextdirs_EQ : Joined<["-"], "fextdirs=">, Group; diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 54bfb023237b07e..eb249ae7c6585e9 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -1483,6 +1483,25 @@ struct StripObjCKindOfTypeVisitor } // namespace +bool QualType::UseExcessPrecision(const ASTContext &Ctx) { + const BuiltinType *BT = getTypePtr()->getAs(); + if (BT) { + switch (BT->getKind()) { + case BuiltinType::Kind::Float16: { + const TargetInfo &TI = Ctx.getTargetInfo(); + if (TI.hasFloat16Type() && !TI.hasLegalHalfType() && + Ctx.getLangOpts().getFloat16ExcessPrecision() != + Ctx.getLangOpts().ExcessPrecisionKind::FPP_None) + return true; + return false; + } + default: + return false; + } + } + return false; +} + /// Substitute the given type arguments for Objective-C type /// parameters within the given type, recursively. 
QualType QualType::substObjCTypeArgs(ASTContext &ctx, diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 0f0ca5d9a5f9507..3a02889f9f6dc34 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -302,10 +302,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { return false; } - bool shouldEmitFloat16WithExcessPrecision() const override { - return HasFloat16 && !hasLegalHalfType(); - } - void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index a31685ddf38a4e6..e65e925a11d4872 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -275,18 +275,13 @@ class ComplexExprEmitter const BinOpInfo &Op); QualType getPromotionType(QualType Ty) { - if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) { - if (Ty->isRealFloatingType()) { - if (Ty->isFloat16Type()) - return CGF.getContext().FloatTy; - } else { - assert(Ty->isAnyComplexType() && - "Expecting to promote a complex type!"); - QualType ElementType = Ty->castAs()->getElementType(); - if (ElementType->isFloat16Type()) - return CGF.getContext().getComplexType(CGF.getContext().FloatTy); - } + if (auto *CT = Ty->getAs()) { + QualType ElementType = CT->getElementType(); + if (ElementType.UseExcessPrecision(CGF.getContext())) + return CGF.getContext().getComplexType(CGF.getContext().FloatTy); } + if (Ty.UseExcessPrecision(CGF.getContext())) + return CGF.getContext().FloatTy; return QualType(); } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 6e67d3da6f2b655..d8de8af0be7c3bb 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -814,15 +814,13 @@ class ScalarExprEmitter Value *(ScalarExprEmitter::*F)(const BinOpInfo &)); QualType getPromotionType(QualType Ty) { - if 
(CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) { - if (Ty->isAnyComplexType()) { - QualType ElementType = Ty->castAs()->getElementType(); - if (ElementType->isFloat16Type()) - return CGF.getContext().getComplexType(CGF.getContext().FloatTy); - } - if (Ty->isFloat16Type()) - return CGF.getContext().FloatTy; + if (auto *CT = Ty->getAs()) { + QualType ElementType = CT->getElementType(); + if (ElementType.UseExcessPrecision(CGF.getContext())) + return CGF.getContext().getComplexType(CGF.getContext().FloatTy); } + if (Ty.UseExcessPrecision(CGF.getContext())) + return CGF.getContext().FloatTy; return QualType(); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3635748ab9aa1a1..f16c799887995de 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2705,6 +2705,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, !JA.isOffloading(Action::OFK_HIP)) FPContract = "on"; bool StrictFPModel = false; + StringRef Float16ExcessPrecision = ""; if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) { CmdArgs.push_back("-mlimit-float-precision"); @@ -2901,6 +2902,27 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, break; } + case options::OPT_fexcess_precision_EQ: { + StringRef Val = A->getValue(); + const llvm::Triple::ArchType Arch = TC.getArch(); + if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) { + if (Val.equals("standard") || Val.equals("fast")) + Float16ExcessPrecision = Val; + // To make it GCC compatible, allow the value of "16", which + // means disable excess precision, the same meaning as clang's + // equivalent value "none". 
+ else if (Val.equals("16")) + Float16ExcessPrecision = "none"; + else + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getSpelling() << Val; + } else { + if (!(Val.equals("standard") || Val.equals("fast"))) + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getSpelling() << Val; + } + break; + } case options::OPT_ffinite_math_only: HonorINFs = false; HonorNaNs = false; @@ -3071,6 +3093,10 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, if (!FPEvalMethod.empty()) CmdArgs.push_back(Args.MakeArgString("-ffp-eval-method=" + FPEvalMethod)); + if (!Float16ExcessPrecision.empty()) + CmdArgs.push_back(Args.MakeArgString("-ffloat16-excess-precision=" + + Float16ExcessPrecision)); + ParseMRecip(D, Args, CmdArgs); // -ffast-math enables the __FAST_MATH__ preprocessor macro, but check for the diff --git a/clang/test/CodeGen/X86/fexcess-precision.c b/clang/test/CodeGen/X86/fexcess-precision.c new file mode 100644 index 000000000000000..325706830f894da --- /dev/null +++ b/clang/test/CodeGen/X86/fexcess-precision.c @@ -0,0 +1,387 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast -target-feature +avx512fp16 \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// 
RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple 
x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple 
x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none \ +// RUN: -ffp-contract=on -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -ffp-contract=on -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=source -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=source -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=double -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=double -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=extended -emit-llvm -o - %s \ +// RUN: | 
FileCheck -check-prefixes=CHECK-CONTRACT-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=extended -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none \ +// RUN: -fapprox-func -fmath-errno -fno-signed-zeros -mreassociate \ +// RUN: -freciprocal-math -ffp-contract=on -fno-rounding-math \ +// RUN: -funsafe-math-optimizations -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-UNSAFE %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -fapprox-func -fmath-errno -fno-signed-zeros -mreassociate \ +// RUN: -freciprocal-math -ffp-contract=on -fno-rounding-math \ +// RUN: -funsafe-math-optimizations -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-UNSAFE %s + +// CHECK-EXT-LABEL: @f( +// CHECK-EXT-NEXT: entry: +// CHECK-EXT-NEXT: [[A_ADDR:%.*]] = alloca half +// CHECK-EXT-NEXT: [[B_ADDR:%.*]] = alloca half +// CHECK-EXT-NEXT: [[C_ADDR:%.*]] = alloca half +// CHECK-EXT-NEXT: [[D_ADDR:%.*]] = alloca half +// CHECK-EXT-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] +// CHECK-EXT-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] +// CHECK-EXT-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] +// CHECK-EXT-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] +// CHECK-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-EXT-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// CHECK-EXT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-EXT-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// CHECK-EXT-NEXT: [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]] +// CHECK-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-EXT-NEXT: [[EXT2:%.*]] = fpext half [[TMP2]] to float +// CHECK-EXT-NEXT: 
[[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-EXT-NEXT: [[EXT3:%.*]] = fpext half [[TMP3]] to float +// CHECK-EXT-NEXT: [[MUL4:%.*]] = fmul float [[EXT2]], [[EXT3]] +// CHECK-EXT-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[MUL4]] +// CHECK-EXT-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half +// CHECK-EXT-NEXT: ret half [[UNPROMOTION]] +// +// CHECK-NO-EXT-LABEL: @f( +// CHECK-NO-EXT-NEXT: entry: +// CHECK-NO-EXT-NEXT: [[A_ADDR:%.*]] = alloca half +// CHECK-NO-EXT-NEXT: [[B_ADDR:%.*]] = alloca half +// CHECK-NO-EXT-NEXT: [[C_ADDR:%.*]] = alloca half +// CHECK-NO-EXT-NEXT: [[D_ADDR:%.*]] = alloca half +// CHECK-NO-EXT-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] +// CHECK-NO-EXT-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] +// CHECK-NO-EXT-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] +// CHECK-NO-EXT-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] +// CHECK-NO-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-NO-EXT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-NO-EXT-NEXT: [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]] +// CHECK-NO-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-NO-EXT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-NO-EXT-NEXT: [[MUL1:%.*]] = fmul half [[TMP2]], [[TMP3]] +// CHECK-NO-EXT-NEXT: [[ADD:%.*]] = fadd half [[MUL]], [[MUL1]] +// CHECK-NO-EXT-NEXT: ret half [[ADD]] +// +// CHECK-EXT-DBL-LABEL: @f( +// CHECK-EXT-DBL-NEXT: entry: +// CHECK-EXT-DBL-NEXT: [[A_ADDR:%.*]] = alloca half +// CHECK-EXT-DBL-NEXT: [[B_ADDR:%.*]] = alloca half +// CHECK-EXT-DBL-NEXT: [[C_ADDR:%.*]] = alloca half +// CHECK-EXT-DBL-NEXT: [[D_ADDR:%.*]] = alloca half +// CHECK-EXT-DBL-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] +// CHECK-EXT-DBL-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] +// CHECK-EXT-DBL-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] +// CHECK-EXT-DBL-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] +// CHECK-EXT-DBL-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-EXT-DBL-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] 
to double +// CHECK-EXT-DBL-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-EXT-DBL-NEXT: [[CONV1:%.*]] = fpext half [[TMP1]] to double +// CHECK-EXT-DBL-NEXT: [[MUL:%.*]] = fmul double [[CONV]], [[CONV1]] +// CHECK-EXT-DBL-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-EXT-DBL-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to double +// CHECK-EXT-DBL-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-EXT-DBL-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to double +// CHECK-EXT-DBL-NEXT: [[MUL4:%.*]] = fmul double [[CONV2]], [[CONV3]] +// CHECK-EXT-DBL-NEXT: [[ADD:%.*]] = fadd double [[MUL]], [[MUL4]] +// CHECK-EXT-DBL-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD]] to half +// CHECK-EXT-DBL-NEXT: ret half [[CONV5]] +// +// CHECK-EXT-FP80-LABEL: @f( +// CHECK-EXT-FP80-NEXT: entry: +// CHECK-EXT-FP80-NEXT: [[A_ADDR:%.*]] = alloca half +// CHECK-EXT-FP80-NEXT: [[B_ADDR:%.*]] = alloca half +// CHECK-EXT-FP80-NEXT: [[C_ADDR:%.*]] = alloca half +// CHECK-EXT-FP80-NEXT: [[D_ADDR:%.*]] = alloca half +// CHECK-EXT-FP80-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] +// CHECK-EXT-FP80-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] +// CHECK-EXT-FP80-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] +// CHECK-EXT-FP80-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] +// CHECK-EXT-FP80-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-EXT-FP80-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-EXT-FP80-NEXT: [[CONV1:%.*]] = fpext half [[TMP1]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[MUL:%.*]] = fmul x86_fp80 [[CONV]], [[CONV1]] +// CHECK-EXT-FP80-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-EXT-FP80-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-EXT-FP80-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[MUL4:%.*]] = fmul x86_fp80 [[CONV2]], [[CONV3]] +// CHECK-EXT-FP80-NEXT: 
[[ADD:%.*]] = fadd x86_fp80 [[MUL]], [[MUL4]] +// CHECK-EXT-FP80-NEXT: [[CONV5:%.*]] = fptrunc x86_fp80 [[ADD]] to half +// CHECK-EXT-FP80-NEXT: ret half [[CONV5]] +// +// CHECK-CONTRACT-LABEL: @f( +// CHECK-CONTRACT-NEXT: entry: +// CHECK-CONTRACT-NEXT: [[A_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-NEXT: [[B_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-NEXT: [[C_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-NEXT: [[D_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] +// CHECK-CONTRACT-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] +// CHECK-CONTRACT-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] +// CHECK-CONTRACT-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] +// CHECK-CONTRACT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-CONTRACT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-CONTRACT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-CONTRACT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-CONTRACT-NEXT: [[MUL1:%.*]] = fmul half [[TMP2]], [[TMP3]] +// CHECK-CONTRACT-NEXT: [[TMP4:%.*]] = call half @llvm.fmuladd.f16(half [[TMP0]], half [[TMP1]], half [[MUL1]]) +// CHECK-CONTRACT-NEXT: ret half [[TMP4]] +// +// CHECK-CONTRACT-DBL-LABEL: @f( +// CHECK-CONTRACT-DBL-NEXT: entry: +// CHECK-CONTRACT-DBL-NEXT: [[A_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-DBL-NEXT: [[B_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-DBL-NEXT: [[C_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-DBL-NEXT: [[D_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-DBL-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to double +// CHECK-CONTRACT-DBL-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: 
[[CONV1:%.*]] = fpext half [[TMP1]] to double +// CHECK-CONTRACT-DBL-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to double +// CHECK-CONTRACT-DBL-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to double +// CHECK-CONTRACT-DBL-NEXT: [[MUL4:%.*]] = fmul double [[CONV2]], [[CONV3]] +// CHECK-CONTRACT-DBL-NEXT: [[TMP4:%.*]] = call double @llvm.fmuladd.f64(double [[CONV]], double [[CONV1]], double [[MUL4]]) +// CHECK-CONTRACT-DBL-NEXT: [[CONV5:%.*]] = fptrunc double [[TMP4]] to half +// CHECK-CONTRACT-DBL-NEXT: ret half [[CONV5]] +// +// CHECK-CONTRACT-EXT-LABEL: @f( +// CHECK-CONTRACT-EXT-NEXT: entry: +// CHECK-CONTRACT-EXT-NEXT: [[A_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-EXT-NEXT: [[B_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-EXT-NEXT: [[C_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-EXT-NEXT: [[D_ADDR:%.*]] = alloca half +// CHECK-CONTRACT-EXT-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: [[CONV1:%.*]] = fpext half [[TMP1]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[MUL4:%.*]] = fmul x86_fp80 [[CONV2]], [[CONV3]] +// CHECK-CONTRACT-EXT-NEXT: [[TMP4:%.*]] = call x86_fp80 @llvm.fmuladd.f80(x86_fp80 [[CONV]], 
x86_fp80 [[CONV1]], x86_fp80 [[MUL4]]) +// CHECK-CONTRACT-EXT-NEXT: [[CONV5:%.*]] = fptrunc x86_fp80 [[TMP4]] to half +// CHECK-CONTRACT-EXT-NEXT: ret half [[CONV5]] +// +// CHECK-UNSAFE-LABEL: @f( +// CHECK-UNSAFE-NEXT: entry: +// CHECK-UNSAFE-NEXT: [[A_ADDR:%.*]] = alloca half +// CHECK-UNSAFE-NEXT: [[B_ADDR:%.*]] = alloca half +// CHECK-UNSAFE-NEXT: [[C_ADDR:%.*]] = alloca half +// CHECK-UNSAFE-NEXT: [[D_ADDR:%.*]] = alloca half +// CHECK-UNSAFE-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] +// CHECK-UNSAFE-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] +// CHECK-UNSAFE-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] +// CHECK-UNSAFE-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] +// CHECK-UNSAFE-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-UNSAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-UNSAFE-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-UNSAFE-NEXT: [[MUL1:%.*]] = fmul reassoc nsz arcp afn half [[TMP2]], [[TMP3]] +// CHECK-UNSAFE-NEXT: [[TMP4:%.*]] = call reassoc nsz arcp afn half @llvm.fmuladd.f16(half [[TMP0]], half [[TMP1]], half [[MUL1]]) +// CHECK-UNSAFE-NEXT: ret half [[TMP4]] +// +_Float16 f(_Float16 a, _Float16 b, _Float16 c, _Float16 d) { + return a * b + c * d; +} + +// CHECK-EXT-LABEL: @getFEM( +// CHECK-EXT-NEXT: entry: +// CHECK-EXT-NEXT: ret i32 0 +// +// CHECK-NO-EXT-LABEL: @getFEM( +// CHECK-NO-EXT-NEXT: entry: +// CHECK-NO-EXT-NEXT: ret i32 0 +// +// CHECK-EXT-DBL-LABEL: @getFEM( +// CHECK-EXT-DBL-NEXT: entry: +// CHECK-EXT-DBL-NEXT: ret i32 1 +// +// CHECK-EXT-FP80-LABEL: @getFEM( +// CHECK-EXT-FP80-NEXT: entry: +// CHECK-EXT-FP80-NEXT: ret i32 2 +// +// CHECK-CONTRACT-LABEL: @getFEM( +// CHECK-CONTRACT-NEXT: entry: +// CHECK-CONTRACT-NEXT: ret i32 0 +// +// CHECK-CONTRACT-DBL-LABEL: @getFEM( +// CHECK-CONTRACT-DBL-NEXT: entry: +// CHECK-CONTRACT-DBL-NEXT: ret i32 1 +// +// CHECK-CONTRACT-EXT-LABEL: @getFEM( +// CHECK-CONTRACT-EXT-NEXT: entry: +// 
CHECK-CONTRACT-EXT-NEXT: ret i32 2 +// +// CHECK-UNSAFE-LABEL: @getFEM( +// CHECK-UNSAFE-NEXT: entry: +// CHECK-UNSAFE-NEXT: ret i32 -1 +// +int getFEM() { + return __FLT_EVAL_METHOD__; +} diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 0ac67d60f7d376e..446bb07ee6cb743 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -398,7 +398,7 @@ // CHECK-WARNING-DAG: optimization flag '-falign-loops' is not supported // CHECK-WARNING-DAG: optimization flag '-falign-jumps' is not supported // CHECK-WARNING-DAG: optimization flag '-falign-jumps=100' is not supported -// CHECK-WARNING-DAG: optimization flag '-fexcess-precision=100' is not supported +// CHECK-WARNING-DAG: unsupported argument '100' to option '-fexcess-precision=' // CHECK-WARNING-DAG: optimization flag '-fbranch-count-reg' is not supported // CHECK-WARNING-DAG: optimization flag '-fcaller-saves' is not supported // CHECK-WARNING-DAG: optimization flag '-fno-default-inline' is not supported diff --git a/clang/test/Driver/fexcess-precision.c b/clang/test/Driver/fexcess-precision.c new file mode 100644 index 000000000000000..05f1ddbdb70e77d --- /dev/null +++ b/clang/test/Driver/fexcess-precision.c @@ -0,0 +1,34 @@ +// RUN: %clang -### -target i386 -fexcess-precision=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FAST %s +// RUN: %clang -### -target i386 -fexcess-precision=standard -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-STD %s +// RUN: %clang -### -target i386 -fexcess-precision=16 -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NONE %s +// RUN: %clang -### -target i386 -fexcess-precision=none -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-ERR-NONE %s + +// RUN: %clang -### -target x86_64 -fexcess-precision=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FAST %s +// RUN: %clang -### -target x86_64 -fexcess-precision=standard -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-STD %s 
+// RUN: %clang -### -target x86_64 -fexcess-precision=16 -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NONE %s +// RUN: %clang -### -target x86_64 -fexcess-precision=none -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-ERR-NONE %s + +// RUN: %clang -### -target aarch64 -fexcess-precision=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK %s +// RUN: %clang -### -target aarch64 -fexcess-precision=standard -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK %s +// RUN: %clang -### -target aarch64 -fexcess-precision=16 -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-ERR-16 %s +// RUN: %clang -### -target aarch64 -fexcess-precision=none -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-ERR-NONE %s + +// CHECK-FAST: "-ffloat16-excess-precision=fast" +// CHECK-STD: "-ffloat16-excess-precision=standard" +// CHECK-NONE: "-ffloat16-excess-precision=none" +// CHECK-ERR-NONE: unsupported argument 'none' to option '-fexcess-precision=' +// CHECK: "-cc1" +// CHECK-NOT: "-ffloat16-excess-precision= +// CHECK-ERR-16: unsupported argument '16' to option '-fexcess-precision='