apache · masahi · Oct 19, 2021 · Oct 7, 2021 · Oct 7, 2021 · Oct 9, 2021
diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc
@@ -77,6 +77,8 @@ void CodeGenLLVM::Init(const std::string& module_name, llvm::TargetMachine* tm,
   this->InitTarget(tm);
 }
 
+void CodeGenLLVM::SetFastMathFlag(llvm::FastMathFlags fmf) { builder_->setFastMathFlags(fmf); }
+
 void CodeGenLLVM::InitTarget(llvm::TargetMachine* tm) {
   module_->setTargetTriple(tm->getTargetTriple().str());
   module_->setDataLayout(tm->createDataLayout());

diff --git a/src/target/llvm/codegen_llvm.h b/src/target/llvm/codegen_llvm.h
@@ -78,6 +78,13 @@ class CodeGenLLVM : public ExprFunctor<llvm::Value*(const PrimExpr&)>,
    */
   virtual void Init(const std::string& module_name, llvm::TargetMachine* tm, llvm::LLVMContext* ctx,
                     bool system_lib, bool dynamic_lookup, bool target_c_runtime);
+
+  /*!
+   * \brief Set the fast math flags the IR builder applies to floating point operations.
+   * \param fmf FastMathFlags to use for code generation.
+   */
+  void SetFastMathFlag(llvm::FastMathFlags fmf);
+
   /*!
    * \brief Compile and add function f to the current module.
    * \param f The function to be added.

diff --git a/src/target/llvm/llvm_common.cc b/src/target/llvm/llvm_common.cc
@@ -106,10 +106,23 @@ void ParseLLVMTargetOptions(const Target& target, std::string* triple, std::stri
 #if TVM_LLVM_VERSION < 50
   opt.LessPreciseFPMADOption = true;
 #endif
-  opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
-  opt.UnsafeFPMath = false;
-  opt.NoInfsFPMath = false;
+  // We depend on generating IR with proper fast math flags to control fast math
+  // semantics. These just enable these optimizations if the proper IR flags
+  // are set.
+  opt.UnsafeFPMath = true;
+  opt.NoInfsFPMath = true;
   opt.NoNaNsFPMath = true;
+
+#if TVM_LLVM_VERSION >= 50
+  opt.NoSignedZerosFPMath = true;
+#endif
+
+  // Assume no generated code ever needs to handle floating point exceptions.
+  opt.NoTrappingFPMath = true;
+
+  // TODO(AndrewZhaoLuo): Look into control of setting this flag.
+  opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
+
   if (soft_float_abi) {
     opt.FloatABIType = llvm::FloatABI::Soft;
   } else {
@@ -139,8 +152,23 @@ std::unique_ptr<llvm::TargetMachine> GetLLVMTargetMachine(const Target& target,
     ICHECK(allow_null) << err << " target_triple=" << target_triple;
     return nullptr;
   }
-  llvm::TargetMachine* tm =
-      llvm_target->createTargetMachine(target_triple, mcpu, mattr, opt, llvm::Reloc::PIC_);
+
+  // Translate the target's integer "O" attribute (default 2) into an LLVM codegen opt level.
+  Integer llvm_opt_level = target->GetAttr<Integer>("O").value_or(Integer(2));
+  llvm::CodeGenOpt::Level llvm_opt;
+  if (llvm_opt_level <= 0) {
+    llvm_opt = llvm::CodeGenOpt::None;
+  } else if (llvm_opt_level == 1) {
+    llvm_opt = llvm::CodeGenOpt::Less;
+  } else if (llvm_opt_level == 2) {
+    llvm_opt = llvm::CodeGenOpt::Default;
+  } else {
+    // llvm_opt_level >= 3
+    llvm_opt = llvm::CodeGenOpt::Aggressive;
+  }
+
+  llvm::TargetMachine* tm = llvm_target->createTargetMachine(
+      target_triple, mcpu, mattr, opt, llvm::Reloc::PIC_, llvm::CodeModel::Small, llvm_opt);
   return std::unique_ptr<llvm::TargetMachine>(tm);
 }
 

diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc
@@ -258,8 +258,53 @@ class LLVMModuleNode final : public runtime::ModuleNode {
     // makes sense when we start to use multiple modules.
     cg->Init("TVMMod", tm_.get(), ctx_.get(), system_lib, system_lib, target_c_runtime);
 
-    cg->AddFunctionsOrdered(funcs.begin(), funcs.end());
+    // See https://llvm.org/docs/LangRef.html#fast-math-flags for details
+    Bool fast_math_all = target->GetAttr<Bool>("fast-math").value_or(Bool(false));
+    Bool fast_math_nnan = target->GetAttr<Bool>("fast-math-nnan").value_or(Bool(false));
+    Bool fast_math_ninf = target->GetAttr<Bool>("fast-math-ninf").value_or(Bool(false));
+    Bool fast_math_nsz = target->GetAttr<Bool>("fast-math-nsz").value_or(Bool(false));
+    Bool fast_math_arcp = target->GetAttr<Bool>("fast-math-arcp").value_or(Bool(false));
+
+    llvm::FastMathFlags fmf;
+    if (fast_math_all) {
+#if TVM_LLVM_VERSION >= 60
+      fmf.setFast();
+#else
+      fmf.setUnsafeAlgebra();
+#endif
+    }
+
+    if (fast_math_nnan) {
+      fmf.setNoNaNs();
+    }
+    if (fast_math_ninf) {
+      fmf.setNoInfs();
+    }
+    if (fast_math_nsz) {
+      fmf.setNoSignedZeros();
+    }
+    if (fast_math_arcp) {
+      fmf.setAllowReciprocal();
+    }
+
+#if TVM_LLVM_VERSION >= 60
+    Bool fast_math_contract = target->GetAttr<Bool>("fast-math-contract").value_or(Bool(false));
+    Bool fast_math_afn = target->GetAttr<Bool>("fast-math-afn").value_or(Bool(false));
+    Bool fast_math_reassoc = target->GetAttr<Bool>("fast-math-reassoc").value_or(Bool(false));
+    if (fast_math_contract) {
+      fmf.setAllowContract();
+    }
+    if (fast_math_afn) {
+      fmf.setApproxFunc();
+    }
+    if (fast_math_reassoc) {
+      fmf.setAllowReassoc();
+    }
+#endif
 
+    cg->SetFastMathFlag(fmf);
+
+    cg->AddFunctionsOrdered(funcs.begin(), funcs.end());
     if (entry_func.length() != 0) {
       cg->AddMainFunction(entry_func);
     }

diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc
@@ -222,6 +222,16 @@ TVM_REGISTER_TARGET_KIND("llvm", kDLCPU)
     .add_attr_option<Bool>("link-params", Bool(false))
     .add_attr_option<Bool>("unpacked-api")
     .add_attr_option<String>("interface-api")
+    // Fast math flags, see https://llvm.org/docs/LangRef.html#fast-math-flags
+    .add_attr_option<Bool>("fast-math")  // implies all the below
+    .add_attr_option<Bool>("fast-math-nnan")
+    .add_attr_option<Bool>("fast-math-ninf")
+    .add_attr_option<Bool>("fast-math-nsz")
+    .add_attr_option<Bool>("fast-math-arcp")
+    .add_attr_option<Bool>("fast-math-contract")
+    .add_attr_option<Bool>("fast-math-afn")
+    .add_attr_option<Bool>("fast-math-reassoc")
+    .add_attr_option<Integer>("O")  // LLVM codegen optimization level
     .set_default_keys({"cpu"});
 
 TVM_REGISTER_TARGET_KIND("c", kDLCPU)