diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index f823792fabe63..b67689ce523f2 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -2,6 +2,7 @@ use crate::common::Funclet;
 use crate::context::CodegenCx;
 use crate::llvm::{self, BasicBlock, False};
 use crate::llvm::{AtomicOrdering, AtomicRmwBinOp, SynchronizationScope};
+use crate::llvm_util;
 use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
@@ -16,7 +17,7 @@ use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_hir::def_id::DefId;
 use rustc_middle::ty::layout::TyAndLayout;
 use rustc_middle::ty::{self, Ty, TyCtxt};
-use rustc_span::{sym, Span};
+use rustc_span::Span;
 use rustc_target::abi::{self, Align, Size};
 use rustc_target::spec::{HasTargetSpec, Target};
 use std::borrow::Cow;
@@ -669,81 +670,47 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
-        // WebAssembly has saturating floating point to integer casts if the
-        // `nontrapping-fptoint` target feature is activated. We'll use those if
-        // they are available.
-        if self.sess().target.arch == "wasm32"
-            && self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
-        {
+        if llvm_util::get_version() >= (12, 0, 0) {
             let src_ty = self.cx.val_ty(val);
             let float_width = self.cx.float_width(src_ty);
             let int_width = self.cx.int_width(dest_ty);
-            let name = match (int_width, float_width) {
-                (32, 32) => Some("llvm.wasm.trunc.saturate.unsigned.i32.f32"),
-                (32, 64) => Some("llvm.wasm.trunc.saturate.unsigned.i32.f64"),
-                (64, 32) => Some("llvm.wasm.trunc.saturate.unsigned.i64.f32"),
-                (64, 64) => Some("llvm.wasm.trunc.saturate.unsigned.i64.f64"),
-                _ => None,
-            };
-            if let Some(name) = name {
-                let intrinsic = self.get_intrinsic(name);
-                return Some(self.call(intrinsic, &[val], None));
-            }
+            let name = format!("llvm.fptoui.sat.i{}.f{}", int_width, float_width);
+            let intrinsic = self.get_intrinsic(&name);
+            return Some(self.call(intrinsic, &[val], None));
         }
+
         None
     }
 
     fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
-        // WebAssembly has saturating floating point to integer casts if the
-        // `nontrapping-fptoint` target feature is activated. We'll use those if
-        // they are available.
-        if self.sess().target.arch == "wasm32"
-            && self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
-        {
+        if llvm_util::get_version() >= (12, 0, 0) {
             let src_ty = self.cx.val_ty(val);
             let float_width = self.cx.float_width(src_ty);
             let int_width = self.cx.int_width(dest_ty);
-            let name = match (int_width, float_width) {
-                (32, 32) => Some("llvm.wasm.trunc.saturate.signed.i32.f32"),
-                (32, 64) => Some("llvm.wasm.trunc.saturate.signed.i32.f64"),
-                (64, 32) => Some("llvm.wasm.trunc.saturate.signed.i64.f32"),
-                (64, 64) => Some("llvm.wasm.trunc.saturate.signed.i64.f64"),
-                _ => None,
-            };
-            if let Some(name) = name {
-                let intrinsic = self.get_intrinsic(name);
-                return Some(self.call(intrinsic, &[val], None));
-            }
+            let name = format!("llvm.fptosi.sat.i{}.f{}", int_width, float_width);
+            let intrinsic = self.get_intrinsic(&name);
+            return Some(self.call(intrinsic, &[val], None));
         }
-        None
-    }
 
-    fn fptosui_may_trap(&self, val: &'ll Value, dest_ty: &'ll Type) -> bool {
-        // Most of the time we'll be generating the `fptosi` or `fptoui`
-        // instruction for floating-point-to-integer conversions. These
-        // instructions by definition in LLVM do not trap. For the WebAssembly
-        // target, however, we'll lower in some cases to intrinsic calls instead
-        // which may trap. If we detect that this is a situation where we'll be
-        // using the intrinsics then we report that the call map trap, which
-        // callers might need to handle.
-        if !self.wasm_and_missing_nontrapping_fptoint() {
-            return false;
-        }
-        let src_ty = self.cx.val_ty(val);
-        let float_width = self.cx.float_width(src_ty);
-        let int_width = self.cx.int_width(dest_ty);
-        matches!((int_width, float_width), (32, 32) | (32, 64) | (64, 32) | (64, 64))
+        None
     }
 
     fn fptoui(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
-        // When we can, use the native wasm intrinsics which have tighter
-        // codegen. Note that this has a semantic difference in that the
-        // intrinsic can trap whereas `fptoui` never traps. That difference,
-        // however, is handled by `fptosui_may_trap` above.
+        // On WebAssembly the `fptoui` and `fptosi` instructions currently have
+        // poor codegen. The reason for this is that the corresponding wasm
+        // instructions, `i32.trunc_f32_s` for example, will trap when the float
+        // is out-of-bounds, infinity, or nan. This means that LLVM
+        // automatically inserts control flow around `fptoui` and `fptosi`
+        // because the LLVM instruction `fptoui` is defined as producing a
+        // poison value, not having UB on out-of-bounds values.
         //
-        // Note that we skip the wasm intrinsics for vector types where `fptoui`
-        // must be used instead.
-        if self.wasm_and_missing_nontrapping_fptoint() {
+        // This method, however, is only used with non-saturating casts that
+        // have UB on out-of-bounds values. This means that it's ok if we use
+        // the raw wasm instruction since out-of-bounds values can do whatever
+        // we like. To ensure that LLVM picks the right instruction we choose
+        // the raw wasm intrinsic functions which avoid LLVM inserting all the
+        // other control flow automatically.
+        if self.sess().target.arch == "wasm32" {
             let src_ty = self.cx.val_ty(val);
             if self.cx.type_kind(src_ty) != TypeKind::Vector {
                 let float_width = self.cx.float_width(src_ty);
@@ -765,7 +732,8 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn fptosi(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
-        if self.wasm_and_missing_nontrapping_fptoint() {
+        // see `fptoui` above for why wasm is different here
+        if self.sess().target.arch == "wasm32" {
             let src_ty = self.cx.val_ty(val);
             if self.cx.type_kind(src_ty) != TypeKind::Vector {
                 let float_width = self.cx.float_width(src_ty);
@@ -1419,9 +1387,4 @@ impl Builder<'a, 'll, 'tcx> {
             llvm::LLVMAddIncoming(phi, &val, &bb, 1 as c_uint);
         }
     }
-
-    fn wasm_and_missing_nontrapping_fptoint(&self) -> bool {
-        self.sess().target.arch == "wasm32"
-            && !self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
-    }
 }
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 773c0c16328e7..f5c54b11c08e7 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -503,14 +503,6 @@ impl CodegenCx<'b, 'tcx> {
         let t_f32 = self.type_f32();
         let t_f64 = self.type_f64();
 
-        ifn!("llvm.wasm.trunc.saturate.unsigned.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.wasm.trunc.saturate.unsigned.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.wasm.trunc.saturate.unsigned.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.wasm.trunc.saturate.unsigned.i64.f64", fn(t_f64) -> t_i64);
-        ifn!("llvm.wasm.trunc.saturate.signed.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.wasm.trunc.saturate.signed.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.wasm.trunc.saturate.signed.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.wasm.trunc.saturate.signed.i64.f64", fn(t_f64) -> t_i64);
         ifn!("llvm.wasm.trunc.unsigned.i32.f32", fn(t_f32) -> t_i32);
         ifn!("llvm.wasm.trunc.unsigned.i32.f64", fn(t_f64) -> t_i32);
         ifn!("llvm.wasm.trunc.unsigned.i64.f32", fn(t_f32) -> t_i64);
@@ -520,6 +512,28 @@ impl CodegenCx<'b, 'tcx> {
         ifn!("llvm.wasm.trunc.signed.i64.f32", fn(t_f32) -> t_i64);
         ifn!("llvm.wasm.trunc.signed.i64.f64", fn(t_f64) -> t_i64);
 
+        ifn!("llvm.fptosi.sat.i8.f32", fn(t_f32) -> t_i8);
+        ifn!("llvm.fptosi.sat.i16.f32", fn(t_f32) -> t_i16);
+        ifn!("llvm.fptosi.sat.i32.f32", fn(t_f32) -> t_i32);
+        ifn!("llvm.fptosi.sat.i64.f32", fn(t_f32) -> t_i64);
+        ifn!("llvm.fptosi.sat.i128.f32", fn(t_f32) -> t_i128);
+        ifn!("llvm.fptosi.sat.i8.f64", fn(t_f64) -> t_i8);
+        ifn!("llvm.fptosi.sat.i16.f64", fn(t_f64) -> t_i16);
+        ifn!("llvm.fptosi.sat.i32.f64", fn(t_f64) -> t_i32);
+        ifn!("llvm.fptosi.sat.i64.f64", fn(t_f64) -> t_i64);
+        ifn!("llvm.fptosi.sat.i128.f64", fn(t_f64) -> t_i128);
+
+        ifn!("llvm.fptoui.sat.i8.f32", fn(t_f32) -> t_i8);
+        ifn!("llvm.fptoui.sat.i16.f32", fn(t_f32) -> t_i16);
+        ifn!("llvm.fptoui.sat.i32.f32", fn(t_f32) -> t_i32);
+        ifn!("llvm.fptoui.sat.i64.f32", fn(t_f32) -> t_i64);
+        ifn!("llvm.fptoui.sat.i128.f32", fn(t_f32) -> t_i128);
+        ifn!("llvm.fptoui.sat.i8.f64", fn(t_f64) -> t_i8);
+        ifn!("llvm.fptoui.sat.i16.f64", fn(t_f64) -> t_i16);
+        ifn!("llvm.fptoui.sat.i32.f64", fn(t_f64) -> t_i32);
+        ifn!("llvm.fptoui.sat.i64.f64", fn(t_f64) -> t_i64);
+        ifn!("llvm.fptoui.sat.i128.f64", fn(t_f64) -> t_i128);
+
         ifn!("llvm.trap", fn() -> void);
         ifn!("llvm.debugtrap", fn() -> void);
         ifn!("llvm.frameaddress", fn(t_i32) -> i8p);
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index 629cb64d43ee1..9917c23f12150 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -11,7 +11,7 @@ use rustc_apfloat::{ieee, Float, Round, Status};
 use rustc_hir::lang_items::LangItem;
 use rustc_middle::mir;
 use rustc_middle::ty::cast::{CastTy, IntTy};
-use rustc_middle::ty::layout::{HasTyCtxt, TyAndLayout};
+use rustc_middle::ty::layout::HasTyCtxt;
 use rustc_middle::ty::{self, adjustment::PointerCast, Instance, Ty, TyCtxt};
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::sym;
@@ -385,10 +385,10 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                                 bx.inttoptr(usize_llval, ll_t_out)
                             }
                             (CastTy::Float, CastTy::Int(IntTy::I)) => {
-                                cast_float_to_int(&mut bx, true, llval, ll_t_in, ll_t_out, cast)
+                                cast_float_to_int(&mut bx, true, llval, ll_t_in, ll_t_out)
                             }
                             (CastTy::Float, CastTy::Int(_)) => {
-                                cast_float_to_int(&mut bx, false, llval, ll_t_in, ll_t_out, cast)
+                                cast_float_to_int(&mut bx, false, llval, ll_t_in, ll_t_out)
                             }
                             _ => bug!("unsupported cast: {:?} to {:?}", operand.layout.ty, cast.ty),
                         };
@@ -790,7 +790,6 @@ fn cast_float_to_int<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     x: Bx::Value,
     float_ty: Bx::Type,
     int_ty: Bx::Type,
-    int_layout: TyAndLayout<'tcx>,
 ) -> Bx::Value {
     if let Some(false) = bx.cx().sess().opts.debugging_opts.saturating_float_casts {
         return if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
@@ -891,134 +890,39 @@ fn cast_float_to_int<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     let int_min = bx.cx().const_uint_big(int_ty, int_min(signed, int_width) as u128);
     let zero = bx.cx().const_uint(int_ty, 0);
 
-    // The codegen here differs quite a bit depending on whether our builder's
-    // `fptosi` and `fptoui` instructions may trap for out-of-bounds values. If
-    // they don't trap then we can start doing everything inline with a
-    // `select` instruction because it's ok to execute `fptosi` and `fptoui`
-    // even if we don't use the results.
-    if !bx.fptosui_may_trap(x, int_ty) {
-        // Step 1 ...
-        let fptosui_result = if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
-        let less_or_nan = bx.fcmp(RealPredicate::RealULT, x, f_min);
-        let greater = bx.fcmp(RealPredicate::RealOGT, x, f_max);
-
-        // Step 2: We use two comparisons and two selects, with %s1 being the
-        // result:
-        //     %less_or_nan = fcmp ult %x, %f_min
-        //     %greater = fcmp olt %x, %f_max
-        //     %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
-        //     %s1 = select %greater, int_ty::MAX, %s0
-        // Note that %less_or_nan uses an *unordered* comparison. This
-        // comparison is true if the operands are not comparable (i.e., if x is
-        // NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
-        // x is NaN.
-        //
-        // Performance note: Unordered comparison can be lowered to a "flipped"
-        // comparison and a negation, and the negation can be merged into the
-        // select. Therefore, it not necessarily any more expensive than a
-        // ordered ("normal") comparison. Whether these optimizations will be
-        // performed is ultimately up to the backend, but at least x86 does
-        // perform them.
-        let s0 = bx.select(less_or_nan, int_min, fptosui_result);
-        let s1 = bx.select(greater, int_max, s0);
-
-        // Step 3: NaN replacement.
-        // For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
-        // Therefore we only need to execute this step for signed integer types.
-        if signed {
-            // LLVM has no isNaN predicate, so we use (x == x) instead
-            let cmp = bx.fcmp(RealPredicate::RealOEQ, x, x);
-            bx.select(cmp, s1, zero)
-        } else {
-            s1
-        }
+    // Step 1 ...
+    let fptosui_result = if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
+    let less_or_nan = bx.fcmp(RealPredicate::RealULT, x, f_min);
+    let greater = bx.fcmp(RealPredicate::RealOGT, x, f_max);
+
+    // Step 2: We use two comparisons and two selects, with %s1 being the
+    // result:
+    //     %less_or_nan = fcmp ult %x, %f_min
+    //     %greater = fcmp olt %x, %f_max
+    //     %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
+    //     %s1 = select %greater, int_ty::MAX, %s0
+    // Note that %less_or_nan uses an *unordered* comparison. This
+    // comparison is true if the operands are not comparable (i.e., if x is
+    // NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
+    // x is NaN.
+    //
+    // Performance note: Unordered comparison can be lowered to a "flipped"
+    // comparison and a negation, and the negation can be merged into the
+    // select. Therefore, it not necessarily any more expensive than a
+    // ordered ("normal") comparison. Whether these optimizations will be
+    // performed is ultimately up to the backend, but at least x86 does
+    // perform them.
+    let s0 = bx.select(less_or_nan, int_min, fptosui_result);
+    let s1 = bx.select(greater, int_max, s0);
+
+    // Step 3: NaN replacement.
+    // For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
+    // Therefore we only need to execute this step for signed integer types.
+    if signed {
+        // LLVM has no isNaN predicate, so we use (x == x) instead
+        let cmp = bx.fcmp(RealPredicate::RealOEQ, x, x);
+        bx.select(cmp, s1, zero)
     } else {
-        // In this case we cannot execute `fptosi` or `fptoui` and then later
-        // discard the result. The builder is telling us that these instructions
-        // will trap on out-of-bounds values, so we need to use basic blocks and
-        // control flow to avoid executing the `fptosi` and `fptoui`
-        // instructions.
-        //
-        // The general idea of what we're constructing here is, for f64 -> i32:
-        //
-        //      ;; block so far... %0 is the argument
-        //      %result = alloca i32, align 4
-        //      %inbound_lower = fcmp oge double %0, 0xC1E0000000000000
-        //      %inbound_upper = fcmp ole double %0, 0x41DFFFFFFFC00000
-        //      ;; match (inbound_lower, inbound_upper) {
-        //      ;;     (true, true) => %0 can be converted without trapping
-        //      ;;     (false, false) => %0 is a NaN
-        //      ;;     (true, false) => %0 is too large
-        //      ;;     (false, true) => %0 is too small
-        //      ;; }
-        //      ;;
-        //      ;; The (true, true) check, go to %convert if so.
-        //      %inbounds = and i1 %inbound_lower, %inbound_upper
-        //      br i1 %inbounds, label %convert, label %specialcase
-        //
-        //  convert:
-        //      %cvt = call i32 @llvm.wasm.trunc.signed.i32.f64(double %0)
-        //      store i32 %cvt, i32* %result, align 4
-        //      br label %done
-        //
-        //  specialcase:
-        //      ;; Handle the cases where the number is NaN, too large or too small
-        //
-        //      ;; Either (true, false) or (false, true)
-        //      %is_not_nan = or i1 %inbound_lower, %inbound_upper
-        //      ;; Figure out which saturated value we are interested in if not `NaN`
-        //      %saturated = select i1 %inbound_lower, i32 2147483647, i32 -2147483648
-        //      ;; Figure out between saturated and NaN representations
-        //      %result_nan = select i1 %is_not_nan, i32 %saturated, i32 0
-        //      store i32 %result_nan, i32* %result, align 4
-        //      br label %done
-        //
-        //  done:
-        //      %r = load i32, i32* %result, align 4
-        //      ;; ...
-        let done = bx.build_sibling_block("float_cast_done");
-        let mut convert = bx.build_sibling_block("float_cast_convert");
-        let mut specialcase = bx.build_sibling_block("float_cast_specialcase");
-
-        let result = PlaceRef::alloca(bx, int_layout);
-        result.storage_live(bx);
-
-        // Use control flow to figure out whether we can execute `fptosi` in a
-        // basic block, or whether we go to a different basic block to implement
-        // the saturating logic.
-        let inbound_lower = bx.fcmp(RealPredicate::RealOGE, x, f_min);
-        let inbound_upper = bx.fcmp(RealPredicate::RealOLE, x, f_max);
-        let inbounds = bx.and(inbound_lower, inbound_upper);
-        bx.cond_br(inbounds, convert.llbb(), specialcase.llbb());
-
-        // Translation of the `convert` basic block
-        let cvt = if signed { convert.fptosi(x, int_ty) } else { convert.fptoui(x, int_ty) };
-        convert.store(cvt, result.llval, result.align);
-        convert.br(done.llbb());
-
-        // Translation of the `specialcase` basic block. Note that like above
-        // we try to be a bit clever here for unsigned conversions. In those
-        // cases the `int_min` is zero so we don't need two select instructions,
-        // just one to choose whether we need `int_max` or not. If
-        // `inbound_lower` is true then we're guaranteed to not be `NaN` and
-        // since we're greater than zero we must be saturating to `int_max`. If
-        // `inbound_lower` is false then we're either NaN or less than zero, so
-        // we saturate to zero.
-        let result_nan = if signed {
-            let is_not_nan = specialcase.or(inbound_lower, inbound_upper);
-            let saturated = specialcase.select(inbound_lower, int_max, int_min);
-            specialcase.select(is_not_nan, saturated, zero)
-        } else {
-            specialcase.select(inbound_lower, int_max, int_min)
-        };
-        specialcase.store(result_nan, result.llval, result.align);
-        specialcase.br(done.llbb());
-
-        // Translation of the `done` basic block, positioning ourselves to
-        // continue from that point as well.
-        *bx = done;
-        let ret = bx.load(result.llval, result.align);
-        result.storage_dead(bx);
-        ret
+        s1
     }
 }
diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs
index d5bd2780388e6..1bc05f30e5c37 100644
--- a/compiler/rustc_codegen_ssa/src/traits/builder.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/builder.rs
@@ -171,7 +171,6 @@ pub trait BuilderMethods<'a, 'tcx>:
     fn sext(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
     fn fptoui_sat(&mut self, val: Self::Value, dest_ty: Self::Type) -> Option<Self::Value>;
     fn fptosi_sat(&mut self, val: Self::Value, dest_ty: Self::Type) -> Option<Self::Value>;
-    fn fptosui_may_trap(&self, val: Self::Value, dest_ty: Self::Type) -> bool;
     fn fptoui(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
     fn fptosi(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
     fn uitofp(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 52270f0e6277b..1d1471fdeca04 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -796,7 +796,6 @@ symbols! {
         non_modrs_mods,
         none_error,
         nontemporal_store,
-        nontrapping_dash_fptoint: "nontrapping-fptoint",
         noop_method_borrow,
         noop_method_clone,
         noop_method_deref,