Merged main:40c2aaf54e9a7b5c560bb68796d444180ad67b5d into amd-gfx:86625b660a80

Local branch amd-gfx 86625b6 Merged main:b7730a23efb222944b732bbdb3a7b965b7bffd98 into amd-gfx:0616aed9b07e
Remote branch main 40c2aaf [RISCV][sema] Correct the requirement of `vf[n|w]cvt.x[|u].f` intrinsics (llvm#101811)
Authored and committed by the SC llvm team on Aug 6, 2024
2 parents 86625b6 + 40c2aaf commit 00d817b
Showing 66 changed files with 6,025 additions and 4,165 deletions.
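A note on the upstream change being merged (llvm#101811): it corrects which extension gates the SEW=8 variants of the widening/narrowing float-integer conversion intrinsics, requiring full Zvfh where an f16 value is an arithmetic operand rather than the conversion-only Zvfhmin subset. The sketch below is illustrative user code, not part of the commit; the intrinsic names follow the standard RVV C intrinsic convention, and the function names and -march strings are assumptions.

#include <riscv_vector.h>

// Assumed to need -march=rv64gcv_zvfh: the i8 -> f16 widening convert
// produces an f16 arithmetic result, which this change gates on Zvfh.
vfloat16mf2_t widen_i8_to_f16(vint8mf4_t src, size_t vl) {
  return __riscv_vfwcvt_f_x_v_f16mf2(src, vl);
}

// Assumed to work with -march=rv64gcv_zvfhmin: the f16 -> f32 widening
// convert only reads f16 values, so the conversion-only subset suffices.
vfloat32m1_t widen_f16_to_f32(vfloat16mf2_t src, size_t vl) {
  return __riscv_vfwcvt_f_f_v_f32m1(src, vl);
}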
78 changes: 69 additions & 9 deletions clang/include/clang/Basic/riscv_vector.td
@@ -1378,6 +1378,9 @@ let HasMasked = false,
let RequiredFeatures = ["Zvfhmin"] in
defm vmv_v : RVVOutBuiltinSet<"vmv_v_v", "x",
[["v", "v", "vv"]]>;
let RequiredFeatures = ["Zvfbfmin"] in
defm vmv_v : RVVOutBuiltinSet<"vmv_v_v", "y",
[["v", "v", "vv"]]>;
let SupportOverloading = false in
defm vmv_v : RVVOutBuiltinSet<"vmv_v_x", "csil",
[["x", "v", "ve"],
@@ -1890,6 +1893,9 @@ let HasMasked = false,
let RequiredFeatures = ["Zvfhmin"] in
defm vmerge : RVVOutOp1BuiltinSet<"vmerge", "x",
[["vvm", "v", "vvvm"]]>;
let RequiredFeatures = ["Zvfbfmin"] in
defm vmerge : RVVOutOp1BuiltinSet<"vmerge", "y",
[["vvm", "v", "vvvm"]]>;
defm vfmerge : RVVOutOp1BuiltinSet<"vfmerge", "xfd",
[["vfm", "v", "vvem"]]>;
}
@@ -1912,8 +1918,18 @@ def vfcvt_rtz_x_f_v : RVVConvToSignedBuiltin<"vfcvt_rtz_x">;
let Log2LMUL = [-3, -2, -1, 0, 1, 2] in {
def vfwcvt_rtz_xu_f_v : RVVConvToWidenUnsignedBuiltin<"vfwcvt_rtz_xu">;
def vfwcvt_rtz_x_f_v : RVVConvToWidenSignedBuiltin<"vfwcvt_rtz_x">;
def vfwcvt_f_xu_v : RVVConvBuiltin<"Fw", "FwUv", "csi", "vfwcvt_f">;
def vfwcvt_f_x_v : RVVConvBuiltin<"Fw", "Fwv", "csi", "vfwcvt_f">;
def vfwcvt_f_xu_v : RVVConvBuiltin<"Fw", "FwUv", "si", "vfwcvt_f">;
def vfwcvt_f_x_v : RVVConvBuiltin<"Fw", "Fwv", "si", "vfwcvt_f">;
let RequiredFeatures = ["Zvfh"] in {
let Name = "vfwcvt_f_xu_v",
IRName = "vfwcvt_f_xu_v",
MaskedIRName = "vfwcvt_f_xu_v_mask" in
def : RVVConvBuiltin<"Fw", "FwUv", "c", "vfwcvt_f">;
let Name = "vfwcvt_f_x_v",
IRName = "vfwcvt_f_x_v",
MaskedIRName = "vfwcvt_f_x_v_mask" in
def : RVVConvBuiltin<"Fw", "Fwv", "c", "vfwcvt_f">;
}
def vfwcvt_f_f_v : RVVConvBuiltin<"w", "wv", "f", "vfwcvt_f">;
let RequiredFeatures = ["Zvfhmin"] in
def vfwcvt_f_f_v_fp16 : RVVConvBuiltin<"w", "wv", "x", "vfwcvt_f"> {
@@ -1927,6 +1943,16 @@ let Log2LMUL = [-3, -2, -1, 0, 1, 2] in {
let Log2LMUL = [-3, -2, -1, 0, 1, 2] in {
def vfncvt_rtz_xu_f_w : RVVConvToNarrowingUnsignedBuiltin<"vfncvt_rtz_xu">;
def vfncvt_rtz_x_f_w : RVVConvToNarrowingSignedBuiltin<"vfncvt_rtz_x">;
let RequiredFeatures = ["Zvfh"] in {
let Name = "vfncvt_rtz_xu_f_w",
IRName = "vfncvt_rtz_xu_f_w",
MaskedIRName = "vfncvt_rtz_xu_f_w_mask" in
def : RVVConvBuiltin<"Uv", "UvFw", "c", "vfncvt_rtz_xu">;
let Name = "vfncvt_rtz_x_f_w",
IRName = "vfncvt_rtz_x_f_w",
MaskedIRName = "vfncvt_rtz_x_f_w_mask" in
def : RVVConvBuiltin<"Iv", "IvFw", "c", "vfncvt_rtz_x">;
}
def vfncvt_rod_f_f_w : RVVConvBuiltin<"v", "vw", "xf", "vfncvt_rod_f">;
}

@@ -2005,10 +2031,18 @@ let ManualCodegen = [{
let Log2LMUL = [-3, -2, -1, 0, 1, 2] in {
let OverloadedName = "vfncvt_x" in
defm :
RVVConvBuiltinSet<"vfncvt_x_f_w", "csi", [["Iv", "IvFwu"]]>;
RVVConvBuiltinSet<"vfncvt_x_f_w", "si", [["Iv", "IvFwu"]]>;
let OverloadedName = "vfncvt_xu" in
defm :
RVVConvBuiltinSet<"vfncvt_xu_f_w", "csi", [["Uv", "UvFwu"]]>;
RVVConvBuiltinSet<"vfncvt_xu_f_w", "si", [["Uv", "UvFwu"]]>;
let RequiredFeatures = ["Zvfh"] in {
let OverloadedName = "vfncvt_x" in
defm :
RVVConvBuiltinSet<"vfncvt_x_f_w", "c", [["Iv", "IvFwu"]]>;
let OverloadedName = "vfncvt_xu" in
defm :
RVVConvBuiltinSet<"vfncvt_xu_f_w", "c", [["Uv", "UvFwu"]]>;
}
let OverloadedName = "vfncvt_f" in {
defm :
RVVConvBuiltinSet<"vfncvt_f_x_w", "xf", [["v", "vIwu"]]>;
@@ -2055,10 +2089,18 @@ let ManualCodegen = [{
let Log2LMUL = [-3, -2, -1, 0, 1, 2] in {
let OverloadedName = "vfncvt_x" in
defm :
RVVConvBuiltinSet<"vfncvt_x_f_w", "csi", [["Iv", "IvFw"]]>;
RVVConvBuiltinSet<"vfncvt_x_f_w", "si", [["Iv", "IvFw"]]>;
let OverloadedName = "vfncvt_xu" in
defm :
RVVConvBuiltinSet<"vfncvt_xu_f_w", "csi", [["Uv", "UvFw"]]>;
RVVConvBuiltinSet<"vfncvt_xu_f_w", "si", [["Uv", "UvFw"]]>;
let RequiredFeatures = ["Zvfh"] in {
let OverloadedName = "vfncvt_x" in
defm :
RVVConvBuiltinSet<"vfncvt_x_f_w", "c", [["Iv", "IvFw"]]>;
let OverloadedName = "vfncvt_xu" in
defm :
RVVConvBuiltinSet<"vfncvt_xu_f_w", "c", [["Uv", "UvFw"]]>;
}
let OverloadedName = "vfncvt_f" in {
defm :
RVVConvBuiltinSet<"vfncvt_f_x_w", "xf", [["v", "vIw"]]>;
@@ -2256,10 +2298,22 @@ defm vfslide1down : RVVFloatingBinVFBuiltinSet;

// 16.4. Vector Register Gather Instructions
// signed and floating type
defm vrgather : RVVOutBuiltinSet<"vrgather_vv", "csilxfd",
defm vrgather : RVVOutBuiltinSet<"vrgather_vv", "csilfd",
[["vv", "v", "vvUv"]]>;
defm vrgather : RVVOutBuiltinSet<"vrgather_vx", "csilxfd",
defm vrgather : RVVOutBuiltinSet<"vrgather_vx", "csilfd",
[["vx", "v", "vvz"]]>;
let RequiredFeatures = ["Zvfhmin"] in {
defm vrgather : RVVOutBuiltinSet<"vrgather_vv", "x",
[["vv", "v", "vvUv"]]>;
defm vrgather : RVVOutBuiltinSet<"vrgather_vx", "x",
[["vx", "v", "vvz"]]>;
}
let RequiredFeatures = ["Zvfbfmin"] in {
defm vrgather : RVVOutBuiltinSet<"vrgather_vv", "y",
[["vv", "v", "vvUv"]]>;
defm vrgather : RVVOutBuiltinSet<"vrgather_vx", "y",
[["vx", "v", "vvz"]]>;
}
defm vrgatherei16 : RVVOutBuiltinSet<"vrgatherei16_vv", "csilxfd",
[["vv", "v", "vv(Log2EEW:4)Uv"]]>;
// unsigned type
@@ -2282,8 +2336,14 @@ let HasMasked = false,
IntrinsicTypes = {ResultType, Ops.back()->getType()};
}] in {
// signed and floating type
defm vcompress : RVVOutBuiltinSet<"vcompress", "csilxfd",
defm vcompress : RVVOutBuiltinSet<"vcompress", "csilfd",
[["vm", "v", "vvm"]]>;
let RequiredFeatures = ["Zvfhmin"] in
defm vcompress : RVVOutBuiltinSet<"vcompress", "x",
[["vm", "v", "vvm"]]>;
let RequiredFeatures = ["Zvfbfmin"] in
defm vcompress : RVVOutBuiltinSet<"vcompress", "y",
[["vm", "v", "vvm"]]>;
// unsigned type
defm vcompress : RVVOutBuiltinSet<"vcompress", "csil",
[["vm", "Uv", "UvUvm"]]>;
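Beyond the conversion gating, the riscv_vector.td changes above also enable the bfloat16 ("y") type for vmv.v.v, vmerge, vrgather, and vcompress under Zvfbfmin. A minimal sketch of what that exposes, assuming the bf16 intrinsics follow the existing f16 naming pattern; the function names are illustrative.

#include <riscv_vector.h>

// With -march=rv64gcv_zvfbfmin, bf16 vectors can be copied and permuted
// even though Zvfbfmin itself defines no bf16 arithmetic.
vbfloat16m1_t copy_bf16(vbfloat16m1_t src, size_t vl) {
  return __riscv_vmv_v_v_bf16m1(src, vl);
}

vbfloat16m1_t gather_bf16(vbfloat16m1_t src, vuint16m1_t index, size_t vl) {
  return __riscv_vrgather_vv_bf16m1(src, index, vl);
}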
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/riscv_vector_common.td
@@ -604,10 +604,10 @@ class RVVConvToWidenUnsignedBuiltin<string overloaded_name>
: RVVConvBuiltin<"Uw", "Uwv", "xf", overloaded_name>;

class RVVConvToNarrowingSignedBuiltin<string overloaded_name>
: RVVConvBuiltin<"Iv", "IvFw", "csi", overloaded_name>;
: RVVConvBuiltin<"Iv", "IvFw", "si", overloaded_name>;

class RVVConvToNarrowingUnsignedBuiltin<string overloaded_name>
: RVVConvBuiltin<"Uv", "UvFw", "csi", overloaded_name>;
: RVVConvBuiltin<"Uv", "UvFw", "si", overloaded_name>;

let HasMaskedOffOperand = true in {
multiclass RVVSignedReductionBuiltin {
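With the type range narrowed to "si", RVVConvToNarrowingSignedBuiltin and RVVConvToNarrowingUnsignedBuiltin now cover only the i16/i32 results, while the i8 ("c") variants are declared separately under Zvfh in riscv_vector.td above. A hedged sketch of the user-visible split, mirroring the widening example earlier; the function names are illustrative.

#include <riscv_vector.h>

// f16 -> i8 narrowing convert: the f16 source is an arithmetic operand,
// so this variant is assumed to sit behind Zvfh after this change.
vint8mf2_t narrow_f16_to_i8(vfloat16m1_t src, size_t vl) {
  return __riscv_vfncvt_rtz_x_f_w_i8mf2(src, vl);
}

// f32 -> i16 narrowing convert: no f16 is involved, so plain V suffices.
vint16m1_t narrow_f32_to_i16(vfloat32m2_t src, size_t vl) {
  return __riscv_vfncvt_rtz_x_f_w_i16m1(src, vl);
}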
3 changes: 2 additions & 1 deletion clang/include/clang/Support/RISCVVIntrinsicUtils.h
@@ -502,7 +502,8 @@ enum RVVRequire : uint32_t {
RVV_REQ_Zvksh = 1 << 15,
RVV_REQ_Zvfbfwma = 1 << 16,
RVV_REQ_Zvfbfmin = 1 << 17,
RVV_REQ_Experimental = 1 << 18,
RVV_REQ_Zvfh = 1 << 18,
RVV_REQ_Experimental = 1 << 19,

LLVM_MARK_AS_BITMASK_ENUM(RVV_REQ_Experimental)
};
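The new RVV_REQ_Zvfh bit slots in ahead of RVV_REQ_Experimental, keeping the bitmask-enum layout intact. Below is a self-contained illustration of how such a requirement mask is typically tested; the helper is hypothetical and only the bit values visible in the diff are reproduced.

#include <stdbool.h>
#include <stdint.h>

/* Bit values copied from the enum above; the remaining bits are omitted. */
enum {
  RVV_REQ_Zvfbfmin     = 1u << 17,
  RVV_REQ_Zvfh         = 1u << 18,
  RVV_REQ_Experimental = 1u << 19,
};

/* Illustrative check, not the actual Sema logic: a record whose mask
   contains RVV_REQ_Zvfh is only considered when zvfh is enabled. */
static bool requirements_met(uint32_t required, bool has_zvfh, bool has_zvfbfmin) {
  if ((required & RVV_REQ_Zvfh) && !has_zvfh)
    return false;
  if ((required & RVV_REQ_Zvfbfmin) && !has_zvfbfmin)
    return false;
  return true;
}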
6 changes: 6 additions & 0 deletions clang/lib/Sema/SemaRISCV.cpp
@@ -222,6 +222,7 @@ void RISCVIntrinsicManagerImpl::ConstructRVVIntrinsics(
{"zvksh", RVV_REQ_Zvksh},
{"zvfbfwma", RVV_REQ_Zvfbfwma},
{"zvfbfmin", RVV_REQ_Zvfbfmin},
{"zvfh", RVV_REQ_Zvfh},
{"experimental", RVV_REQ_Experimental}};

// Construction of RVVIntrinsicRecords need to sync with createRVVIntrinsics
@@ -280,6 +281,11 @@ void RISCVIntrinsicManagerImpl::ConstructRVVIntrinsics(
if ((BaseTypeI & Record.TypeRangeMask) != BaseTypeI)
continue;

// TODO: Remove the check below and use RequiredFeatures in
// riscv_vector.td to check the intrinsics instead; the type check should
// be done in checkRVVTypeSupport. This check also cannot handle
// intrinsics that involve Float16 but whose BaseType is not Float16,
// such as `vfcvt_f_x_v`.
if (BaseType == BasicType::Float16) {
if ((Record.RequiredExtensions & RVV_REQ_Zvfhmin) == RVV_REQ_Zvfhmin) {
if (!TI.hasFeature("zvfhmin"))
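The TODO above describes a real gap: some intrinsics produce or consume f16 even though their lookup BaseType is an integer type, so a check keyed on BaseType == Float16 never sees them. A hedged example of such a case, as user code for illustration only (not part of the commit):

#include <riscv_vector.h>

// The single-width i16 -> f16 convert is looked up under an integer base
// type, yet its result is vfloat16, so it still depends on zvfh at the
// instruction level -- exactly the case the TODO describes.
vfloat16m1_t int16_to_f16(vint16m1_t src, size_t vl) {
  return __riscv_vfcvt_f_x_v_f16m1(src, vl);
}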
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// REQUIRES: riscv-registered-target
// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
// RUN: -target-feature +zvfh -disable-O0-optnone \
// RUN: -target-feature +zvfhmin -target-feature +zvfbfmin -disable-O0-optnone \
// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
// RUN: FileCheck --check-prefix=CHECK-RV64 %s

@@ -597,3 +597,63 @@ vuint64m8_t test_vcompress_vm_u64m8(vuint64m8_t src, vbool8_t mask, size_t vl) {
return __riscv_vcompress_vm_u64m8(src, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vcompress_vm_bf16mf4
// CHECK-RV64-SAME: (<vscale x 1 x bfloat> [[SRC:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vcompress.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[SRC]], <vscale x 1 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
//
vbfloat16mf4_t test_vcompress_vm_bf16mf4(vbfloat16mf4_t src, vbool64_t mask, size_t vl) {
return __riscv_vcompress_vm_bf16mf4(src, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vcompress_vm_bf16mf2
// CHECK-RV64-SAME: (<vscale x 2 x bfloat> [[SRC:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vcompress.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[SRC]], <vscale x 2 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
//
vbfloat16mf2_t test_vcompress_vm_bf16mf2(vbfloat16mf2_t src, vbool32_t mask, size_t vl) {
return __riscv_vcompress_vm_bf16mf2(src, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vcompress_vm_bf16m1
// CHECK-RV64-SAME: (<vscale x 4 x bfloat> [[SRC:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vcompress.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[SRC]], <vscale x 4 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
//
vbfloat16m1_t test_vcompress_vm_bf16m1(vbfloat16m1_t src, vbool16_t mask, size_t vl) {
return __riscv_vcompress_vm_bf16m1(src, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vcompress_vm_bf16m2
// CHECK-RV64-SAME: (<vscale x 8 x bfloat> [[SRC:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vcompress.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[SRC]], <vscale x 8 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
vbfloat16m2_t test_vcompress_vm_bf16m2(vbfloat16m2_t src, vbool8_t mask, size_t vl) {
return __riscv_vcompress_vm_bf16m2(src, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vcompress_vm_bf16m4
// CHECK-RV64-SAME: (<vscale x 16 x bfloat> [[SRC:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vcompress.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[SRC]], <vscale x 16 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
//
vbfloat16m4_t test_vcompress_vm_bf16m4(vbfloat16m4_t src, vbool4_t mask, size_t vl) {
return __riscv_vcompress_vm_bf16m4(src, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vcompress_vm_bf16m8
// CHECK-RV64-SAME: (<vscale x 32 x bfloat> [[SRC:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vcompress.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[SRC]], <vscale x 32 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
//
vbfloat16m8_t test_vcompress_vm_bf16m8(vbfloat16m8_t src, vbool2_t mask, size_t vl) {
return __riscv_vcompress_vm_bf16m8(src, mask, vl);
}

@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// REQUIRES: riscv-registered-target
// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
// RUN: -target-feature +zvfhmin -disable-O0-optnone \
// RUN: -target-feature +zvfhmin -target-feature +zvfbfmin -disable-O0-optnone \
// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
// RUN: FileCheck --check-prefix=CHECK-RV64 %s

@@ -1037,3 +1037,62 @@ vfloat64m8_t test_vmerge_vvm_f64m8(vfloat64m8_t op1, vfloat64m8_t op2, vbool8_t
return __riscv_vmerge_vvm_f64m8(op1, op2, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vmerge_vvm_bf16mf4
// CHECK-RV64-SAME: (<vscale x 1 x bfloat> [[OP1:%.*]], <vscale x 1 x bfloat> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vmerge.nxv1bf16.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[OP1]], <vscale x 1 x bfloat> [[OP2]], <vscale x 1 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
//
vbfloat16mf4_t test_vmerge_vvm_bf16mf4(vbfloat16mf4_t op1, vbfloat16mf4_t op2, vbool64_t mask, size_t vl) {
return __riscv_vmerge_vvm_bf16mf4(op1, op2, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vmerge_vvm_bf16mf2
// CHECK-RV64-SAME: (<vscale x 2 x bfloat> [[OP1:%.*]], <vscale x 2 x bfloat> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vmerge.nxv2bf16.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[OP1]], <vscale x 2 x bfloat> [[OP2]], <vscale x 2 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
//
vbfloat16mf2_t test_vmerge_vvm_bf16mf2(vbfloat16mf2_t op1, vbfloat16mf2_t op2, vbool32_t mask, size_t vl) {
return __riscv_vmerge_vvm_bf16mf2(op1, op2, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vmerge_vvm_bf16m1
// CHECK-RV64-SAME: (<vscale x 4 x bfloat> [[OP1:%.*]], <vscale x 4 x bfloat> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vmerge.nxv4bf16.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[OP1]], <vscale x 4 x bfloat> [[OP2]], <vscale x 4 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
//
vbfloat16m1_t test_vmerge_vvm_bf16m1(vbfloat16m1_t op1, vbfloat16m1_t op2, vbool16_t mask, size_t vl) {
return __riscv_vmerge_vvm_bf16m1(op1, op2, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vmerge_vvm_bf16m2
// CHECK-RV64-SAME: (<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vmerge.nxv8bf16.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[OP1]], <vscale x 8 x bfloat> [[OP2]], <vscale x 8 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
vbfloat16m2_t test_vmerge_vvm_bf16m2(vbfloat16m2_t op1, vbfloat16m2_t op2, vbool8_t mask, size_t vl) {
return __riscv_vmerge_vvm_bf16m2(op1, op2, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vmerge_vvm_bf16m4
// CHECK-RV64-SAME: (<vscale x 16 x bfloat> [[OP1:%.*]], <vscale x 16 x bfloat> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vmerge.nxv16bf16.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[OP1]], <vscale x 16 x bfloat> [[OP2]], <vscale x 16 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
//
vbfloat16m4_t test_vmerge_vvm_bf16m4(vbfloat16m4_t op1, vbfloat16m4_t op2, vbool4_t mask, size_t vl) {
return __riscv_vmerge_vvm_bf16m4(op1, op2, mask, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vmerge_vvm_bf16m8
// CHECK-RV64-SAME: (<vscale x 32 x bfloat> [[OP1:%.*]], <vscale x 32 x bfloat> [[OP2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vmerge.nxv32bf16.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[OP1]], <vscale x 32 x bfloat> [[OP2]], <vscale x 32 x i1> [[MASK]], i64 [[VL]])
// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
//
vbfloat16m8_t test_vmerge_vvm_bf16m8(vbfloat16m8_t op1, vbfloat16m8_t op2, vbool2_t mask, size_t vl) {
return __riscv_vmerge_vvm_bf16m8(op1, op2, mask, vl);
}