[RISCV][ISel] Remove redundant vmerge for the vwadd. #78403
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Chia (sun-jacobi)

Changes

This patch is aiming at resolving the missed-optimization case below. The pattern can be found in a reduction with a widening destination. Specifically, we first do a fold like (vwadd.wv y, (vmerge cond, x, 0)) -> (vwadd.wv y, x, y, cond), then do pattern matching on it.
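A minimal sketch of the case, using the fixed-vector test this patch adds (the IR below is copied from that test; the before/after codegen described here follows the analogous scalable listing for #80079 quoted near the end of this thread): without the fold, the select is materialized as a vmv.v.i/vmerge.vvm feeding an unmasked vwadd.wv; with it, the whole sequence becomes a single masked vwadd.wv.

```llvm
; Reduction-style pattern this patch targets (copied from the new test
; fixed-vectors-vwadd-mask.ll in the diff below): a select between %x and zero
; is sign-extended and accumulated into a wider vector.
define <8 x i64> @vwadd_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
  %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
  %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  %sa = sext <8 x i32> %a to <8 x i64>
  %ret = add <8 x i64> %sa, %y
  ret <8 x i64> %ret
}
```

Per the CHECK lines in the new test, llc -mtriple=riscv64 -mattr=+v now emits a vmslt.vx to form the mask followed by a single vwadd.wv with a v0.t mask for the add.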
Full diff: https://github.com/llvm/llvm-project/pull/78403.diff

3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cb9ffabc41236e..a030538e5e8ba9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13457,6 +13457,56 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return InputRootReplacement;
}
+// (vwadd y, (select cond, x, 0)) -> select cond (vwadd y, x), y
+static SDValue combineVWADDSelect(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opc = N->getOpcode();
+ assert(Opc == RISCVISD::VWADD_VL || Opc == RISCVISD::VWADD_W_VL ||
+ Opc == RISCVISD::VWADDU_W_VL);
+
+ SDValue VL = N->getOperand(4);
+ SDValue Y = N->getOperand(0);
+ SDValue Merge = N->getOperand(1);
+
+ if (Merge.getOpcode() != RISCVISD::VMERGE_VL)
+ return SDValue();
+
+ SDValue Cond = Merge->getOperand(0);
+ SDValue X = Merge->getOperand(1);
+ SDValue Z = Merge->getOperand(2);
+
+ if (Z.getOpcode() != ISD::INSERT_SUBVECTOR ||
+ !isNullConstant(Z.getOperand(2)))
+ return SDValue();
+
+ if (!Merge.hasOneUse())
+ return SDValue();
+
+ SmallVector<SDValue, 6> Ops(N->op_values());
+ Ops[0] = Y;
+ Ops[1] = X;
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue WX = DAG.getNode(Opc, DL, VT, Ops, N->getFlags());
+ return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Cond, WX, Y, DAG.getUNDEF(VT),
+ VL);
+}
+
+static SDValue performVWADD_VLCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ unsigned Opc = N->getOpcode();
+ assert(Opc == RISCVISD::VWADD_VL || Opc == RISCVISD::VWADD_W_VL ||
+ Opc == RISCVISD::VWADDU_W_VL);
+
+ if (Opc != RISCVISD::VWADD_VL) {
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
+ return V;
+ }
+
+ return combineVWADDSelect(N, DCI.DAG);
+}
+
// Helper function for performMemPairCombine.
// Try to combine the memory loads/stores LSNode1 and LSNode2
// into a single memory pair operation.
@@ -15500,9 +15550,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
return V;
return combineToVWMACC(N, DAG, Subtarget);
- case RISCVISD::SUB_VL:
+ case RISCVISD::VWADD_VL:
case RISCVISD::VWADD_W_VL:
case RISCVISD::VWADDU_W_VL:
+ return performVWADD_VLCombine(N, DCI);
+ case RISCVISD::SUB_VL:
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
case RISCVISD::MUL_VL:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 1deb9a709463e8..6744a38d036b00 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -691,6 +691,30 @@ multiclass VPatTiedBinaryNoMaskVL_V<SDNode vop,
GPR:$vl, sew, TU_MU)>;
}
+class VPatTiedBinaryMaskVL_V<SDNode vop,
+ string instruction_name,
+ string suffix,
+ ValueType result_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op2_reg_class>
+ : Pat<(riscv_vmerge_vl (mask_type V0),
+ (result_type (vop
+ result_reg_class:$rs1,
+ (op2_type op2_reg_class:$rs2),
+ srcvalue,
+ true_mask,
+ VLOpFrag)),
+ result_reg_class:$rs1, result_reg_class:$merge, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_MASK")
+ result_reg_class:$merge,
+ result_reg_class:$rs1,
+ op2_reg_class:$rs2,
+ (mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
+
multiclass VPatTiedBinaryNoMaskVL_V_RM<SDNode vop,
string instruction_name,
string suffix,
@@ -819,6 +843,9 @@ multiclass VPatBinaryWVL_VV_VX_WV_WX<SDPatternOperator vop, SDNode vop_w,
defm : VPatTiedBinaryNoMaskVL_V<vop_w, instruction_name, "WV",
wti.Vector, vti.Vector, vti.Log2SEW,
vti.LMul, wti.RegClass, vti.RegClass>;
+ def : VPatTiedBinaryMaskVL_V<vop_w, instruction_name, "WV",
+ wti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, wti.RegClass, vti.RegClass>;
def : VPatBinaryVL_V<vop_w, instruction_name, "WV",
wti.Vector, wti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, wti.RegClass, wti.RegClass,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll
new file mode 100644
index 00000000000000..afc59b875d79df
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+define <8 x i64> @vwadd_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwadd_mask_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmslt.vx v0, v8, a0
+; CHECK-NEXT: vwadd.wv v16, v12, v8, v0.t
+; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+ %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ %sa = sext <8 x i32> %a to <8 x i64>
+ %ret = add <8 x i64> %sa, %y
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwadd_mask_v8i32_commutative(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwadd_mask_v8i32_commutative:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmslt.vx v0, v8, a0
+; CHECK-NEXT: vwadd.wv v16, v12, v8, v0.t
+; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+ %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ %sa = sext <8 x i32> %a to <8 x i64>
+ %ret = add <8 x i64> %y, %sa
+ ret <8 x i64> %ret
+}
EVT VT = N->getValueType(0);

SDValue WX = DAG.getNode(Opc, DL, VT, Ops, N->getFlags());
return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Cond, WX, Y, DAG.getUNDEF(VT),
Operand 2 of the original vmerge is the passthru operand for elements past VL if the VWADD is tail undisturbed. This VMERGE_VL has undef for its passthru. That corrupts the elements past VL.
SDValue Z = Merge->getOperand(2);

if (Z.getOpcode() != ISD::INSERT_SUBVECTOR ||
    !isNullConstant(Z.getOperand(2)))
This only checks that the insertion index is 0. Where do you check the vector being inserted is 0?
Thank you for pointing this out, I will fix it.

I think this might be an issue in performCombineVMergeAndVOps:

define <vscale x 2 x i32> @f(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
  %mask = icmp slt <vscale x 2 x i32> %x, shufflevector(<vscale x 2 x i32> insertelement(<vscale x 2 x i32> poison, i32 42, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
  %a = select <vscale x 2 x i1> %mask, <vscale x 2 x i32> %x, <vscale x 2 x i32> zeroinitializer
  %ret = add <vscale x 2 x i32> %a, %y
  ret <vscale x 2 x i32> %ret
}

define <vscale x 2 x i64> @g(<vscale x 2 x i32> %x, <vscale x 2 x i64> %y) {
  %mask = icmp slt <vscale x 2 x i32> %x, shufflevector(<vscale x 2 x i32> insertelement(<vscale x 2 x i32> poison, i32 42, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
  %a = select <vscale x 2 x i1> %mask, <vscale x 2 x i32> %x, <vscale x 2 x i32> zeroinitializer
  %sa = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
  %ret = add <vscale x 2 x i64> %sa, %y
  ret <vscale x 2 x i64> %ret
}

Update: It's not an issue with performCombineVMergeAndVOps, we're doing a similar combine as this patch somewhere for add.

Update: The combine is DAGCombiner::foldBinOpIntoSelect, which doesn't trigger for the sext case because there's a sign_extend in between the add and vselect.
Yes, the original

@lukel97 Thank you! That's exactly what I mean.
return SDValue();

SmallVector<SDValue, 6> Ops(N->op_values());
Ops[0] = Y;
Isn't Ops[0] already Y?
EVT VT = N->getValueType(0);

SDValue WX = DAG.getNode(Opc, DL, VT, Ops, N->getFlags());
return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Cond, WX, Y, Y, VL);
This is still incorrect. You have to use N->getOperand(2) for the passthru operand to the vmerge.
You're also losing any mask that the VWADD_W_VL may have already had.
// (vwadd y, (select cond, x, 0)) -> select cond (vwadd y, x), y
static SDValue combineVWADDSelect(SDNode *N, SelectionDAG &DAG) {
  unsigned Opc = N->getOpcode();
  assert(Opc == RISCVISD::VWADD_VL || Opc == RISCVISD::VWADD_W_VL ||
It can't be VWADD_VL due to the check in performVWADD_VLCombine, right?
The check in performVWADD_VLCombine is for RISCVISD::VWADD_W_VL and RISCVISD::VWADDU_W_VL. We need to first do combineBinOp_VLToVWBinOp_VL on those.
Oops. You're right. Sorry about that.
EVT VT = N->getValueType(0);

SDValue WX = DAG.getNode(Opc, DL, VT, Ops, N->getFlags());
return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Cond, WX, Y, Y, VL);
You don't need to create a VMERGE, you just need to change the Mask operand when you create WX. RISCVISD::VWADD_W_VL supports all the operands you need to describe this.
Thank you for the advice. It works.
With this we get a normal masked instruction, but in this case I think we need the MASK_TIED.
SmallVector<SDValue, 6> Ops(N->op_values());
Ops[MergeID] = X;
Ops[3] = Cond;
You can't replace operand 3 without checking that operand 3 was an all 1s mask or the passthru was undef originally. If the mask wasn't all 1s or the passthru wasn't undef, then the original add produced the passthru operand for masked off elements.
SDValue X = Merge->getOperand(1);
SDValue Z = Merge->getOperand(2);

if (Z.getOpcode() != ISD::INSERT_SUBVECTOR ||
This doesn't check what operand 0 of the insert is or the size of the insertion. So you only know some subvector of the input is 0. You don't know the whole vector is 0.
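As an illustration of the concern, a hypothetical input like the following (not taken from the thread) has a false operand that is not all zeros; if it still reaches the combine as an INSERT_SUBVECTOR at index 0 once the fixed vector is wrapped into its scalable container, the index-only check would accept it even though the fold is unsound for the lanes where the mask is false:

```llvm
; Hypothetical counterexample (not from the thread): the select's false operand
; is <0,0,0,0,1,1,1,1> rather than zeroinitializer, so rewriting the vmerge into
; a masked vwadd.wv would drop the non-zero addend in the masked-off lanes.
define <8 x i64> @vwadd_mask_nonzero_false(<8 x i32> %x, <8 x i64> %y) {
  %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
  %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  %sa = sext <8 x i32> %a to <8 x i64>
  %ret = add <8 x i64> %sa, %y
  ret <8 x i64> %ret
}
```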
if (!Merge.hasOneUse())
  return SDValue();

SmallVector<SDValue, 6> Ops(N->op_values());
Why 6? I think there are only 5 operands. LHS, RHS, Passthru, Mask, VL
AFAIU, we may need For

Sorry for the ping.
LGTM
Force-pushed from 263047c to 1386a93.
…80079)

Similar to #78403, but for scalable `vwadd(u).wv`, given that #76785 is recommitted.

### Code

```
define <vscale x 8 x i64> @vwadd_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
  %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
  %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
  %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
  %ret = add <vscale x 8 x i64> %sa, %y
  ret <vscale x 8 x i64> %ret
}
```

### Before this patch

[Compiler Explorer](https://godbolt.org/z/xsoa5xPrd)

```
vwadd_wv_mask_v8i32:
        li a0, 42
        vsetvli a1, zero, e32, m4, ta, ma
        vmslt.vx v0, v8, a0
        vmv.v.i v12, 0
        vmerge.vvm v24, v12, v8, v0
        vwadd.wv v8, v16, v24
        ret
```

### After this patch

```
vwadd_wv_mask_v8i32:
        li a0, 42
        vsetvli a1, zero, e32, m4, ta, ma
        vmslt.vx v0, v8, a0
        vsetvli zero, zero, e32, m4, tu, mu
        vwadd.wv v16, v16, v8, v0.t
        vmv8r.v v8, v16
        ret
```
Note we can't use vwaddu.wv because it will get combined away with #78403