Skip to content

Commit

Permalink
[RISCV] Move vmerge same mask peephole to RISCVVectorPeephole (llvm#106108)
Browse files Browse the repository at this point in the history

We currently fold a vmerge.vvm into its true operand if the true operand
is a masked pseudo with the same mask.

We can move this over to RISCVVectorPeephole by instead splitting it up
into a smaller peephole which converts it to a vmv.v.v first. The
existing foldVMV_V_V peephole will then take care of folding it if
needed.

This is very similar to the existing all-ones mask peephole, and we could
potentially handle this case inside that peephole instead. I opted to put it
in a separate peephole to make it easier to reason about, given that the
duplication is small, but I could be persuaded either way.
  • Loading branch information
lukel97 authored and VitaNuo committed Sep 12, 2024
1 parent be0c950 commit ecad68f
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 60 deletions.
42 changes: 5 additions & 37 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3833,15 +3833,8 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
uint64_t TrueTSFlags = TrueMCID.TSFlags;
bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);

bool IsMasked = false;
const RISCV::RISCVMaskedPseudoInfo *Info =
RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
if (!Info && HasTiedDest) {
Info = RISCV::getMaskedPseudoInfo(TrueOpc);
IsMasked = true;
}
assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");

if (!Info)
return false;

Expand All @@ -3853,19 +3846,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
return false;
}

// If True is masked then the vmerge must have either the same mask or an all
// 1s mask, since we're going to keep the mask from True.
if (IsMasked) {
// FIXME: Support mask agnostic True instruction which would have an
// undef passthru operand.
SDValue TrueMask =
getMaskSetter(True->getOperand(Info->MaskOpIdx),
True->getOperand(True->getNumOperands() - 1));
assert(TrueMask);
if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
return false;
}

// Skip if True has side effect.
if (TII->get(TrueOpc).hasUnmodeledSideEffects())
return false;
Expand Down Expand Up @@ -3930,24 +3910,13 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
(Mask && !usesAllOnesMask(Mask, Glue)))
return false;

// If we end up changing the VL or mask of True, then we need to make sure it
// doesn't raise any observable fp exceptions, since changing the active
// elements will affect how fflags is set.
if (TrueVL != VL || !IsMasked)
if (mayRaiseFPException(True.getNode()) &&
!True->getFlags().hasNoFPExcept())
return false;
// Make sure it doesn't raise any observable fp exceptions, since changing the
// active elements will affect how fflags is set.
if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept())
return false;

SDLoc DL(N);

// From the preconditions we checked above, we know the mask and thus glue
// for the result node will be taken from True.
if (IsMasked) {
Mask = True->getOperand(Info->MaskOpIdx);
Glue = True->getOperand(True->getNumOperands() - 1);
assert(Glue.getValueType() == MVT::Glue);
}

unsigned MaskedOpc = Info->MaskedPseudo;
#ifndef NDEBUG
const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
Expand Down Expand Up @@ -3977,8 +3946,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
Ops.push_back(False);

const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);

Ops.push_back(Mask);
Expand Down
74 changes: 63 additions & 11 deletions llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ class RISCVVectorPeephole : public MachineFunctionPass {
bool convertToVLMAX(MachineInstr &MI) const;
bool convertToWholeRegister(MachineInstr &MI) const;
bool convertToUnmasked(MachineInstr &MI) const;
bool convertVMergeToVMv(MachineInstr &MI) const;
bool convertAllOnesVMergeToVMv(MachineInstr &MI) const;
bool convertSameMaskVMergeToVMv(MachineInstr &MI) const;
bool foldUndefPassthruVMV_V_V(MachineInstr &MI);
bool foldVMV_V_V(MachineInstr &MI);

Expand Down Expand Up @@ -342,17 +343,13 @@ bool RISCVVectorPeephole::convertToWholeRegister(MachineInstr &MI) const {
return true;
}

// Transform (VMERGE_VVM_<LMUL> pt, false, true, allones, vl, sew) to
// (VMV_V_V_<LMUL> pt, true, vl, sew). It may decrease uses of VMSET.
bool RISCVVectorPeephole::convertVMergeToVMv(MachineInstr &MI) const {
static unsigned getVMV_V_VOpcodeForVMERGE_VVM(const MachineInstr &MI) {
#define CASE_VMERGE_TO_VMV(lmul) \
case RISCV::PseudoVMERGE_VVM_##lmul: \
NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
break;
unsigned NewOpc;
return RISCV::PseudoVMV_V_V_##lmul;
switch (MI.getOpcode()) {
default:
return false;
return 0;
CASE_VMERGE_TO_VMV(MF8)
CASE_VMERGE_TO_VMV(MF4)
CASE_VMERGE_TO_VMV(MF2)
Expand All @@ -361,14 +358,68 @@ bool RISCVVectorPeephole::convertVMergeToVMv(MachineInstr &MI) const {
CASE_VMERGE_TO_VMV(M4)
CASE_VMERGE_TO_VMV(M8)
}
}

/// Convert a PseudoVMERGE_VVM with an all ones mask to a PseudoVMV_V_V.
///
/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %allones, vl, sew
/// ->
/// %x = PseudoVMV_V_V %passthru, %true, vl, sew, tu_mu
///
/// \param MI the candidate instruction; it is rewritten in place on success.
/// \returns true if MI was converted, false if it was left untouched.
bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
  // No vmv.v.v opcode is reported for MI, so there is nothing to convert.
  unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
  if (!NewOpc)
    return false;
  assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
  if (!isAllOnesMask(V0Defs.lookup(&MI)))
    return false;

  MI.setDesc(TII->get(NewOpc));
  // Drop exactly two operands. Each index refers to the operand list as it
  // stands at that call: once the false operand (index 2) is gone, the mask
  // has shifted down from index 4 to index 3. Removing more than this pair
  // would also discard the true and SEW operands.
  MI.removeOperand(2); // False operand
  MI.removeOperand(3); // Mask operand
  MI.addOperand(
      MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));

  // vmv.v.v doesn't have a mask operand, so we may be able to inflate the
  // register class for the destination and passthru operands e.g. VRNoV0 -> VR
  MRI->recomputeRegClass(MI.getOperand(0).getReg());
  if (MI.getOperand(1).getReg() != RISCV::NoRegister)
    MRI->recomputeRegClass(MI.getOperand(1).getReg());
  return true;
}

/// If a PseudoVMERGE_VVM's true operand is a masked pseudo and both have the
/// same mask, and the masked pseudo's passthru is the same as the false
/// operand, we can convert the PseudoVMERGE_VVM to a PseudoVMV_V_V.
///
/// %true = PseudoVADD_VV_M1_MASK %false, %a, %b, %mask, vl1, sew, policy
/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %mask, vl2, sew
/// ->
/// %true = PseudoVADD_VV_M1_MASK %false, %a, %b, %mask, vl1, sew, policy
/// %x = PseudoVMV_V_V %passthru, %true, vl2, sew, tu_mu
bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) const {
unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
if (!NewOpc)
return false;
MachineInstr *True = MRI->getVRegDef(MI.getOperand(3).getReg());
if (!True || !RISCV::getMaskedPseudoInfo(True->getOpcode()) ||
!hasSameEEW(MI, *True))
return false;

// True's passthru needs to be equivalent to False
Register TruePassthruReg = True->getOperand(1).getReg();
Register FalseReg = MI.getOperand(2).getReg();
if (TruePassthruReg != RISCV::NoRegister && TruePassthruReg != FalseReg)
return false;

const MachineInstr *TrueV0Def = V0Defs.lookup(True);
const MachineInstr *MIV0Def = V0Defs.lookup(&MI);
assert(TrueV0Def && TrueV0Def->isCopy() && MIV0Def && MIV0Def->isCopy());
if (TrueV0Def->getOperand(1).getReg() != MIV0Def->getOperand(1).getReg())
return false;

MI.setDesc(TII->get(NewOpc));
MI.removeOperand(2); // False operand
MI.removeOperand(3); // Mask operand
MI.addOperand(
MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));

Expand Down Expand Up @@ -623,7 +674,8 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
Changed |= tryToReduceVL(MI);
Changed |= convertToUnmasked(MI);
Changed |= convertToWholeRegister(MI);
Changed |= convertVMergeToVMv(MI);
Changed |= convertAllOnesVMergeToVMv(MI);
Changed |= convertSameMaskVMergeToVMv(MI);
if (foldUndefPassthruVMV_V_V(MI)) {
Changed |= true;
continue; // MI is erased
Expand Down
22 changes: 10 additions & 12 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,11 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
; CHECK-NEXT: li a4, 5
; CHECK-NEXT: .LBB1_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vadd.vv v9, v10, v9
; CHECK-NEXT: vse8.v v9, (a0)
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a1), a4, v0.t
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 32
; CHECK-NEXT: addi a1, a1, 160
; CHECK-NEXT: bne a0, a2, .LBB1_1
Expand Down Expand Up @@ -344,12 +343,11 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
; CHECK-NEXT: li a4, 5
; CHECK-NEXT: .LBB7_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: vlse8.v v10, (a0), a4, v0.t
; CHECK-NEXT: vadd.vv v9, v10, v9
; CHECK-NEXT: vsse8.v v9, (a0), a4, v0.t
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vlse8.v v9, (a0), a4, v0.t
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vsse8.v v8, (a0), a4, v0.t
; CHECK-NEXT: addi a1, a1, 32
; CHECK-NEXT: addi a0, a0, 160
; CHECK-NEXT: bne a1, a2, .LBB7_1
Expand Down
70 changes: 70 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,73 @@ body: |
$v0 = COPY %mask
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5
...
---
name: same_mask
body: |
bb.0:
liveins: $v8, $v9, $v0
; CHECK-LABEL: name: same_mask
; CHECK: liveins: $v8, $v9, $v0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %pt:vr = COPY $v8
; CHECK-NEXT: %false:vrnov0 = COPY $v9
; CHECK-NEXT: %mask:vr = COPY $v0
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */
%pt:vrnov0 = COPY $v8
%false:vrnov0 = COPY $v9
%mask:vr = COPY $v0
$v0 = COPY %mask
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
$v0 = COPY %mask
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
...
---
# Shouldn't be converted because false operands are different
name: same_mask_different_false
body: |
bb.0:
liveins: $v8, $v9, $v0
; CHECK-LABEL: name: same_mask_different_false
; CHECK: liveins: $v8, $v9, $v0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
; CHECK-NEXT: %false:vrnov0 = COPY $v9
; CHECK-NEXT: %mask:vr = COPY $v0
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %pt, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
%pt:vrnov0 = COPY $v8
%false:vrnov0 = COPY $v9
%mask:vr = COPY $v0
$v0 = COPY %mask
%true:vrnov0 = PseudoVADD_VV_M1_MASK %pt, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
$v0 = COPY %mask
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
...
---
# Shouldn't be converted because EEWs are different
name: same_mask_different_eew
body: |
bb.0:
liveins: $v8, $v9, $v0
; CHECK-LABEL: name: same_mask_different_eew
; CHECK: liveins: $v8, $v9, $v0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
; CHECK-NEXT: %false:vrnov0 = COPY $v9
; CHECK-NEXT: %mask:vr = COPY $v0
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 4 /* e16 */, 0 /* tu, mu */
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
%pt:vrnov0 = COPY $v8
%false:vrnov0 = COPY $v9
%mask:vr = COPY $v0
$v0 = COPY %mask
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 4 /* e16 */, 0 /* tu, mu */
$v0 = COPY %mask
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */

0 comments on commit ecad68f

Please sign in to comment.