Skip to content

Commit

Permalink
Merged master:45ebe38ffc4 into amd-gfx:86103e348b6
Browse files Browse the repository at this point in the history
Local branch amd-gfx 86103e3 Merged master:bfdf9ef009a into amd-gfx:1cc12f0b22d
Remote branch master 45ebe38 [X86][AVX] Pad small shuffle inputs in combineX86ShufflesRecursively
  • Loading branch information
Sw authored and Sw committed May 31, 2020
2 parents 86103e3 + 45ebe38 commit e87ce7d
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 33 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/ScheduleDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,10 @@ class TargetRegisterInfo;
public:
ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits, SUnit *ExitSU);

/// Add a SUnit without predecessors to the end of the topological order. The
/// SUnit must also be the next node not yet present in the order, i.e. its
/// NodeNum must equal the number of nodes the order currently holds.
void AddSUnitWithoutPredecessors(const SUnit *SU);

/// Creates the initial topological ordering from the DAG to be scheduled.
void InitDAGTopologicalSorting();

Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/CodeGen/ScheduleDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,14 @@ bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
return false;
}

/// Appends \p SU to the end of the topological order.
///
/// Asserts that \p SU's NodeNum equals the current size of Index2Node and that
/// \p SU has no predecessors.
void ScheduleDAGTopologicalSort::AddSUnitWithoutPredecessors(const SUnit *SU) {
  // Position the new node will occupy: one past the current last entry.
  const auto NewIndex = Index2Node.size();

  assert(SU->NodeNum == NewIndex && "Node cannot be added at the end");
  assert(SU->NumPreds == 0 && "Can only add SU's with no predecessors");

  // Record the mapping in both directions (node -> index, index -> node).
  Node2Index.push_back(NewIndex);
  Index2Node.push_back(SU->NodeNum);

  // Grow Visited so it has one entry per node in Node2Index.
  Visited.resize(Node2Index.size());
}

bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
const SUnit *TargetSU) {
FixOrder();
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes {
SUnit *NewNode = newSUnit(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
Topo.MarkDirty();
Topo.AddSUnitWithoutPredecessors(NewNode);
return NewNode;
}

Expand All @@ -289,7 +289,7 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes {
SUnit *NewNode = Clone(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
Topo.MarkDirty();
Topo.AddSUnitWithoutPredecessors(NewNode);
return NewNode;
}

Expand Down
44 changes: 13 additions & 31 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7662,20 +7662,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
}
case X86ISD::VBROADCAST: {
SDValue Src = N.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
if (!SrcVT.isVector())
if (!Src.getSimpleValueType().isVector())
return false;

if (NumSizeInBits != SrcVT.getSizeInBits()) {
assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
"Illegal broadcast type");
SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
NumSizeInBits / SrcVT.getScalarSizeInBits());
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), SrcVT,
DAG.getUNDEF(SrcVT), Src,
DAG.getIntPtrConstant(0, SDLoc(N)));
}

Ops.push_back(Src);
Mask.append(NumElts, 0);
return true;
Expand All @@ -7692,22 +7680,10 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
(SrcVT.getScalarSizeInBits() % 8) != 0)
return false;

unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits();
bool IsAnyExtend =
(ISD::ANY_EXTEND == Opcode || ISD::ANY_EXTEND_VECTOR_INREG == Opcode);
DecodeZeroExtendMask(NumSrcBitsPerElt, NumBitsPerElt, NumElts, IsAnyExtend,
Mask);

if (NumSizeInBits != SrcVT.getSizeInBits()) {
assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
"Illegal zero-extension type");
SrcVT = MVT::getVectorVT(SrcVT.getSimpleVT().getScalarType(),
NumSizeInBits / NumSrcBitsPerElt);
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), SrcVT,
DAG.getUNDEF(SrcVT), Src,
DAG.getIntPtrConstant(0, SDLoc(N)));
}

DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts,
IsAnyExtend, Mask);
Ops.push_back(Src);
return true;
}
Expand Down Expand Up @@ -35054,7 +35030,8 @@ static SDValue combineX86ShufflesRecursively(

assert(Root.getSimpleValueType().isVector() &&
"Shuffles operate on vector types!");
assert(VT.getSizeInBits() == Root.getSimpleValueType().getSizeInBits() &&
unsigned RootSizeInBits = Root.getSimpleValueType().getSizeInBits();
assert(VT.getSizeInBits() == RootSizeInBits &&
"Can only combine shuffles of the same vector register size.");

// Extract target shuffle mask and resolve sentinels and inputs.
Expand All @@ -35068,12 +35045,17 @@ static SDValue combineX86ShufflesRecursively(
OpZero, DAG, Depth, false))
return SDValue();

// Shuffle inputs must be the same size as the result.
if (llvm::any_of(OpInputs, [VT](SDValue Op) {
return VT.getSizeInBits() != Op.getValueSizeInBits();
// Shuffle inputs must be the same size as the result: bail on any larger
// inputs and widen any smaller inputs.
if (llvm::any_of(OpInputs, [RootSizeInBits](SDValue Op) {
return Op.getValueSizeInBits() > RootSizeInBits;
}))
return SDValue();

for (SDValue &Op : OpInputs)
if (Op.getValueSizeInBits() < RootSizeInBits)
Op = widenSubVector(Op, false, Subtarget, DAG, SDLoc(Op), RootSizeInBits);

SmallVector<int, 64> Mask;
SmallVector<SDValue, 16> Ops;

Expand Down

0 comments on commit e87ce7d

Please sign in to comment.