Skip to content

Commit

Permalink
[AArch64] Add costs for ST3 and ST4 instructions, modelled as store(s…
Browse files Browse the repository at this point in the history
…huffle). (#87934)

This tries to add some costs for the shuffle in a ST3/ST4 instruction,
which are represented in LLVM IR as store(interleaving shuffle). In
order to detect the store, it needs to add a CxtI context instruction to
check the users of the shuffle. LD3 and LD4 are added, LD2 should be a
zip1 shuffle, which will be added in another patch.

It should help fix some of the regressions from #87510.
  • Loading branch information
davemgreen authored Apr 9, 2024
1 parent e280407 commit 4ac2721
Show file tree
Hide file tree
Showing 22 changed files with 121 additions and 92 deletions.
26 changes: 13 additions & 13 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1291,12 +1291,11 @@ class TargetTransformInfo {
/// passed through \p Args, which helps improve the cost estimation in some
/// cases, like in broadcast loads.
/// NOTE: For subvector extractions Tp represents the source type.
InstructionCost
getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask = std::nullopt,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
int Index = 0, VectorType *SubTp = nullptr,
ArrayRef<const Value *> Args = std::nullopt) const;
InstructionCost getShuffleCost(
ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0,
VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr) const;

/// Represents a hint about the context in which a cast is used.
///
Expand Down Expand Up @@ -2008,11 +2007,10 @@ class TargetTransformInfo::Concept {
const SmallBitVector &OpcodeMask,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0;

virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) = 0;
virtual InstructionCost
getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args, const Instruction *CxtI) = 0;
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src, CastContextHint CCH,
TTI::TargetCostKind CostKind,
Expand Down Expand Up @@ -2647,8 +2645,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args) override {
return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
ArrayRef<const Value *> Args,
const Instruction *CxtI) override {
return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
CxtI);
}
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
CastContextHint CCH,
Expand Down
44 changes: 25 additions & 19 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -579,10 +579,12 @@ class TargetTransformInfoImplBase {
return InstructionCost::getInvalid();
}

InstructionCost
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt) const {
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr) const {
return 1;
}

Expand Down Expand Up @@ -1341,13 +1343,13 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
if (Shuffle->isExtractSubvectorMask(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
Mask, CostKind, SubIndex, VecTy,
Operands);
Operands, Shuffle);

if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
Operands);
Operands, Shuffle);

int ReplicationFactor, VF;
if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
Expand All @@ -1374,7 +1376,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {

return TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
AdjustMask, CostKind, 0, nullptr);
AdjustMask, CostKind, 0, nullptr, {}, Shuffle);
}

// Narrowing shuffle - perform shuffle at original wider width and
Expand All @@ -1383,49 +1385,53 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {

InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
VecSrcTy, AdjustMask, CostKind, 0, nullptr);
VecSrcTy, AdjustMask, CostKind, 0, nullptr, {}, Shuffle);

SmallVector<int, 16> ExtractMask(Mask.size());
std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
return ShuffleCost + TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector,
VecSrcTy, ExtractMask,
CostKind, 0, VecTy);
return ShuffleCost + TargetTTI->getShuffleCost(
TTI::SK_ExtractSubvector, VecSrcTy,
ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
}

if (Shuffle->isIdentity())
return 0;

if (Shuffle->isReverse())
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
0, nullptr, Operands);
0, nullptr, Operands, Shuffle);

if (Shuffle->isSelect())
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
0, nullptr, Operands);
0, nullptr, Operands, Shuffle);

if (Shuffle->isTranspose())
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isZeroEltSplat())
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isSingleSource())
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands);
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
Shuffle);

if (Shuffle->isSplice(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
SubIndex, nullptr, Operands);
SubIndex, nullptr, Operands, Shuffle);

return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands, Shuffle);
}
case Instruction::ExtractElement: {
auto *EEI = dyn_cast<ExtractElementInst>(U);
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1018,7 +1018,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt) {
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr) {
switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) {
case TTI::SK_Broadcast:
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,9 +916,9 @@ InstructionCost TargetTransformInfo::getAltInstrCost(
InstructionCost TargetTransformInfo::getShuffleCost(
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) const {
InstructionCost Cost =
TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind, Index, SubTp, Args);
ArrayRef<const Value *> Args, const Instruction *CxtI) const {
InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind,
Index, SubTp, Args, CxtI);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
Expand Down
28 changes: 20 additions & 8 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3815,18 +3815,29 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
return LegalizationCost * LT.first;
}

InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
InstructionCost AArch64TTIImpl::getShuffleCost(
TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args, const Instruction *CxtI) {
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);

// If we have a Mask, and the LT is being legalized somehow, split the Mask
// into smaller vectors and sum the cost of each shuffle.
if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {

// Check for ST3/ST4 instructions, which are represented in llvm IR as
// store(interleaving-shuffle). The shuffle cost could potentially be free,
// but we model it with a cost of LT.first so that LD3/LD3 have a higher
// cost than just the store.
if (CxtI && CxtI->hasOneUse() && isa<StoreInst>(*CxtI->user_begin()) &&
(ShuffleVectorInst::isInterleaveMask(
Mask, 4, Tp->getElementCount().getKnownMinValue() * 2) ||
ShuffleVectorInst::isInterleaveMask(
Mask, 3, Tp->getElementCount().getKnownMinValue() * 2)))
return LT.first;

unsigned TpNumElts = Mask.size();
unsigned LTNumElts = LT.second.getVectorNumElements();
unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
Expand Down Expand Up @@ -3874,7 +3885,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (NumSources <= 2)
Cost += getShuffleCost(NumSources <= 1 ? TTI::SK_PermuteSingleSrc
: TTI::SK_PermuteTwoSrc,
NTp, NMask, CostKind, 0, nullptr, Args);
NTp, NMask, CostKind, 0, nullptr, Args, CxtI);
else if (any_of(enumerate(NMask), [&](const auto &ME) {
return ME.value() % LTNumElts == ME.index();
}))
Expand Down Expand Up @@ -4055,7 +4066,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// Restore optimal kind.
if (IsExtractSubvector)
Kind = TTI::SK_ExtractSubvector;
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
CxtI);
}

static bool containsDecreasingPointers(Loop *TheLoop,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,8 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *VT, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
Kind = improveShuffleKindFromMask(Kind, Mask, VT, Index, SubTp);
// Treat extractsubvector as single op permutation.
bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1212,7 +1212,8 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
// Treat extractsubvector as single op permutation.
bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

bool preferInLoopReduction(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,8 @@ InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, Type *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
return 1;
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
Type *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,8 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, Type *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {

InstructionCost CostFactor =
vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
Type *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);

std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
Expand Down
Loading

0 comments on commit 4ac2721

Please sign in to comment.