Skip to content

Commit

Permalink
[AArch64] Consider histcnt smaller than i32 in the cost model
Browse files Browse the repository at this point in the history
This PR updates the AArch64 cost model to consider the cheaper cost of
<i32 histograms to reflect the improvements from
llvm#101017 and llvm#103037

Work by Max Beck-Jones (@DevM-uk)
  • Loading branch information
DevM-uk authored and SamTebbs33 committed Sep 13, 2024
1 parent d6832a6 commit 6ca9685
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 20 deletions.
28 changes: 17 additions & 11 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -517,25 +517,31 @@ static bool isUnpackedVectorVT(EVT VecVT) {
static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements
unsigned TotalHistCnts = 1;

// Only allow (32b and 64b) integers or pointers for now...
// Only allow (up to 64b) integers or pointers
if ((!EltTy->isIntegerTy() && !EltTy->isPointerTy()) ||
(EltTy->getScalarSizeInBits() != 32 &&
EltTy->getScalarSizeInBits() != 64))
EltTy->getScalarSizeInBits() > 64)
return InstructionCost::getInvalid();

// FIXME: Hacky check for legal vector types. We can promote smaller types
// but we cannot legalize vectors via splitting for histcnt.
// FIXME: We should be able to generate histcnt for fixed-length vectors
// using ptrue with a specific VL.
if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy))
if ((VTy->getElementCount().getKnownMinValue() != 2 &&
VTy->getElementCount().getKnownMinValue() != 4) ||
VTy->getPrimitiveSizeInBits().getKnownMinValue() > 128 ||
!VTy->isScalableTy())
if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
unsigned EC = VTy->getElementCount().getKnownMinValue();
if (!isPowerOf2_64(EC) || !VTy->isScalableTy())
return InstructionCost::getInvalid();

return InstructionCost(BaseHistCntCost);
bool Element64b = EltTy->isIntegerTy(64);

if (EC == 2 || (!Element64b && EC == 4))
return InstructionCost(BaseHistCntCost);

unsigned NaturalVectorWidth = Element64b ? AArch64::SVEBitsPerBlock / 64
: AArch64::SVEBitsPerBlock / 32;
TotalHistCnts = EC / NaturalVectorWidth;
}

return InstructionCost(BaseHistCntCost * TotalHistCnts);
}

InstructionCost
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -971,26 +971,26 @@ define void @histogram_nxv4i32(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %m
ret void
}

define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) {
define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) #3 {
; CHECK-LABEL: 'histogram_nxv8i16'
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv8i16'
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
ret void
}

define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) {
define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) #3 {
; CHECK-LABEL: 'histogram_nxv16i8'
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv16i8'
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.experimental.vector.histogram.add.nxv16p0.i64(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
Expand Down Expand Up @@ -1049,13 +1049,13 @@ define void @histogram_v16i8(<16 x ptr> %buckets, <16 x i1> %mask) {
ret void
}

define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) {
define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) #3 {
; CHECK-LABEL: 'histogram_nxv4i64'
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv4i64'
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
Expand Down

0 comments on commit 6ca9685

Please sign in to comment.