Skip to content

Commit

Permalink
update P7 32-bit partial vector load cost (#108261)
Browse files Browse the repository at this point in the history
Update the cost model to reflect the codegen change made in #104507,
which uses lfiwzx for 32-bit partial vector loads on pwr7.
  • Loading branch information
RolandF77 authored Oct 3, 2024
1 parent 487686b commit 06c8210
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 8 deletions.
16 changes: 11 additions & 5 deletions llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -800,13 +800,19 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// PPCTargetLowering can't compute the cost appropriately. So here we
// explicitly check this case. There are also corresponding store
// instructions.
unsigned MemBytes = Src->getPrimitiveSizeInBits();
if (ST->hasVSX() && IsAltivecType &&
(MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)))
return 1;
unsigned MemBits = Src->getPrimitiveSizeInBits();
unsigned SrcBytes = LT.second.getStoreSize();
if (ST->hasVSX() && IsAltivecType) {
if (MemBits == 64 || (ST->hasP8Vector() && MemBits == 32))
return 1;

// Use lfiwax/xxspltw
Align AlignBytes = Alignment ? *Alignment : Align(1);
if (Opcode == Instruction::Load && MemBits == 32 && AlignBytes < SrcBytes)
return 2;
}

// Aligned loads and stores are easy.
unsigned SrcBytes = LT.second.getStoreSize();
if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
return Cost;

Expand Down
7 changes: 4 additions & 3 deletions llvm/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck -DCOST32=1 %s
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -DCOST32=2 %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

define i32 @loads(i32 %arg) {
; CHECK: cost of 1 {{.*}} load
; CHECK: cost of [[COST32]] {{.*}} load
load <4 x i8>, ptr undef, align 1

; CHECK: cost of 1 {{.*}} load
load <8 x i8>, ptr undef, align 1

; CHECK: cost of 1 {{.*}} load
; CHECK: cost of [[COST32]] {{.*}} load
load <2 x i16>, ptr undef, align 2

; CHECK: cost of 1 {{.*}} load
Expand Down

0 comments on commit 06c8210

Please sign in to comment.