diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index b7bdbeb535d5269..bc76cf1bfad9628 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -801,13 +801,19 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, // PPCTargetLowering can't compute the cost appropriately. So here we // explicitly check this case. There are also corresponding store // instructions. - unsigned MemBytes = Src->getPrimitiveSizeInBits(); - if (ST->hasVSX() && IsAltivecType && - (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32))) - return 1; + unsigned MemBits = Src->getPrimitiveSizeInBits(); + unsigned SrcBytes = LT.second.getStoreSize(); + if (ST->hasVSX() && IsAltivecType) { + if (MemBits == 64 || (ST->hasP8Vector() && MemBits == 32)) + return 1; + + // Use lfiwax/xxspltw + Align AlignBytes = Alignment ? *Alignment : Align(1); + if (Opcode == Instruction::Load && MemBits == 32 && AlignBytes < SrcBytes) + return 2; + } // Aligned loads and stores are easy. 
- unsigned SrcBytes = LT.second.getStoreSize(); if (!SrcBytes || !Alignment || *Alignment >= SrcBytes) return Cost; diff --git a/llvm/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll b/llvm/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll index 54cafa0ae59f393..17bcdd4d3f44cf5 100644 --- a/llvm/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll +++ b/llvm/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll @@ -1,15 +1,16 @@ -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck -DCOST32=1 %s +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -DCOST32=2 %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" define i32 @loads(i32 %arg) { - ; CHECK: cost of 1 {{.*}} load + ; CHECK: cost of [[COST32]] {{.*}} load load <4 x i8>, ptr undef, align 1 ; CHECK: cost of 1 {{.*}} load load <8 x i8>, ptr undef, align 1 - ; CHECK: cost of 1 {{.*}} load + ; CHECK: cost of [[COST32]] {{.*}} load load <2 x i16>, ptr undef, align 2 ; CHECK: cost of 1 {{.*}} load