-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
update P7 32-bit partial vector load cost #108261
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -802,12 +802,17 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, | |
// explicitly check this case. There are also corresponding store | ||
// instructions. | ||
unsigned MemBytes = Src->getPrimitiveSizeInBits(); | ||
if (ST->hasVSX() && IsAltivecType && | ||
(MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32))) | ||
return 1; | ||
Align AlignBytes = Alignment ? *Alignment : Align(1); | ||
unsigned SrcBytes = LT.second.getStoreSize(); | ||
if (ST->hasVSX() && IsAltivecType) { | ||
if (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: this may not be related to the patch, but I think the variable `MemBytes` is misnamed: it is initialized from `Src->getPrimitiveSizeInBits()`, so it holds a size in bits, not bytes.
Could we rename the variable in this patch while we are here? |
||
return 1; | ||
// Use lfiwax/xxspltw | ||
if (Opcode == Instruction::Load && MemBytes == 32 && AlignBytes < SrcBytes) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am curious why we need the `AlignBytes < SrcBytes` check here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If a partial vector (< 128 bits) is being loaded from an address with full vector alignment (>= 128 bits), the load will be done as a full vector load, since we know from the alignment that it is safe. Therefore the cost of a partial vector load does not apply. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for explaining. |
||
return 2; | ||
} | ||
|
||
// Aligned loads and stores are easy. | ||
unsigned SrcBytes = LT.second.getStoreSize(); | ||
if (!SrcBytes || !Alignment || *Alignment >= SrcBytes) | ||
return Cost; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,21 @@ | ||
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s | ||
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck --check-prefix=P7 %s | ||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" | ||
target triple = "powerpc64-unknown-linux-gnu" | ||
|
||
define i32 @loads(i32 %arg) { | ||
; CHECK: cost of 1 {{.*}} load | ||
; P7: cost of 2 {{.*}} load | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would change the RUN lines to `opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s -DCOST=1` and `opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck %s -DCOST=2`, and change the separate `CHECK`/`P7` lines for the loads whose cost differs into a single line that uses the variable, e.g. `; CHECK: cost of [[COST]] {{.*}} load`.
It is only a suggestion; feel free to keep the test as it is if you do not want to modify it. |
||
load <4 x i8>, ptr undef, align 1 | ||
|
||
; CHECK: cost of 1 {{.*}} load | ||
; CHECK, P7: cost of 1 {{.*}} load | ||
load <8 x i8>, ptr undef, align 1 | ||
|
||
; CHECK: cost of 1 {{.*}} load | ||
; P7: cost of 2 {{.*}} load | ||
load <2 x i16>, ptr undef, align 2 | ||
|
||
; CHECK: cost of 1 {{.*}} load | ||
; CHECK, P7: cost of 1 {{.*}} load | ||
load <4 x i16>, ptr undef, align 2 | ||
|
||
ret i32 undef | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can put the definition of the variable immediately before the
`if (Opcode == Instruction::Load && MemBytes == 32 && AlignBytes < SrcBytes)`
check.