forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AMDGPU][PromoteAlloca] Don't stop when an alloca is too big to promote (llvm#93466)
When I rewrote this, I made a mistake in the control flow. I thought we could just stop promoting if an alloca is too big to vectorize, but we can't. Other allocas in the list may be promotable and fit within the budget. Fixes SWDEV-455343.
- Loading branch information
Showing
2 changed files
with
59 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
41 changes: 41 additions & 0 deletions
41
llvm/test/CodeGen/AMDGPU/promote-alloca-budget-exhausted.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca-to-vector-limit=128 -passes=amdgpu-promote-alloca-to-vector %s -o - | FileCheck %s | ||
|
||
; Check that when we see an alloca that's too big to vectorize given the remaining budget, | ||
; we don't give up and we keep looking for other allocas to vectorize. | ||
|
||
; Kernel with two private (addrspace(5)) allocas: a small one (%simpleuser) and | ||
; a large one (%manyusers). The CHECK lines below expect the output to contain | ||
; only %manyusers and its users, unchanged; %simpleuser and its store must no | ||
; longer appear (CHECK-NEXT leaves no room for them). | ||
define amdgpu_kernel void @simple_users_scores() { | ||
; CHECK-LABEL: define amdgpu_kernel void @simple_users_scores( | ||
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { | ||
; CHECK-NEXT: [[ENTRY:.*:]] | ||
; CHECK-NEXT: [[MANYUSERS:%.*]] = alloca [64 x i64], align 4, addrspace(5) | ||
; CHECK-NEXT: [[MANYUSERS_1:%.*]] = getelementptr i8, ptr addrspace(5) [[MANYUSERS]], i64 2 | ||
; CHECK-NEXT: [[V0:%.*]] = load i8, ptr addrspace(5) [[MANYUSERS_1]], align 1 | ||
; CHECK-NEXT: [[V0_EXT:%.*]] = zext i8 [[V0]] to i64 | ||
; CHECK-NEXT: store i64 [[V0_EXT]], ptr addrspace(5) [[MANYUSERS_1]], align 8 | ||
; CHECK-NEXT: [[MANYUSERS_2:%.*]] = getelementptr i8, ptr addrspace(5) [[MANYUSERS]], i64 1 | ||
; CHECK-NEXT: [[V1:%.*]] = load i8, ptr addrspace(5) [[MANYUSERS_2]], align 1 | ||
; CHECK-NEXT: [[V1_EXT:%.*]] = zext i8 [[V0]] to i64 | ||
; CHECK-NEXT: store i64 [[V1_EXT]], ptr addrspace(5) [[MANYUSERS_2]], align 8 | ||
; CHECK-NEXT: ret void | ||
; | ||
entry: | ||
; Small alloca: 4 x i64 = 32 bytes; its only user is the store of 42 below. | ||
; should get a score of 1 | ||
%simpleuser = alloca [4 x i64], align 4, addrspace(5) | ||
; Large alloca: 64 x i64 = 512 bytes. The CHECK lines expect it to be left in | ||
; place, presumably because it exceeds the | ||
; -amdgpu-promote-alloca-to-vector-limit=128 budget from the RUN line | ||
; (unit of the limit is not shown here -- confirm against the pass). | ||
; should get a score of 4 and be visited first. | ||
%manyusers = alloca [64 x i64], align 4, addrspace(5) | ||
|
||
; Sole use of %simpleuser; absent from the expected output. | ||
store i64 42, ptr addrspace(5) %simpleuser | ||
|
||
; First user chain of %manyusers: byte offset 2, load i8, widen, store back. | ||
%manyusers.1 = getelementptr i8, ptr addrspace(5) %manyusers, i64 2 | ||
%v0 = load i8, ptr addrspace(5) %manyusers.1 | ||
%v0.ext = zext i8 %v0 to i64 | ||
store i64 %v0.ext, ptr addrspace(5) %manyusers.1 | ||
|
||
; Second user chain of %manyusers: byte offset 1, load i8, widen, store back. | ||
%manyusers.2 = getelementptr i8, ptr addrspace(5) %manyusers, i64 1 | ||
%v1 = load i8, ptr addrspace(5) %manyusers.2 | ||
; NOTE(review): this extends %v0, not the just-loaded %v1, leaving %v1 unused. | ||
; Looks like a copy-paste slip, but the autogenerated CHECK line for V1_EXT | ||
; matches it -- confirm intent and regenerate the checks if it is changed. | ||
%v1.ext = zext i8 %v0 to i64 | ||
store i64 %v1.ext, ptr addrspace(5) %manyusers.2 | ||
|
||
ret void | ||
} |