-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Disallow negative s_load offsets in isLegalAddressingMode #91327
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) ChangesFull diff: https://github.com/llvm/llvm-project/pull/91327.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ed41c10b50d32..dd2dea501380c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1604,6 +1604,14 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return false;
}
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS && AM.BaseOffs < 0) {
+ // Scalar (non-buffer) loads can only use a negative offset if
+ // soffset+offset is non-negative. Since the compiler can only prove that
+ // in a few special cases, it is safer to claim that negative offsets are
+ // not supported.
+ return false;
+ }
+
if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
index 54dc5b8b9d3dd..b0bbd90f165b9 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
@@ -279,38 +279,30 @@ end:
}
define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
-; GFX678-LABEL: test_sink_smem_offset_neg400:
-; GFX678: ; %bb.0: ; %entry
-; GFX678-NEXT: s_add_u32 s0, s0, 0xfffffe70
-; GFX678-NEXT: s_addc_u32 s1, s1, -1
-; GFX678-NEXT: .LBB5_1: ; %loop
-; GFX678-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX678-NEXT: s_waitcnt lgkmcnt(0)
-; GFX678-NEXT: s_load_dword s3, s[0:1], 0x0
-; GFX678-NEXT: s_add_i32 s2, s2, -1
-; GFX678-NEXT: s_cmp_lg_u32 s2, 0
-; GFX678-NEXT: s_cbranch_scc1 .LBB5_1
-; GFX678-NEXT: ; %bb.2: ; %end
-; GFX678-NEXT: s_endpgm
-;
-; GFX9-LABEL: test_sink_smem_offset_neg400:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: .LBB5_1: ; %loop
-; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dword s3, s[0:1], -0x190
-; GFX9-NEXT: s_add_i32 s2, s2, -1
-; GFX9-NEXT: s_cmp_lg_u32 s2, 0
-; GFX9-NEXT: s_cbranch_scc1 .LBB5_1
-; GFX9-NEXT: ; %bb.2: ; %end
-; GFX9-NEXT: s_endpgm
+; GFX6789-LABEL: test_sink_smem_offset_neg400:
+; GFX6789: ; %bb.0: ; %entry
+; GFX6789-NEXT: s_add_u32 s0, s0, 0xfffffe70
+; GFX6789-NEXT: s_addc_u32 s1, s1, -1
+; GFX6789-NEXT: .LBB5_1: ; %loop
+; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0
+; GFX6789-NEXT: s_add_i32 s2, s2, -1
+; GFX6789-NEXT: s_cmp_lg_u32 s2, 0
+; GFX6789-NEXT: s_cbranch_scc1 .LBB5_1
+; GFX6789-NEXT: ; %bb.2: ; %end
+; GFX6789-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400:
; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_movk_i32 s4, 0xfe70
+; GFX12-NEXT: s_mov_b32 s5, -1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: .LBB5_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_load_b32 s3, s[0:1], -0x190
+; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT: s_add_co_i32 s2, s2, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
@@ -331,3 +323,6 @@ loop:
end:
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX678: {{.*}}
+; GFX9: {{.*}}
|
This is intended to avoid some code quality regressions I saw with #89165. Perhaps it should even be folded into that PR. |
; GFX9-NEXT: s_endpgm | ||
; GFX6789-LABEL: test_sink_smem_offset_neg400: | ||
; GFX6789: ; %bb.0: ; %entry | ||
; GFX6789-NEXT: s_add_u32 s0, s0, 0xfffffe70 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For GFX9 this calculation is done outside the loop. With just #89165 applied, this calculation would be done inside the loop.
@@ -1604,6 +1604,14 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, | |||
return false; | |||
} | |||
|
|||
if (AS == AMDGPUAS::CONSTANT_ADDRESS && AM.BaseOffs < 0) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should also cover the constant_32bit (but again, this whole thing is just a weak heuristic for might-use-scalar-loads)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I specifically didn't want to include constant_32bit because I saw a case where LLPC was using that address space for an s_buffer_load, not an s_load. But I did not look into why that was happening. I have never understood constant_32bit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a CONSTANT_ADDRESS truncated to the low 32-bits, where the high bits are assumed a constant from an attribute. The addressing modes should behave the same as constant, it's just implicitly promoted to the 64-bit pointer when codegenned
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should also cover the constant_32bit
Done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with test added
@@ -279,38 +279,30 @@ end: | |||
} | |||
|
|||
define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add the constant 32-bit test (6)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
…relim) This is a cherry-pick of an unmerged upstream change llvm#91327 Once the upstream change is finished and merged, this will need reverting. Change-Id: Ie89236494ed38063856d881b5c2944e650ee17c4
No description provided.