Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Attributor] Enable AAAddressSpace in OpenMPOpt #104363

Status: Merged (1 commit, merged on Aug 16, 2024)
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5569,6 +5569,8 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
bool UsedAssumedInformation = false;
A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
UsedAssumedInformation, AA::Interprocedural);
A.getOrCreateAAFor<AAAddressSpace>(
IRPosition::value(*LI->getPointerOperand()));
continue;
}
if (auto *CI = dyn_cast<CallBase>(&I)) {
Expand All @@ -5578,6 +5580,8 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
}
if (auto *SI = dyn_cast<StoreInst>(&I)) {
A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
A.getOrCreateAAFor<AAAddressSpace>(
IRPosition::value(*SI->getPointerOperand()));
continue;
}
if (auto *FI = dyn_cast<FenceInst>(&I)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ cond.end: ; preds = %cond.true, %entry
; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl14SubviewExtentsILj2ELj1EE3setIJLm0ELm0EEJiEEEbjjRKNS0_13ViewDimensionIJXspT_EEEENS0_5ALL_tEDpT0_.internalized
; CHECK-SAME: (ptr nocapture writeonly [[THIS:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[THIS]] to ptr addrspace(5)
; CHECK-NEXT: ret i1 false
;
;
Expand Down
16 changes: 7 additions & 9 deletions llvm/test/Transforms/OpenMP/barrier_removal.ll
Original file line number Diff line number Diff line change
Expand Up @@ -269,10 +269,9 @@ define void @neg_empty_2() "kernel" {
define void @pos_constant_loads() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_constant_loads
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(4), ptr addrspacecast (ptr addrspace(4) @GPtr4 to ptr), align 8
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @GC2 to ptr), align 4
; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr
; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARGC]], align 4
; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(4), ptr addrspace(4) @GPtr4, align 8
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(4) @GC2, align 4
; CHECK-NEXT: [[C:%.*]] = load i32, ptr addrspace(4) [[ARG]], align 4
; CHECK-NEXT: [[D:%.*]] = add i32 42, [[B]]
; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]]
; CHECK-NEXT: call void @useI32(i32 [[E]])
Expand Down Expand Up @@ -303,7 +302,7 @@ define void @neg_loads() "kernel" {
; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G, align 4
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @GS to ptr), align 4
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(3) @GS, align 4
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARG]], align 4
; CHECK-NEXT: call void @aligned_barrier()
Expand Down Expand Up @@ -335,9 +334,8 @@ define void @pos_priv_mem() "kernel" {
; CHECK-NEXT: [[LOC:%.*]] = alloca i32, align 4, addrspace(5)
; CHECK-NEXT: [[A:%.*]] = load i32, ptr @PG1, align 4
; CHECK-NEXT: store i32 [[A]], ptr addrspace(5) [[LOC]], align 4
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(5) @PG2 to ptr), align 4
; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(5) [[ARG]] to ptr
; CHECK-NEXT: store i32 [[B]], ptr [[ARGC]], align 4
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(5) @PG2, align 4
; CHECK-NEXT: store i32 [[B]], ptr addrspace(5) [[ARG]], align 4
; CHECK-NEXT: [[V:%.*]] = load i32, ptr addrspace(5) [[LOC]], align 4
; CHECK-NEXT: store i32 [[V]], ptr @PG1, align 4
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -370,7 +368,7 @@ define void @neg_mem() "kernel" {
; CHECK-NEXT: store i32 [[A]], ptr [[ARG]], align 4
; CHECK-NEXT: fence release
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @G2 to ptr), align 4
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(1) @G2, align 4
; CHECK-NEXT: store i32 [[B]], ptr @G1, align 4
; CHECK-NEXT: fence acquire
; CHECK-NEXT: ret void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ define internal void @func() {
; CHECK-LABEL: define {{[^@]+}}@func
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = load ptr, ptr null, align 4294967296
; CHECK-NEXT: [[I:%.*]] = load ptr, ptr addrspace(5) null, align 4294967296
; CHECK-NEXT: store i64 0, ptr [[I]], align 8
; CHECK-NEXT: ret void
;
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/OpenMP/nested_parallelism.ll
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(ptr %dyn,
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED_I:%.*]], label [[_Z3FOOI_INTERNALIZED_EXIT:%.*]]
; CHECK: region.guarded.i:
; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), align 16
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i_shared, align 16
; CHECK-NEXT: br label [[_Z3FOOI_INTERNALIZED_EXIT]]
; CHECK: _Z3fooi.internalized.exit:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
Expand Down Expand Up @@ -140,7 +140,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(ptr %dyn,
; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), align 16
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
Expand Down
25 changes: 13 additions & 12 deletions llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,22 @@ define i32 @fputs() {
define internal i32 @__kmpc_target_init(ptr %0, ptr %dyn) {
; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[DYN:%.*]]) #[[ATTR1:[0-9]+]] {
; AMDGPU-NEXT: [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2), align 2
; AMDGPU-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], 2
; AMDGPU-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0
; AMDGPU-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]]
; AMDGPU-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
; AMDGPU-NEXT: [[OR_COND:%.*]] = select i1 [[TMP4]], i1 [[TMP6]], i1 false
; AMDGPU-NEXT: br i1 [[OR_COND]], label [[TMP7:%.*]], label [[TMP8:%.*]]
; AMDGPU: 7:
; AMDGPU-NEXT: store i8 0, ptr addrspace(3) null, align 2147483648
; AMDGPU-NEXT: br label [[TMP8]]
; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2) to ptr addrspace(1)
; AMDGPU-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(1) [[TMP2]], align 2
; AMDGPU-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 2
; AMDGPU-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
; AMDGPU-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]]
; AMDGPU-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
; AMDGPU-NEXT: [[OR_COND:%.*]] = select i1 [[TMP5]], i1 [[TMP7]], i1 false
; AMDGPU-NEXT: br i1 [[OR_COND]], label [[TMP8:%.*]], label [[TMP9:%.*]]
; AMDGPU: 8:
; AMDGPU-NEXT: br label [[TMP10:%.*]]
; AMDGPU-NEXT: store i8 0, ptr addrspace(3) null, align 2147483648
; AMDGPU-NEXT: br label [[TMP9]]
; AMDGPU: 9:
; AMDGPU-NEXT: unreachable
; AMDGPU-NEXT: br label [[TMP11:%.*]]
; AMDGPU: 10:
; AMDGPU-NEXT: unreachable
; AMDGPU: 11:
; AMDGPU-NEXT: ret i32 0
;
%2 = getelementptr %struct.ConfigurationEnvironmentTy.8, ptr %0, i64 0, i32 2
Expand Down
Loading