From aaec1d040ce48b0eab45678629200fc2f4f0499b Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Mon, 19 Aug 2024 13:33:00 +0000 Subject: [PATCH 1/4] [Analysis] Teach ScalarEvolution::getRangeRef about more dereferenceable objects Whilst dealing with review comments on https://github.com/llvm/llvm-project/pull/96752 I discovered that SCEV does not know about the dereferenceable attribute on function arguments so I have updated getRangeRef to make use of it by calling getPointerDereferenceableBytes. --- llvm/lib/Analysis/ScalarEvolution.cpp | 7 +++---- .../ScalarEvolution/different-loops-recs.ll | 2 +- .../ScalarEvolution/no-wrap-add-exprs.ll | 21 +++++++++++++++++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index a19358dee8ef49..88a201725f19b7 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6858,10 +6858,9 @@ const ConstantRange &ScalarEvolution::getRangeRef( ObjectSizeOpts Opts; Opts.RoundToAlign = false; Opts.NullIsUnknownSize = true; - uint64_t ObjSize; - if ((isa(V) || isa(V) || - isAllocationFn(V, &TLI)) && - getObjectSize(V, ObjSize, DL, &TLI, Opts) && ObjSize > 1) { + bool CanBeNull, CanBeFreed; + uint64_t ObjSize = V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); + if (ObjSize > 1) { // The highest address the object can start is ObjSize bytes before the // end (unsigned max value). If this value is not a multiple of the // alignment, the last possible start value is the next lowest multiple diff --git a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll index 359e22fa41bacd..41e1d059803b21 100644 --- a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll +++ b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll @@ -457,7 +457,7 @@ define void @test_05(i32 %N) { ; CHECK-NEXT: %"alloca point" = bitcast i32 0 to i32 ; CHECK-NEXT: --> 0 U: [0,1) S: [0,1) ; CHECK-NEXT: %tmp = getelementptr [1000 x i32], ptr @A, i32 0, i32 %i.0 -; CHECK-NEXT: --> {(8 + @A),+,4}<%bb3> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (408 + @A) LoopDispositions: { %bb3: Computable } +; CHECK-NEXT: --> {(8 + @A),+,4}<%bb3> U: [40,-3623) S: [-9223372036854775808,9223372036854775805) Exits: (408 + @A) LoopDispositions: { %bb3: Computable } ; CHECK-NEXT: %tmp2 = add i32 %i.0, 1 ; CHECK-NEXT: --> {3,+,1}<%bb3> U: [3,104) S: [3,104) Exits: 103 LoopDispositions: { %bb3: Computable } ; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %tmp2, %bb ] diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll index bd2ffddf396fe8..bb41a3dfc182fe 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll @@ -183,7 +183,7 @@ define void @f3(ptr %x_addr, ptr %y_addr, ptr %tmp_addr) { ; CHECK-NEXT: %s3.zext = zext i8 %s3 to i16 ; CHECK-NEXT: --> (1 + (zext i8 (4 + (32 * %x) + (36 * %y)) to i16)) U: [1,254) S: [1,257) ; CHECK-NEXT: %ptr = bitcast ptr @z_addr to ptr -; CHECK-NEXT: --> @z_addr U: [0,-3) S: [-9223372036854775808,9223372036854775805) +; CHECK-NEXT: --> @z_addr U: [4,-19) S: [-9223372036854775808,9223372036854775805) ; CHECK-NEXT: %int0 = ptrtoint ptr %ptr to i32 ; CHECK-NEXT: --> (trunc i64 (ptrtoint ptr @z_addr to i64) to i32) U: [0,-3) S: [-2147483648,2147483645) ; CHECK-NEXT: %int5 = add i32 %int0, 5 @@ -191,7 +191,7 @@ define void @f3(ptr %x_addr, ptr %y_addr, ptr %tmp_addr) { ; CHECK-NEXT: %int.zext = zext i32 %int5 to i64 ; CHECK-NEXT: --> (1 + (zext i32 (4 + (trunc i64 (ptrtoint ptr @z_addr to i64) to i32)) to i64)) U: [1,4294967294) S: [1,4294967297) ; CHECK-NEXT: %ptr_noalign = bitcast ptr @z_addr_noalign to ptr -; CHECK-NEXT: --> @z_addr_noalign U: full-set S: full-set +; CHECK-NEXT: --> @z_addr_noalign U: [1,-16) S: full-set ; CHECK-NEXT: %int0_na = ptrtoint ptr %ptr_noalign to i32 ; CHECK-NEXT: --> (trunc i64 (ptrtoint ptr @z_addr_noalign to i64) to i32) U: full-set S: full-set ; CHECK-NEXT: %int5_na = add i32 %int0_na, 5 @@ -362,3 +362,20 @@ loop: exit2: ret i1 false } + + +define void @dereferenceable_arg(ptr dereferenceable(128) %len_addr, ptr dereferenceable(128) align(8) %len_addr2) { +; CHECK-LABEL: 'dereferenceable_arg' +; CHECK-NEXT: Classifying expressions for: @dereferenceable_arg +; CHECK-NEXT: %ptr = bitcast ptr %len_addr to ptr +; CHECK-NEXT: --> %len_addr U: [1,-128) S: full-set +; CHECK-NEXT: %ptr2 = bitcast ptr %len_addr2 to ptr +; CHECK-NEXT: --> %len_addr2 U: [8,-135) S: [-9223372036854775808,9223372036854775801) +; CHECK-NEXT: Determining loop execution counts for: @dereferenceable_arg +; + entry: + %ptr = bitcast ptr %len_addr to ptr + %ptr2 = bitcast ptr %len_addr2 to ptr + + ret void +} From d303607a986b9fb7a9f0a73476886459302ff6e1 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 21 Aug 2024 08:26:26 +0000 Subject: [PATCH 2/4] Address review comments * Fix code formatting issues. * Remove unused ObjectSizeOpts variable. * Renamed ObjSize -> DerefBytes. * Added test for dereferenceable_or_null. * While trying to make use of the CanBeNull variable that is set by getPointerDereferenceableBytes I discovered a bug when the Value is an alloca in a non-null address space. I fixed this by making the behaviour consistent with llvm::isKnownNonZero. --- llvm/lib/Analysis/ScalarEvolution.cpp | 17 ++++++++--------- llvm/lib/IR/Value.cpp | 2 +- .../ScalarEvolution/no-wrap-add-exprs.ll | 16 ++++++++++++++++ 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 88a201725f19b7..8253c1de326898 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6855,24 +6855,23 @@ const ConstantRange &ScalarEvolution::getRangeRef( if (U->getType()->isPointerTy() && SignHint == HINT_RANGE_UNSIGNED) { // Strengthen the range if the underlying IR value is a // global/alloca/heap allocation using the size of the object. - ObjectSizeOpts Opts; - Opts.RoundToAlign = false; - Opts.NullIsUnknownSize = true; bool CanBeNull, CanBeFreed; - uint64_t ObjSize = V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); - if (ObjSize > 1) { - // The highest address the object can start is ObjSize bytes before the - // end (unsigned max value). If this value is not a multiple of the + uint64_t DerefBytes = + V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); + if (DerefBytes > 1) { + // The highest address the object can start is DerefBytes bytes before + // the end (unsigned max value). If this value is not a multiple of the // alignment, the last possible start value is the next lowest multiple // of the alignment. Note: The computations below cannot overflow, // because if they would there's no possible start address for the // object. - APInt MaxVal = APInt::getMaxValue(BitWidth) - APInt(BitWidth, ObjSize); + APInt MaxVal = + APInt::getMaxValue(BitWidth) - APInt(BitWidth, DerefBytes); uint64_t Align = U->getValue()->getPointerAlignment(DL).value(); uint64_t Rem = MaxVal.urem(Align); MaxVal -= APInt(BitWidth, Rem); APInt MinVal = APInt::getZero(BitWidth); - if (llvm::isKnownNonZero(V, DL)) + if (!CanBeNull || llvm::isKnownNonZero(V, DL)) MinVal = Align; ConservativeResult = ConservativeResult.intersectWith( ConstantRange::getNonEmpty(MinVal, MaxVal + 1), RangeType); diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index b2ee75811fbb7d..8a13d3c83f313d 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -909,7 +909,7 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL, if (!AI->isArrayAllocation()) { DerefBytes = DL.getTypeStoreSize(AI->getAllocatedType()).getKnownMinValue(); - CanBeNull = false; + CanBeNull = AI->getType()->getPointerAddressSpace() != 0; CanBeFreed = false; } } else if (auto *GV = dyn_cast(this)) { diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll index bb41a3dfc182fe..63890f193baa75 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll @@ -379,3 +379,19 @@ define void @dereferenceable_arg(ptr dereferenceable(128) %len_addr, ptr derefer ret void } + +define void @dereferenceable_or_null_arg(ptr dereferenceable_or_null(128) %len_addr, ptr dereferenceable_or_null(128) align(8) %len_addr2) { +; CHECK-LABEL: 'dereferenceable_or_null_arg' +; CHECK-NEXT: Classifying expressions for: @dereferenceable_or_null_arg +; CHECK-NEXT: %ptr = bitcast ptr %len_addr to ptr +; CHECK-NEXT: --> %len_addr U: [0,-128) S: full-set +; CHECK-NEXT: %ptr2 = bitcast ptr %len_addr2 to ptr +; CHECK-NEXT: --> %len_addr2 U: [0,-135) S: [-9223372036854775808,9223372036854775801) +; CHECK-NEXT: Determining loop execution counts for: @dereferenceable_or_null_arg +; + entry: + %ptr = bitcast ptr %len_addr to ptr + %ptr2 = bitcast ptr %len_addr2 to ptr + + ret void +} From adc58da76d60e5bd5eb5e47c2576c562a207208f Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 21 Aug 2024 12:15:25 +0000 Subject: [PATCH 3/4] Revert some changes and add new test * Reverted the changes to llvm/lib/IR/Value.cpp. * Added another test in Analysis/ScalarEvolution/no-wrap-add-exprs.ll for a differently sized pointer argument. --- llvm/lib/Analysis/ScalarEvolution.cpp | 2 +- llvm/lib/IR/Value.cpp | 2 +- llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 8253c1de326898..21a1c74eefc071 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6871,7 +6871,7 @@ const ConstantRange &ScalarEvolution::getRangeRef( uint64_t Rem = MaxVal.urem(Align); MaxVal -= APInt(BitWidth, Rem); APInt MinVal = APInt::getZero(BitWidth); - if (!CanBeNull || llvm::isKnownNonZero(V, DL)) + if (llvm::isKnownNonZero(V, DL)) MinVal = Align; ConservativeResult = ConservativeResult.intersectWith( ConstantRange::getNonEmpty(MinVal, MaxVal + 1), RangeType); diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index 8a13d3c83f313d..b2ee75811fbb7d 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -909,7 +909,7 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL, if (!AI->isArrayAllocation()) { DerefBytes = DL.getTypeStoreSize(AI->getAllocatedType()).getKnownMinValue(); - CanBeNull = AI->getType()->getPointerAddressSpace() != 0; + CanBeNull = false; CanBeFreed = false; } } else if (auto *GV = dyn_cast(this)) { diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll index 63890f193baa75..b096adc7c5eb40 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll @@ -364,22 +364,26 @@ exit2: } -define void @dereferenceable_arg(ptr dereferenceable(128) %len_addr, ptr dereferenceable(128) align(8) %len_addr2) { +define void @dereferenceable_arg(ptr dereferenceable(128) %len_addr, ptr dereferenceable(128) align(8) %len_addr2, ptr dereferenceable(13) align(1) %len_addr3) { ; CHECK-LABEL: 'dereferenceable_arg' ; CHECK-NEXT: Classifying expressions for: @dereferenceable_arg ; CHECK-NEXT: %ptr = bitcast ptr %len_addr to ptr ; CHECK-NEXT: --> %len_addr U: [1,-128) S: full-set ; CHECK-NEXT: %ptr2 = bitcast ptr %len_addr2 to ptr ; CHECK-NEXT: --> %len_addr2 U: [8,-135) S: [-9223372036854775808,9223372036854775801) +; CHECK-NEXT: %ptr3 = bitcast ptr %len_addr3 to ptr +; CHECK-NEXT: --> %len_addr3 U: [1,-13) S: full-set ; CHECK-NEXT: Determining loop execution counts for: @dereferenceable_arg ; entry: %ptr = bitcast ptr %len_addr to ptr %ptr2 = bitcast ptr %len_addr2 to ptr + %ptr3 = bitcast ptr %len_addr3 to ptr ret void } + define void @dereferenceable_or_null_arg(ptr dereferenceable_or_null(128) %len_addr, ptr dereferenceable_or_null(128) align(8) %len_addr2) { ; CHECK-LABEL: 'dereferenceable_or_null_arg' ; CHECK-NEXT: Classifying expressions for: @dereferenceable_or_null_arg From efa5f41cafbe92b44d00c20a2aa9d49567f7d1a4 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Thu, 22 Aug 2024 11:52:36 +0000 Subject: [PATCH 4/4] Fix test scev-custom-dl.ll --- llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll index aaea1a453664b9..d5a422ad41f559 100644 --- a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll +++ b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll @@ -112,7 +112,7 @@ define void @test_range_ref1a(i32 %x) { ; CHECK-NEXT: %i.01.0 = phi i32 [ 100, %entry ], [ %tmp4, %bb ] ; CHECK-NEXT: --> {100,+,-1}<%bb> U: [0,101) S: [0,101) Exits: 0 LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %tmp1 = getelementptr [101 x i32], ptr @array, i32 0, i32 %i.01.0 -; CHECK-NEXT: --> {(400 + @array),+,-4}<%bb> U: [0,-3) S: [-2147483648,2147483645) Exits: @array LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(400 + @array),+,-4}<%bb> U: [0,-3) S: [-2147483648,2147483645) Exits: @array LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %tmp4 = add nsw i32 %i.01.0, -1 ; CHECK-NEXT: --> {99,+,-1}<%bb> U: [-1,100) S: [-1,100) Exits: -1 LoopDispositions: { %bb: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_range_ref1a