Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SimplifyCFG] Deduce paths unreachable if they cause div/rem UB #109008

Closed

Conversation

goldsteinn
Copy link
Contributor

@goldsteinn goldsteinn commented Sep 17, 2024

  • [SimplifyCFG] Add tests for deducing paths unreachable if they cause div/rem UB; NFC
  • [SimplifyCFG] Deduce paths unreachable if they cause div/rem UB

Same we way mark a path unreachable if it may cause a nullptr
dereference, div/rem by zero or signed div/rem of INT_MIN by -1 cause
immediate UB.

@llvmbot
Copy link
Collaborator

llvmbot commented Sep 17, 2024

@llvm/pr-subscribers-llvm-transforms

Author: None (goldsteinn)

Changes
  • [SimplifyCFG] Add tests for deducing paths unreachable if they cause div/rem UB; NFC
  • [SimplifyCFG] Deduce paths unreachable if they cause div/rem UB

Full diff: https://github.com/llvm/llvm-project/pull/109008.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+20-3)
  • (modified) llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll (+274)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 5a694b5e7f204b..b60f279ca441cf 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7864,10 +7864,10 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
   if (I->use_empty())
     return false;
 
-  if (C->isNullValue() || isa<UndefValue>(C)) {
+  if (C->isNullValue() || isa<UndefValue>(C) || C->isAllOnesValue()) {
     // Only look at the first use we can handle, avoid hurting compile time with
     // long uselists
-    auto FindUse = llvm::find_if(I->users(), [](auto *U) {
+    auto FindUse = llvm::find_if(I->users(), [C](auto *U) {
       auto *Use = cast<Instruction>(U);
       // Change this list when we want to add new instructions.
       switch (Use->getOpcode()) {
@@ -7881,7 +7881,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
       case Instruction::Call:
       case Instruction::CallBr:
       case Instruction::Invoke:
-        return true;
+        return C->isNullValue() || isa<UndefValue>(C);
+      case Instruction::UDiv:
+      case Instruction::URem:
+        return C->isNullValue();
+      case Instruction::SDiv:
+      case Instruction::SRem:
+        return C->isNullValue() || C->isAllOnesValue();
       }
     });
     if (FindUse == I->user_end())
@@ -7982,6 +7988,17 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
           }
       }
     }
+    if (match(Use, m_BinOp(m_Value(), m_Specific(I)))) {
+      // Immediate UB to divide by zero
+      if (Use->getOpcode() == Instruction::UDiv ||
+          Use->getOpcode() == Instruction::URem)
+        return C->isNullValue();
+      // Immediate UB to signed-divide INT_MIN by -1
+      if (Use->getOpcode() == Instruction::SDiv ||
+          Use->getOpcode() == Instruction::SRem)
+        return C->isNullValue() ||
+               (C->isAllOnesValue() && match(Use->getOperand(1), m_SignMask()));
+    }
   }
   return false;
 }
diff --git a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index c4602e72ecbce0..8312057190143d 100644
--- a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -918,6 +918,280 @@ bb5:                                              ; preds = %bb3, %bb
   ret i32 %i7
 }
 
+declare void @side.effect()
+declare i8 @get.i8()
+
+define i8 @udiv_by_zero(i8 noundef %x, i8 noundef %i, i8 noundef %v) {
+; CHECK-LABEL: @udiv_by_zero(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:      i8 9, label [[SW_BB2:%.*]]
+; CHECK-NEXT:      i8 2, label [[RETURN:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[Y:%.*]] = phi i8 [ 9, [[SW_BB2]] ], [ [[V:%.*]], [[SW_DEFAULT]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[R:%.*]] = udiv i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+entry:
+  switch i8 %i, label %sw.default [
+  i8 0, label %sw.bb0
+  i8 2, label %sw.bb1
+  i8 9, label %sw.bb2
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+sw.bb2:
+  br label %return
+sw.default:
+  br label %return
+
+return:
+  %y = phi i8 [ 0, %sw.bb0 ], [ 2, %sw.bb1 ], [ 9, %sw.bb2 ], [ %v, %sw.default ]
+  %r = udiv i8 %x, %y
+  ret i8 %r
+}
+
+define i8 @urem_by_zero(i8 noundef %x, i8 noundef %i, i8 noundef %v) {
+; CHECK-LABEL: @urem_by_zero(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:      i8 0, label [[RETURN:%.*]]
+; CHECK-NEXT:      i8 2, label [[SW_BB1:%.*]]
+; CHECK-NEXT:      i8 9, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    unreachable
+; CHECK:       return:
+; CHECK-NEXT:    [[Y:%.*]] = phi i8 [ 2, [[SW_BB1]] ], [ 9, [[SW_BB2]] ], [ [[V:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[R:%.*]] = urem i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+entry:
+  switch i8 %i, label %sw.default [
+  i8 0, label %sw.bb0
+  i8 2, label %sw.bb1
+  i8 9, label %sw.bb2
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+sw.bb2:
+  br label %return
+sw.default:
+  br label %return
+
+return:
+  %y = phi i8 [ %v, %sw.bb0 ], [ 2, %sw.bb1 ], [ 9, %sw.bb2 ], [ 0, %sw.default ]
+  %r = urem i8 %x, %y
+  ret i8 %r
+}
+
+define i8 @udiv_of_zero_okay(i8 noundef %x, i8 noundef %i, i8 noundef %v) {
+; CHECK-LABEL: @udiv_of_zero_okay(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:      i8 0, label [[RETURN:%.*]]
+; CHECK-NEXT:      i8 2, label [[SW_BB1:%.*]]
+; CHECK-NEXT:      i8 9, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[Y:%.*]] = phi i8 [ 2, [[SW_BB1]] ], [ 9, [[SW_BB2]] ], [ [[V:%.*]], [[SW_DEFAULT]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[R:%.*]] = udiv i8 [[Y]], [[X:%.*]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+entry:
+  switch i8 %i, label %sw.default [
+  i8 0, label %sw.bb0
+  i8 2, label %sw.bb1
+  i8 9, label %sw.bb2
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+sw.bb2:
+  br label %return
+sw.default:
+  br label %return
+
+return:
+  %y = phi i8 [ 0, %sw.bb0 ], [ 2, %sw.bb1 ], [ 9, %sw.bb2 ], [ %v, %sw.default ]
+  %r = udiv i8 %y, %x
+  ret i8 %r
+}
+
+define i8 @srem_by_zero(i8 noundef %x, i8 noundef %i) {
+; CHECK-LABEL: @srem_by_zero(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[I:%.*]], 9
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @side.effect()
+; CHECK-NEXT:    unreachable
+; CHECK:       if.else:
+; CHECK-NEXT:    [[V:%.*]] = call i8 @get.i8()
+; CHECK-NEXT:    [[R:%.*]] = srem i8 [[X:%.*]], [[V]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+entry:
+  %cmp = icmp ult i8 %i, 9
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  call void @side.effect()
+  br label %if.end
+
+if.else:
+  %v = call i8 @get.i8()
+  br label %if.end
+
+if.end:
+  %y = phi i8 [ 0, %if.then ], [ %v, %if.else ]
+  %r = srem i8 %x, %y
+  ret i8 %r
+}
+
+define i8 @srem_no_overflow_okay(i8 noundef %i) {
+; CHECK-LABEL: @srem_no_overflow_okay(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[I:%.*]], 9
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @side.effect()
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[V:%.*]] = call i8 @get.i8()
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[Y:%.*]] = phi i8 [ -1, [[IF_THEN]] ], [ [[V]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = srem i8 [[Y]], -128
+; CHECK-NEXT:    ret i8 [[R]]
+;
+entry:
+  %cmp = icmp ult i8 %i, 9
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  call void @side.effect()
+  br label %if.end
+
+if.else:
+  %v = call i8 @get.i8()
+  br label %if.end
+
+if.end:
+  %y = phi i8 [ -1, %if.then ], [ %v, %if.else ]
+  %r = srem i8 %y, 128
+  ret i8 %r
+}
+
+define i8 @sdiv_overflow_ub(i8 noundef %i) {
+; CHECK-LABEL: @sdiv_overflow_ub(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:      i8 0, label [[RETURN:%.*]]
+; CHECK-NEXT:      i8 2, label [[SW_BB1:%.*]]
+; CHECK-NEXT:      i8 9, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    [[V:%.*]] = call i8 @get.i8()
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    unreachable
+; CHECK:       return:
+; CHECK-NEXT:    [[Y:%.*]] = phi i8 [ [[V]], [[SW_BB1]] ], [ -1, [[SW_BB2]] ], [ 4, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[R:%.*]] = sdiv i8 -128, [[Y]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+entry:
+  switch i8 %i, label %sw.default [
+  i8 0, label %sw.bb0
+  i8 2, label %sw.bb1
+  i8 9, label %sw.bb2
+  ]
+
+sw.bb0:
+  br label %return
+sw.bb1:
+  %v = call i8 @get.i8()
+  br label %return
+sw.bb2:
+  br label %return
+sw.default:
+  unreachable
+
+return:
+  %y = phi i8 [ 4, %sw.bb0 ], [ %v, %sw.bb1 ], [ -1, %sw.bb2 ]
+  %r = sdiv i8 128, %y
+  ret i8 %r
+}
+
+define i8 @sdiv_overflow_ub_2x(i8 noundef %i) {
+; CHECK-LABEL: @sdiv_overflow_ub_2x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:      i8 9, label [[RETURN:%.*]]
+; CHECK-NEXT:      i8 2, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    [[V:%.*]] = call i8 @get.i8()
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    unreachable
+; CHECK:       return:
+; CHECK-NEXT:    [[Y:%.*]] = phi i8 [ [[V]], [[SW_BB1]] ], [ -1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[R:%.*]] = sdiv i8 -128, [[Y]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+entry:
+  switch i8 %i, label %sw.default [
+  i8 0, label %sw.bb0
+  i8 2, label %sw.bb1
+  i8 9, label %sw.bb2
+  ]
+
+sw.bb0:
+  br label %return
+sw.bb1:
+  %v = call i8 @get.i8()
+  br label %return
+sw.bb2:
+  br label %return
+sw.default:
+  unreachable
+
+return:
+  %y = phi i8 [ 0, %sw.bb0 ], [ %v, %sw.bb1 ], [ -1, %sw.bb2 ]
+  %r = sdiv i8 128, %y
+  ret i8 %r
+}
+
 attributes #0 = { null_pointer_is_valid }
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }

@goldsteinn goldsteinn changed the title goldsteinn/div rem ub unreachable [SimplifyCFG] Deduce paths unreachable if they cause div/rem UB Sep 17, 2024
Same we way mark a path unreachable if it may cause a nullptr
dereference, div/rem by zero or signed div/rem of INT_MIN by -1 cause
immediate UB.
@goldsteinn goldsteinn force-pushed the goldsteinn/div-rem-ub-unreachable branch from bd3f7e6 to 9978048 Compare September 17, 2024 16:09
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the division by zero part of this is reasonable, but handling SignedMin / -1 is an unnecessary complication, especially as it adds another constant root. It's not like SignedMin / x is common code.

@goldsteinn
Copy link
Contributor Author

I think the division by zero part of this is reasonable, but handling SignedMin / -1 is an unnecessary complication, especially as it adds another constant root. It's not like SignedMin / x is common code.

Dropped the INT_MIN / -1 logic.

Copy link

github-actions bot commented Sep 17, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 2d13302d3851747a023ffecc6388585e888cd0e9 ceeb62762f24a1f5b91fb8c993f68962c8870a4c --extensions cpp -- llvm/lib/Transforms/Utils/SimplifyCFG.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 77b0b2b7e1..14c72df880 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7989,7 +7989,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
           }
       }
     }
-	// Div/Rem by zero is immediate UB
+    // Div/Rem by zero is immediate UB
     if (match(Use, m_BinOp(m_Value(), m_Specific(I))) && Use->isIntDivRem())
       return true;
   }

// logic is.
case Instruction::SDiv:
case Instruction::SRem:
return C->isNullValue();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't think you need this check? Division by undef is also UB (if undef is chosen as 0).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we need to worry about undef being chosen on non-zero?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As undef is chosen per-use, we can always chose zero here, even if another use is non-zero.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done :)

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, but please fix the code formatting.

declare void @side.effect()
declare i8 @get.i8()

define i8 @udiv_by_zero(i8 noundef %x, i8 noundef %i, i8 noundef %v) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the noundefs relevant?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will fix before pushing.

tmsri pushed a commit to tmsri/llvm-project that referenced this pull request Sep 19, 2024
Same we way mark a path unreachable if it may cause a nullptr
dereference, div/rem by zero or signed div/rem of INT_MIN by -1 cause
immediate UB.

Closes llvm#109008
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants