-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[EarlyCSE] De-Duplicate callsites with differing attrs #110929
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-ir Author: None (goldsteinn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/110929.diff 4 Files Affected:
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 61dba265dc948b..c6084fa762be08 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -876,7 +876,8 @@ class Instruction : public User,
/// Return true if the specified instruction is exactly identical to the
/// current one. This means that all operands match and any extra information
/// (e.g. load is volatile) agree.
- bool isIdenticalTo(const Instruction *I) const LLVM_READONLY;
+ bool isIdenticalTo(const Instruction *I,
+ bool IntersectAttrs = false) const LLVM_READONLY;
/// This is like isIdenticalTo, except that it ignores the
/// SubclassOptionalData flags, which may specify conditions under which the
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index eec751078698b2..d9b47351bf543d 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -862,8 +862,9 @@ bool Instruction::hasSameSpecialState(const Instruction *I2,
return true;
}
-bool Instruction::isIdenticalTo(const Instruction *I) const {
- return isIdenticalToWhenDefined(I) &&
+bool Instruction::isIdenticalTo(const Instruction *I,
+ bool IntersectAttrs) const {
+ return isIdenticalToWhenDefined(I, IntersectAttrs) &&
SubclassOptionalData == I->SubclassOptionalData;
}
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index cf11f5bc885a75..223f307e1baf08 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -362,7 +362,7 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
if (LHSI->getOpcode() != RHSI->getOpcode())
return false;
- if (LHSI->isIdenticalToWhenDefined(RHSI)) {
+ if (LHSI->isIdenticalToWhenDefined(RHSI, /*IntersectAttrs=*/true)) {
// Convergent calls implicitly depend on the set of threads that is
// currently executing, so conservatively return false if they are in
// different basic blocks.
@@ -551,7 +551,7 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
return false;
- return LHSI->isIdenticalTo(RHSI);
+ return LHSI->isIdenticalTo(RHSI, /*IntersectAttrs=*/true);
}
//===----------------------------------------------------------------------===//
@@ -621,7 +621,7 @@ bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
return false;
if (LHS.ConstantOffset.has_value() && RHS.ConstantOffset.has_value())
return LHS.ConstantOffset.value() == RHS.ConstantOffset.value();
- return LGEP->isIdenticalToWhenDefined(RGEP);
+ return LGEP->isIdenticalToWhenDefined(RGEP, /*IntersectAttrs=*/true);
}
//===----------------------------------------------------------------------===//
@@ -1632,6 +1632,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
+
+ // Potential TODO: We may be throwing away attribute information when
+ // we delete Inst that we could propagate too InVal.first.
if (!Inst.use_empty())
Inst.replaceAllUsesWith(InVal.first);
salvageKnowledge(&Inst, &AC);
diff --git a/llvm/test/Transforms/EarlyCSE/replace-calls-def-attrs.ll b/llvm/test/Transforms/EarlyCSE/replace-calls-def-attrs.ll
new file mode 100644
index 00000000000000..3b1f39bcf3498d
--- /dev/null
+++ b/llvm/test/Transforms/EarlyCSE/replace-calls-def-attrs.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -passes=early-cse < %s | FileCheck %s
+
+declare i8 @baz(i8, i8) readnone
+declare i8 @buz(i8, i8)
+define i8 @same_parent_combine_diff_attrs(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @same_parent_combine_diff_attrs(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[C1:%.*]] = call i8 @baz(i8 [[X]], i8 noundef [[Y]])
+; CHECK-NEXT: [[R:%.*]] = call i8 @buz(i8 [[C1]], i8 [[C1]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %c1 = call i8 @baz(i8 %x, i8 noundef %y)
+ %c0 = call i8 @baz(i8 noundef %x, i8 noundef %y)
+ %r = call i8 @buz(i8 %c0, i8 %c1)
+ ret i8 %r
+
+}
+
+define i8 @diff_parent_combine_diff_attrs(i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @diff_parent_combine_diff_attrs(
+; CHECK-SAME: i1 [[C:%.*]], i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[C1:%.*]] = call i8 @baz(i8 [[X]], i8 noundef [[Y]])
+; CHECK-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; CHECK: [[T]]:
+; CHECK-NEXT: [[R:%.*]] = call i8 @buz(i8 [[C1]], i8 [[C1]])
+; CHECK-NEXT: ret i8 [[R]]
+; CHECK: [[F]]:
+; CHECK-NEXT: [[R2:%.*]] = add i8 [[C1]], 4
+; CHECK-NEXT: ret i8 [[R2]]
+;
+ %c1 = call i8 @baz(i8 %x, i8 noundef %y)
+ br i1 %c, label %T, label %F
+T:
+ %c0 = call i8 @baz(i8 noundef %x, i8 noundef %y)
+ %r = call i8 @buz(i8 %c0, i8 %c1)
+ ret i8 %r
+F:
+ %r2 = add i8 %c1, 4
+ ret i8 %r2
+}
+
+define i8 @same_parent_combine_diff_attrs_todo(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @same_parent_combine_diff_attrs_todo(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[C1:%.*]] = call i8 @baz(i8 [[X]], i8 noundef [[Y]])
+; CHECK-NEXT: [[C0:%.*]] = call i8 @baz(i8 noundef [[X]], i8 noundef [[Y]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @buz(i8 [[C0]], i8 [[C1]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %c1 = call i8 @baz(i8 %x, i8 noundef %y)
+ %c0 = call i8 @baz(i8 noundef %x, i8 noundef %y) alwaysinline
+ %r = call i8 @buz(i8 %c0, i8 %c1)
+ ret i8 %r
+
+}
+
+define i8 @same_parent_combine_diff_attrs_fail(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @same_parent_combine_diff_attrs_fail(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[C1:%.*]] = call i8 @baz(i8 [[X]], i8 noundef [[Y]])
+; CHECK-NEXT: [[C0:%.*]] = call i8 @baz(i8 noundef [[X]], i8 noundef [[Y]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @buz(i8 [[C0]], i8 [[C1]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %c1 = call i8 @baz(i8 %x, i8 noundef %y)
+ %c0 = call i8 @baz(i8 noundef %x, i8 noundef %y) strictfp
+ %r = call i8 @buz(i8 %c0, i8 %c1)
+ ret i8 %r
+
+}
+
+define i8 @diff_parent_combine_diff_attrs_todo(i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @diff_parent_combine_diff_attrs_todo(
+; CHECK-SAME: i1 [[C:%.*]], i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[C1:%.*]] = call i8 @baz(i8 [[X]], i8 noundef [[Y]])
+; CHECK-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; CHECK: [[T]]:
+; CHECK-NEXT: [[C0:%.*]] = call i8 @baz(i8 noundef [[X]], i8 noundef [[Y]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @buz(i8 [[C0]], i8 [[C1]])
+; CHECK-NEXT: ret i8 [[R]]
+; CHECK: [[F]]:
+; CHECK-NEXT: [[R2:%.*]] = add i8 [[C1]], 4
+; CHECK-NEXT: ret i8 [[R2]]
+;
+ %c1 = call i8 @baz(i8 %x, i8 noundef %y)
+ br i1 %c, label %T, label %F
+T:
+ %c0 = call i8 @baz(i8 noundef %x, i8 noundef %y) optnone noinline
+ %r = call i8 @buz(i8 %c0, i8 %c1)
+ ret i8 %r
+F:
+ %r2 = add i8 %c1, 4
+ ret i8 %r2
+}
+
+define i8 @diff_parent_combine_diff_attrs_fail(i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @diff_parent_combine_diff_attrs_fail(
+; CHECK-SAME: i1 [[C:%.*]], i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[C1:%.*]] = call i8 @baz(i8 [[X]], i8 noundef [[Y]])
+; CHECK-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; CHECK: [[T]]:
+; CHECK-NEXT: [[C0:%.*]] = call i8 @baz(i8 noundef [[X]], i8 noundef [[Y]]) #[[ATTR2]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @buz(i8 [[C0]], i8 [[C1]])
+; CHECK-NEXT: ret i8 [[R]]
+; CHECK: [[F]]:
+; CHECK-NEXT: [[R2:%.*]] = add i8 [[C1]], 4
+; CHECK-NEXT: ret i8 [[R2]]
+;
+ %c1 = call i8 @baz(i8 %x, i8 noundef %y)
+ br i1 %c, label %T, label %F
+T:
+ %c0 = call i8 @baz(i8 noundef %x, i8 noundef %y) strictfp
+ %r = call i8 @buz(i8 %c0, i8 %c1)
+ ret i8 %r
+F:
+ %r2 = add i8 %c1, 4
+ ret i8 %r2
+}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { memory(none) }
+; CHECK: attributes #[[ATTR1]] = { alwaysinline }
+; CHECK: attributes #[[ATTR2]] = { strictfp }
+; CHECK: attributes #[[ATTR3]] = { noinline optnone }
+;.
|
AFAICT, we can just entirely ignore the attributes, but maybe I'm missing something. |
@@ -1632,6 +1632,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { | |||
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n"); | |||
continue; | |||
} | |||
|
|||
// Potential TODO: We may be throwing away attribute information when | |||
// we delete Inst that we could propagate too InVal.first. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suspect we will run into correctness issues if we don't combine the attributes after CSE. But I cannot think of a counterexample right now.
// we delete Inst that we could propagate too InVal.first. | |
// we delete Inst that we could propagate to InVal.first. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we dont combine?
I was thinking about it, are there any callsite attributes that can create side-effects?
AFAICT no.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We still need to intersect attributes here. If say the first call returns nonnull
but is (dynamically) unused, and the second one does not return nonnull
but is used, you'll now replace it with a nonnull
call and a potential poison value.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, I can see that argument for return values, but for param or fn?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we did CSE for non-readonly funtions ofc I could see param args mattering, but with the limits I think its only return?
@@ -1632,6 +1632,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { | |||
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n"); | |||
continue; | |||
} | |||
|
|||
// Potential TODO: We may be throwing away attribute information when | |||
// we delete Inst that we could propagate too InVal.first. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We still need to intersect attributes here. If say the first call returns nonnull
but is (dynamically) unused, and the second one does not return nonnull
but is used, you'll now replace it with a nonnull
call and a potential poison value.
ba98d3c
to
2fbda15
Compare
We only do this if the attributes of the two callsites are compatible (intersectable) which is probably not in fact necessary.
ada9f69
to
8ff29ea
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
// callsite for CSEing, we can probably ignore more attributes. | ||
// Generally poison generating attributes need to be handled with more | ||
// care as they can create *new* UB if preserved/combined and violated. | ||
// Attributes that imply immediate UB on the otherhand would have been |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// Attributes that imply immediate UB on the otherhand would have been | |
// Attributes that imply immediate UB on the other hand would have been |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ill change before pushing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
assert(Success && "Failed to intersect attributes in callsites that " | ||
"passed identical check"); | ||
// For NDEBUG Compile. | ||
(void)Success; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd use [[maybe_unused]]
here, but either way is fine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tend to prefer the (void)X
. Although not religiously.
We only do this if the attributes of the two callsites are compatible
(intersectable) which is probably not in fact necessary.