Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Float2Int] Resolve FIXME: Pick the smallest legal type that fits #79158

Merged
merged 2 commits into from
Mar 13, 2024

Conversation

AreaZR
Copy link
Contributor

@AreaZR AreaZR commented Jan 23, 2024

Pick the type based on the smallest bit-width possible, using DataLayout.

@llvmbot
Copy link
Collaborator

llvmbot commented Jan 23, 2024

@llvm/pr-subscribers-llvm-transforms

Author: AtariDreams (AtariDreams)

Changes

Pick the type based on the smallest bitwidth possible


Full diff: https://github.com/llvm/llvm-project/pull/79158.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/Float2Int.cpp (+10-2)
  • (modified) llvm/test/Transforms/Float2Int/basic.ll (+27-28)
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index ccca8bcc1a56ac7..108ad297ce7519e 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -383,8 +383,16 @@ bool Float2IntPass::validateAndTransform() {
     }
 
     // OK, R is known to be representable. Now pick a type for it.
-    // FIXME: Pick the smallest legal type that will fit.
-    Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx);
+    Type *Ty;
+    if (MinBW <= 8) {
+      Ty = Type::getInt8Ty(*Ctx);
+    } else if (MinBW <= 16) {
+      Ty = Type::getInt16Ty(*Ctx);
+    } else if (MinBW <= 32) {
+      Ty = Type::getInt32Ty(*Ctx);
+    } else {
+      Ty = Type::getInt64Ty(*Ctx);
+    }
 
     for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
          MI != ME; ++MI)
diff --git a/llvm/test/Transforms/Float2Int/basic.ll b/llvm/test/Transforms/Float2Int/basic.ll
index 2854a83179b7eb6..aa9ef36e4caaab1 100644
--- a/llvm/test/Transforms/Float2Int/basic.ll
+++ b/llvm/test/Transforms/Float2Int/basic.ll
@@ -7,10 +7,9 @@
 
 define i16 @simple1(i8 %a) {
 ; CHECK-LABEL: @simple1(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[T21:%.*]] = add i32 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[T21]] to i16
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[T21:%.*]] = add i16 [[TMP1]], 1
+; CHECK-NEXT:    ret i16 [[T21]]
 ;
   %t1 = uitofp i8 %a to float
   %t2 = fadd float %t1, 1.0
@@ -20,9 +19,9 @@ define i16 @simple1(i8 %a) {
 
 define i8 @simple2(i8 %a) {
 ; CHECK-LABEL: @simple2(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[T21:%.*]] = sub i32 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[T21]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[T21:%.*]] = sub i16 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 [[T21]] to i8
 ; CHECK-NEXT:    ret i8 [[TMP2]]
 ;
   %t1 = uitofp i8 %a to float
@@ -33,9 +32,10 @@ define i8 @simple2(i8 %a) {
 
 define i32 @simple3(i8 %a) {
 ; CHECK-LABEL: @simple3(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[T21:%.*]] = sub i32 [[TMP1]], 1
-; CHECK-NEXT:    ret i32 [[T21]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[T21:%.*]] = sub i16 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[T21]] to i32
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %t1 = uitofp i8 %a to float
   %t2 = fsub float %t1, 1.0
@@ -45,9 +45,9 @@ define i32 @simple3(i8 %a) {
 
 define i1 @cmp(i8 %a, i8 %b) {
 ; CHECK-LABEL: @cmp(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i32
-; CHECK-NEXT:    [[T31:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
+; CHECK-NEXT:    [[T31:%.*]] = icmp slt i16 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret i1 [[T31]]
 ;
   %t1 = uitofp i8 %a to float
@@ -106,13 +106,14 @@ define i32 @simple6(i8 %a, i8 %b) {
 
 define i32 @multi1(i8 %a, i8 %b, i8 %c, float %d) {
 ; CHECK-LABEL: @multi1(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
 ; CHECK-NEXT:    [[FC:%.*]] = uitofp i8 [[C:%.*]] to float
-; CHECK-NEXT:    [[X1:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[X1:%.*]] = add i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[X1]] to i32
 ; CHECK-NEXT:    [[Z:%.*]] = fadd float [[FC]], [[D:%.*]]
 ; CHECK-NEXT:    [[W:%.*]] = fptoui float [[Z]] to i32
-; CHECK-NEXT:    [[R:%.*]] = add i32 [[X1]], [[W]]
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[TMP3]], [[W]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %fa = uitofp i8 %a to float
@@ -128,10 +129,9 @@ define i32 @multi1(i8 %a, i8 %b, i8 %c, float %d) {
 
 define i16 @simple_negzero(i8 %a) {
 ; CHECK-LABEL: @simple_negzero(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[T21:%.*]] = add i32 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[T21]] to i16
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[T21:%.*]] = add i16 [[TMP1]], 0
+; CHECK-NEXT:    ret i16 [[T21]]
 ;
   %t1 = uitofp i8 %a to float
   %t2 = fadd fast float %t1, -0.0
@@ -141,9 +141,9 @@ define i16 @simple_negzero(i8 %a) {
 
 define i32 @simple_negative(i8 %call) {
 ; CHECK-LABEL: @simple_negative(
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[CALL:%.*]] to i32
-; CHECK-NEXT:    [[MUL1:%.*]] = mul i32 [[TMP1]], -3
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[MUL1]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[CALL:%.*]] to i16
+; CHECK-NEXT:    [[MUL1:%.*]] = mul i16 [[TMP1]], -3
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 [[MUL1]] to i8
 ; CHECK-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[CONV3]]
 ;
@@ -156,10 +156,9 @@ define i32 @simple_negative(i8 %call) {
 
 define i16 @simple_fneg(i8 %a) {
 ; CHECK-LABEL: @simple_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[T21:%.*]] = sub i32 0, [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[T21]] to i16
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[T21:%.*]] = sub i16 0, [[TMP1]]
+; CHECK-NEXT:    ret i16 [[T21]]
 ;
   %t1 = uitofp i8 %a to float
   %t2 = fneg fast float %t1

@AreaZR AreaZR force-pushed the size branch 7 times, most recently from 590d9f6 to 0f671c9 Compare January 26, 2024 16:39
@AreaZR AreaZR force-pushed the size branch 2 times, most recently from 1b1021d to aef840a Compare January 26, 2024 17:02
Copy link

github-actions bot commented Jan 26, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@AreaZR AreaZR force-pushed the size branch 2 times, most recently from 4175133 to b238489 Compare January 26, 2024 20:37
@AreaZR
Copy link
Contributor Author

AreaZR commented Jan 26, 2024

@nikic Fixed!

@AreaZR AreaZR requested a review from nikic January 26, 2024 20:37
@AreaZR AreaZR force-pushed the size branch 7 times, most recently from ea8d0a5 to 6d156a2 Compare January 29, 2024 18:49
@aeubanks
Copy link
Contributor

aeubanks commented Mar 19, 2024

I was able to reproduce the difference before and after the revert of the return value being different

edit: on x86-64 with clang -O3

@AreaZR
Copy link
Contributor Author

AreaZR commented Mar 19, 2024

I was able to reproduce the difference before and after the revert of the return value being different

edit: on x86-64 with clang -O3

Oddly enough, the pass itself doesn't seem to be the issue.

@AreaZR
Copy link
Contributor Author

AreaZR commented Mar 19, 2024

I was able to reproduce the difference before and after the revert of the return value being different
edit: on x86-64 with clang -O3

Oddly enough, the pass itself doesn't seem to be the issue.

The -passes=float2int one I mean

@AreaZR
Copy link
Contributor Author

AreaZR commented Mar 19, 2024

Screenshot 2024-03-19 at 3 31 09 PM The issue in question is not the Float2Int pass.

@AreaZR
Copy link
Contributor Author

AreaZR commented Mar 19, 2024

PR: only running float2int pass
PRO: Running -O3

@AreaZR
Copy link
Contributor Author

AreaZR commented Mar 19, 2024

Does anyone have a full list of passes that I can try to see is the culprit?

@alexfh
Copy link
Contributor

alexfh commented Mar 19, 2024

As far as clang is concerned, you can use LLVM options -opt-bisect-limit (https://llvm.org/docs/OptBisect.html), -print-before-all, -print-after-all, -print-module-scope, -print-pipeline-passes (all need to be prepended with -mllvm when passed to clang).

Apart from that, the Opt Pipeline view in the compiler explorer may be useful: https://godbolt.org/z/MY38Wc5dn

@alexfh
Copy link
Contributor

alexfh commented Mar 19, 2024

Oddly enough, the pass itself doesn't seem to be the issue.

Sad but true: this is a usual situation, when a seemingly correct change in one pass uncovers issues in one or more of the following passes in the pipeline. This may feel like playing pick up sticks at times, but we see this regularly =(

@jrtc27
Copy link
Collaborator

jrtc27 commented Mar 20, 2024

Screenshot 2024-03-19 at 3 31 09 PM The issue in question is not the Float2Int pass.

Well most passes are pretty uninteresting if you run them on pre-SROA/mem2reg IR. Looking at the later godbolt link, there's the following transform for Float2IntPass:

Before:

define dso_local noundef i32 @main() local_unnamed_addr {
entry:
  %x = alloca i32, align 4
    #dbg_assign(i1 undef, !18, !DIExpression(), !21, ptr %x, !DIExpression(), !22)
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
  store volatile i32 1, ptr %x, align 4
    #dbg_assign(i32 1, !18, !DIExpression(), !29, ptr %x, !DIExpression(), !22)
  %x.0.x.0.x.0. = load volatile i32, ptr %x, align 4
  %cmp = icmp sgt i32 %x.0.x.0.x.0., 0
    #dbg_value(i1 %cmp, !20, !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value), !22)
  %conv = uitofp i1 %cmp to float
  %mul = fmul float %conv, 2.550000e+02
  %conv1 = fptoui float %mul to i8
  %conv2 = zext i8 %conv1 to i32
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x)
  ret i32 %conv2
}

After:

define dso_local noundef i32 @main() local_unnamed_addr {
entry:
  %x = alloca i32, align 4
    #dbg_assign(i1 undef, !18, !DIExpression(), !21, ptr %x, !DIExpression(), !22)
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
  store volatile i32 1, ptr %x, align 4
    #dbg_assign(i32 1, !18, !DIExpression(), !29, ptr %x, !DIExpression(), !22)
  %x.0.x.0.x.0. = load volatile i32, ptr %x, align 4
  %cmp = icmp sgt i32 %x.0.x.0.x.0., 0
    #dbg_value(i1 %cmp, !20, !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value), !22)
  %0 = zext i1 %cmp to i8
  %mul3 = mul i8 %0, 127
  %conv2 = zext i8 %mul3 to i32
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x)
  ret i32 %conv2
}

This is why you've received feedback about these kinds of small changes being awkward to review. When they don't offer significant gains, they are often outweighed by the high risk that something will break, whether that's because the change in question is incorrect or because it exposes incorrect code elsewhere.

@jrtc27
Copy link
Collaborator

jrtc27 commented Mar 20, 2024

At a glance it looks like confusion about whether or not there's a sign bit.

@jrtc27
Copy link
Collaborator

jrtc27 commented Mar 20, 2024

This seems to be a preexisting issue that you can hit if you pick specific float values: https://godbolt.org/z/rxdqrve3d

It's just hit more often now because you're picking smaller integers. Before you had to be in the range [2^31, 2^32), converted to a double, but now it's true for [2^7, 2^8) and [2^15, 2^16), both of which also fit in a single-precision float, unlike the prior case.

@jrtc27
Copy link
Collaborator

jrtc27 commented Mar 20, 2024

Compiler Explorer shows this bug as being reproducible with that exact test case all the way back to Clang 3.7 (but not 3.6). Given Float2Int was new in 3.7 (at least from GitHub's view of available tags) that strongly suggests to me this bug has always been there since it was added to the tree (or at least the final attempt at adding it to the tree).

@topperc
Copy link
Collaborator

topperc commented Mar 20, 2024

I found a bug in the ConstantRange handling UIToFP and SIToFP handling which may explain the failure #86041

topperc added a commit to topperc/llvm-project that referenced this pull request Mar 21, 2024
We were passing the min and max values of the range to the ConstantRange
constructor, but the constructor expects the upper bound to 1
more than the max value so we need to add 1.

We also need to use getNonEmpty so that passing 0, 0 to the constructor
creates a full range rather than an empty range. And passing smin, smax+1
doesn't cause an assertion.

I believe this fixes at least some of the reason llvm#79158 was reverted.
topperc added a commit to topperc/llvm-project that referenced this pull request Mar 21, 2024
We were passing the min and max values of the range to the ConstantRange
constructor, but the constructor expects the upper bound to 1
more than the max value so we need to add 1.

We also need to use getNonEmpty so that passing 0, 0 to the constructor
creates a full range rather than an empty range. And passing smin, smax+1
doesn't cause an assertion.

I believe this fixes at least some of the reason llvm#79158 was reverted.
topperc added a commit to topperc/llvm-project that referenced this pull request Mar 21, 2024
We were passing the min and max values of the range to the ConstantRange
constructor, but the constructor expects the upper bound to 1
more than the max value so we need to add 1.

We also need to use getNonEmpty so that passing 0, 0 to the constructor
creates a full range rather than an empty range. And passing smin, smax+1
doesn't cause an assertion.

I believe this fixes at least some of the reason llvm#79158 was reverted.
topperc added a commit that referenced this pull request Mar 21, 2024
)

We were passing the min and max values of the range to the ConstantRange
constructor, but the constructor expects the upper bound to 1 more than
the max value so we need to add 1.

We also need to use getNonEmpty so that passing 0, 0 to the constructor
creates a full range rather than an empty range. And passing smin,
smax+1 doesn't cause an assertion.

I believe this fixes at least some of the reason #79158 was reverted.
llvmbot pushed a commit to llvmbot/llvm-project that referenced this pull request Mar 21, 2024
…m#86041)

We were passing the min and max values of the range to the ConstantRange
constructor, but the constructor expects the upper bound to 1 more than
the max value so we need to add 1.

We also need to use getNonEmpty so that passing 0, 0 to the constructor
creates a full range rather than an empty range. And passing smin,
smax+1 doesn't cause an assertion.

I believe this fixes at least some of the reason llvm#79158 was reverted.

(cherry picked from commit 1283646)
AreaZR pushed a commit to AreaZR/llvm-project that referenced this pull request Mar 21, 2024
…m#86041)

We were passing the min and max values of the range to the ConstantRange
constructor, but the constructor expects the upper bound to 1 more than
the max value so we need to add 1.

We also need to use getNonEmpty so that passing 0, 0 to the constructor
creates a full range rather than an empty range. And passing smin,
smax+1 doesn't cause an assertion.

I believe this fixes at least some of the reason llvm#79158 was reverted.
AreaZR pushed a commit to AreaZR/llvm-project that referenced this pull request Mar 21, 2024
…m#86041)

We were passing the min and max values of the range to the ConstantRange
constructor, but the constructor expects the upper bound to 1 more than
the max value so we need to add 1.

We also need to use getNonEmpty so that passing 0, 0 to the constructor
creates a full range rather than an empty range. And passing smin,
smax+1 doesn't cause an assertion.

I believe this fixes at least some of the reason llvm#79158 was reverted.
chencha3 pushed a commit to chencha3/llvm-project that referenced this pull request Mar 23, 2024
chencha3 pushed a commit to chencha3/llvm-project that referenced this pull request Mar 23, 2024
…m#86041)

We were passing the min and max values of the range to the ConstantRange
constructor, but the constructor expects the upper bound to 1 more than
the max value so we need to add 1.

We also need to use getNonEmpty so that passing 0, 0 to the constructor
creates a full range rather than an empty range. And passing smin,
smax+1 doesn't cause an assertion.

I believe this fixes at least some of the reason llvm#79158 was reverted.
tstellar pushed a commit to llvmbot/llvm-project that referenced this pull request Apr 10, 2024
…m#86041)

We were passing the min and max values of the range to the ConstantRange
constructor, but the constructor expects the upper bound to 1 more than
the max value so we need to add 1.

We also need to use getNonEmpty so that passing 0, 0 to the constructor
creates a full range rather than an empty range. And passing smin,
smax+1 doesn't cause an assertion.

I believe this fixes at least some of the reason llvm#79158 was reverted.

(cherry picked from commit 1283646)
tstellar pushed a commit to llvmbot/llvm-project that referenced this pull request Apr 10, 2024
…m#86041)

We were passing the min and max values of the range to the ConstantRange
constructor, but the constructor expects the upper bound to 1 more than
the max value so we need to add 1.

We also need to use getNonEmpty so that passing 0, 0 to the constructor
creates a full range rather than an empty range. And passing smin,
smax+1 doesn't cause an assertion.

I believe this fixes at least some of the reason llvm#79158 was reverted.

(cherry picked from commit 1283646)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

8 participants