[Float2Int] Resolve FIXME: Pick the smallest legal type that fits #79158

AreaZR · 2024-01-23T15:54:47Z

Pick the type based on the smallest bit-width possible, using DataLayout.

llvmbot · 2024-01-23T15:55:23Z

@llvm/pr-subscribers-llvm-transforms

Author: AtariDreams (AtariDreams)

Changes

Pick the type based on the smallest bitwidth possible

Full diff: https://github.com/llvm/llvm-project/pull/79158.diff

2 Files Affected:

(modified) llvm/lib/Transforms/Scalar/Float2Int.cpp (+10-2)
(modified) llvm/test/Transforms/Float2Int/basic.ll (+27-28)

diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index ccca8bcc1a56ac7..108ad297ce7519e 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -383,8 +383,16 @@ bool Float2IntPass::validateAndTransform() {
     }
 
     // OK, R is known to be representable. Now pick a type for it.
-    // FIXME: Pick the smallest legal type that will fit.
-    Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx);
+    Type *Ty;
+    if (MinBW <= 8) {
+      Ty = Type::getInt8Ty(*Ctx);
+    } else if (MinBW <= 16) {
+      Ty = Type::getInt16Ty(*Ctx);
+    } else if (MinBW <= 32) {
+      Ty = Type::getInt32Ty(*Ctx);
+    } else {
+      Ty = Type::getInt64Ty(*Ctx);
+    }
 
     for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
          MI != ME; ++MI)
diff --git a/llvm/test/Transforms/Float2Int/basic.ll b/llvm/test/Transforms/Float2Int/basic.ll
index 2854a83179b7eb6..aa9ef36e4caaab1 100644
--- a/llvm/test/Transforms/Float2Int/basic.ll
+++ b/llvm/test/Transforms/Float2Int/basic.ll
@@ -7,10 +7,9 @@
 
 define i16 @simple1(i8 %a) {
 ; CHECK-LABEL: @simple1(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[T21:%.*]] = add i32 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[T21]] to i16
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[T21:%.*]] = add i16 [[TMP1]], 1
+; CHECK-NEXT:    ret i16 [[T21]]
 ;
   %t1 = uitofp i8 %a to float
   %t2 = fadd float %t1, 1.0
@@ -20,9 +19,9 @@ define i16 @simple1(i8 %a) {
 
 define i8 @simple2(i8 %a) {
 ; CHECK-LABEL: @simple2(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[T21:%.*]] = sub i32 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[T21]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[T21:%.*]] = sub i16 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 [[T21]] to i8
 ; CHECK-NEXT:    ret i8 [[TMP2]]
 ;
   %t1 = uitofp i8 %a to float
@@ -33,9 +32,10 @@ define i8 @simple2(i8 %a) {
 
 define i32 @simple3(i8 %a) {
 ; CHECK-LABEL: @simple3(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[T21:%.*]] = sub i32 [[TMP1]], 1
-; CHECK-NEXT:    ret i32 [[T21]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[T21:%.*]] = sub i16 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[T21]] to i32
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %t1 = uitofp i8 %a to float
   %t2 = fsub float %t1, 1.0
@@ -45,9 +45,9 @@ define i32 @simple3(i8 %a) {
 
 define i1 @cmp(i8 %a, i8 %b) {
 ; CHECK-LABEL: @cmp(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i32
-; CHECK-NEXT:    [[T31:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
+; CHECK-NEXT:    [[T31:%.*]] = icmp slt i16 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret i1 [[T31]]
 ;
   %t1 = uitofp i8 %a to float
@@ -106,13 +106,14 @@ define i32 @simple6(i8 %a, i8 %b) {
 
 define i32 @multi1(i8 %a, i8 %b, i8 %c, float %d) {
 ; CHECK-LABEL: @multi1(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
 ; CHECK-NEXT:    [[FC:%.*]] = uitofp i8 [[C:%.*]] to float
-; CHECK-NEXT:    [[X1:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[X1:%.*]] = add i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[X1]] to i32
 ; CHECK-NEXT:    [[Z:%.*]] = fadd float [[FC]], [[D:%.*]]
 ; CHECK-NEXT:    [[W:%.*]] = fptoui float [[Z]] to i32
-; CHECK-NEXT:    [[R:%.*]] = add i32 [[X1]], [[W]]
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[TMP3]], [[W]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %fa = uitofp i8 %a to float
@@ -128,10 +129,9 @@ define i32 @multi1(i8 %a, i8 %b, i8 %c, float %d) {
 
 define i16 @simple_negzero(i8 %a) {
 ; CHECK-LABEL: @simple_negzero(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[T21:%.*]] = add i32 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[T21]] to i16
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[T21:%.*]] = add i16 [[TMP1]], 0
+; CHECK-NEXT:    ret i16 [[T21]]
 ;
   %t1 = uitofp i8 %a to float
   %t2 = fadd fast float %t1, -0.0
@@ -141,9 +141,9 @@ define i16 @simple_negzero(i8 %a) {
 
 define i32 @simple_negative(i8 %call) {
 ; CHECK-LABEL: @simple_negative(
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[CALL:%.*]] to i32
-; CHECK-NEXT:    [[MUL1:%.*]] = mul i32 [[TMP1]], -3
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[MUL1]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[CALL:%.*]] to i16
+; CHECK-NEXT:    [[MUL1:%.*]] = mul i16 [[TMP1]], -3
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 [[MUL1]] to i8
 ; CHECK-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[CONV3]]
 ;
@@ -156,10 +156,9 @@ define i32 @simple_negative(i8 %call) {
 
 define i16 @simple_fneg(i8 %a) {
 ; CHECK-LABEL: @simple_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[T21:%.*]] = sub i32 0, [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[T21]] to i16
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT:    [[T21:%.*]] = sub i16 0, [[TMP1]]
+; CHECK-NEXT:    ret i16 [[T21]]
 ;
   %t1 = uitofp i8 %a to float
   %t2 = fneg fast float %t1

llvm/lib/Transforms/Scalar/Float2Int.cpp

github-actions · 2024-01-26T17:05:09Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/include/llvm/Transforms/Scalar/Float2Int.h

llvm/lib/Transforms/Scalar/Float2Int.cpp

AreaZR · 2024-01-26T20:37:53Z

@nikic Fixed!

aeubanks · 2024-03-19T19:26:15Z

I was able to reproduce the difference before and after the revert of the return value being different

edit: on x86-64 with clang -O3

AreaZR · 2024-03-19T19:29:52Z

I was able to reproduce the difference before and after the revert of the return value being different

edit: on x86-64 with clang -O3

Oddly enough, the pass itself doesn't seem to be the issue.

AreaZR · 2024-03-19T19:30:08Z

I was able to reproduce the difference before and after the revert of the return value being different
edit: on x86-64 with clang -O3

Oddly enough, the pass itself doesn't seem to be the issue.

The -passes=float2int one I mean

AreaZR · 2024-03-19T19:31:27Z

The issue in question is not the Float2Int pass.

AreaZR · 2024-03-19T19:31:51Z

PR: only running float2int pass
PRO: Running -O3

AreaZR · 2024-03-19T19:35:39Z

Does anyone have a full list of passes that I can try to see is the culprit?

alexfh · 2024-03-19T20:13:25Z

As far as clang is concerned, you can use LLVM options -opt-bisect-limit (https://llvm.org/docs/OptBisect.html), -print-before-all, -print-after-all, -print-module-scope, -print-pipeline-passes (all need to be prepended with -mllvm when passed to clang).

Apart from that, the Opt Pipeline view in the compiler explorer may be useful: https://godbolt.org/z/MY38Wc5dn

alexfh · 2024-03-19T20:27:11Z

Oddly enough, the pass itself doesn't seem to be the issue.

Sad but true: this is a usual situation, when a seemingly correct change in one pass uncovers issues in one or more of the following passes in the pipeline. This may feel like playing pick up sticks at times, but we see this regularly =(

jrtc27 · 2024-03-20T00:07:24Z

The issue in question is not the Float2Int pass.

Well most passes are pretty uninteresting if you run them on pre-SROA/mem2reg IR. Looking at the later godbolt link, there's the following transform for Float2IntPass:

Before:

define dso_local noundef i32 @main() local_unnamed_addr {
entry:
  %x = alloca i32, align 4
    #dbg_assign(i1 undef, !18, !DIExpression(), !21, ptr %x, !DIExpression(), !22)
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
  store volatile i32 1, ptr %x, align 4
    #dbg_assign(i32 1, !18, !DIExpression(), !29, ptr %x, !DIExpression(), !22)
  %x.0.x.0.x.0. = load volatile i32, ptr %x, align 4
  %cmp = icmp sgt i32 %x.0.x.0.x.0., 0
    #dbg_value(i1 %cmp, !20, !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value), !22)
  %conv = uitofp i1 %cmp to float
  %mul = fmul float %conv, 2.550000e+02
  %conv1 = fptoui float %mul to i8
  %conv2 = zext i8 %conv1 to i32
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x)
  ret i32 %conv2
}

After:

define dso_local noundef i32 @main() local_unnamed_addr {
entry:
  %x = alloca i32, align 4
    #dbg_assign(i1 undef, !18, !DIExpression(), !21, ptr %x, !DIExpression(), !22)
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
  store volatile i32 1, ptr %x, align 4
    #dbg_assign(i32 1, !18, !DIExpression(), !29, ptr %x, !DIExpression(), !22)
  %x.0.x.0.x.0. = load volatile i32, ptr %x, align 4
  %cmp = icmp sgt i32 %x.0.x.0.x.0., 0
    #dbg_value(i1 %cmp, !20, !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value), !22)
  %0 = zext i1 %cmp to i8
  %mul3 = mul i8 %0, 127
  %conv2 = zext i8 %mul3 to i32
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x)
  ret i32 %conv2
}

This is why you've received feedback about these kinds of small changes being awkward to review. When they don't offer significant gains, they are often outweighed by the high risk that something will break, whether that's because the change in question is incorrect or because it exposes incorrect code elsewhere.

jrtc27 · 2024-03-20T00:13:05Z

At a glance it looks like confusion about whether or not there's a sign bit.

jrtc27 · 2024-03-20T00:20:05Z

This seems to be a preexisting issue that you can hit if you pick specific float values: https://godbolt.org/z/rxdqrve3d

It's just hit more often now because you're picking smaller integers. Before you had to be in the range [2^31, 2^32), converted to a double, but now it's true for [2^7, 2^8) and [2^15, 2^16), both of which also fit in a single-precision float, unlike the prior case.

jrtc27 · 2024-03-20T00:25:09Z

Compiler Explorer shows this bug as being reproducible with that exact test case all the way back to Clang 3.7 (but not 3.6). Given Float2Int was new in 3.7 (at least from GitHub's view of available tags) that strongly suggests to me this bug has always been there since it was added to the tree (or at least the final attempt at adding it to the tree).

topperc · 2024-03-20T23:44:39Z

I found a bug in the ConstantRange handling UIToFP and SIToFP handling which may explain the failure #86041

We were passing the min and max values of the range to the ConstantRange constructor, but the constructor expects the upper bound to 1 more than the max value so we need to add 1. We also need to use getNonEmpty so that passing 0, 0 to the constructor creates a full range rather than an empty range. And passing smin, smax+1 doesn't cause an assertion. I believe this fixes at least some of the reason llvm#79158 was reverted.

) We were passing the min and max values of the range to the ConstantRange constructor, but the constructor expects the upper bound to 1 more than the max value so we need to add 1. We also need to use getNonEmpty so that passing 0, 0 to the constructor creates a full range rather than an empty range. And passing smin, smax+1 doesn't cause an assertion. I believe this fixes at least some of the reason #79158 was reverted.

…m#86041) We were passing the min and max values of the range to the ConstantRange constructor, but the constructor expects the upper bound to 1 more than the max value so we need to add 1. We also need to use getNonEmpty so that passing 0, 0 to the constructor creates a full range rather than an empty range. And passing smin, smax+1 doesn't cause an assertion. I believe this fixes at least some of the reason llvm#79158 was reverted. (cherry picked from commit 1283646)

…m#86041) We were passing the min and max values of the range to the ConstantRange constructor, but the constructor expects the upper bound to 1 more than the max value so we need to add 1. We also need to use getNonEmpty so that passing 0, 0 to the constructor creates a full range rather than an empty range. And passing smin, smax+1 doesn't cause an assertion. I believe this fixes at least some of the reason llvm#79158 was reverted.

…fits" (llvm#85843) Reverts llvm#79158, which causes a miscompile. See llvm#79158 (comment)

…m#86041) We were passing the min and max values of the range to the ConstantRange constructor, but the constructor expects the upper bound to 1 more than the max value so we need to add 1. We also need to use getNonEmpty so that passing 0, 0 to the constructor creates a full range rather than an empty range. And passing smin, smax+1 doesn't cause an assertion. I believe this fixes at least some of the reason llvm#79158 was reverted.

…m#86041) We were passing the min and max values of the range to the ConstantRange constructor, but the constructor expects the upper bound to 1 more than the max value so we need to add 1. We also need to use getNonEmpty so that passing 0, 0 to the constructor creates a full range rather than an empty range. And passing smin, smax+1 doesn't cause an assertion. I believe this fixes at least some of the reason llvm#79158 was reverted. (cherry picked from commit 1283646)

llvmbot added the llvm:transforms label Jan 23, 2024

AreaZR force-pushed the size branch 7 times, most recently from 590d9f6 to 0f671c9 Compare January 26, 2024 16:39

nikic reviewed Jan 26, 2024

View reviewed changes

llvm/lib/Transforms/Scalar/Float2Int.cpp Outdated Show resolved Hide resolved

AreaZR force-pushed the size branch 2 times, most recently from 1b1021d to aef840a Compare January 26, 2024 17:02

AreaZR force-pushed the size branch from aef840a to 57da5f9 Compare January 26, 2024 17:39

AreaZR requested a review from nikic January 26, 2024 17:41

AreaZR force-pushed the size branch 3 times, most recently from 80a9b71 to 4d74e6d Compare January 26, 2024 18:36

nikic reviewed Jan 26, 2024

View reviewed changes

llvm/include/llvm/Transforms/Scalar/Float2Int.h Outdated Show resolved Hide resolved

llvm/lib/Transforms/Scalar/Float2Int.cpp Show resolved Hide resolved

AreaZR force-pushed the size branch 2 times, most recently from 4175133 to b238489 Compare January 26, 2024 20:37

AreaZR requested a review from nikic January 26, 2024 20:37

AreaZR force-pushed the size branch 7 times, most recently from ea8d0a5 to 6d156a2 Compare January 29, 2024 18:49

topperc mentioned this pull request Mar 20, 2024

[ConstantRange] Fix off by 1 bugs in UIToFP and SIToFP handling. #86041

Merged

chencha3 pushed a commit to chencha3/llvm-project that referenced this pull request Mar 23, 2024

Revert "[Float2Int] Resolve FIXME: Pick the smallest legal type that …

63b2ca5

…fits" (llvm#85843) Reverts llvm#79158, which causes a miscompile. See llvm#79158 (comment)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[Float2Int] Resolve FIXME: Pick the smallest legal type that fits #79158

[Float2Int] Resolve FIXME: Pick the smallest legal type that fits #79158

AreaZR commented Jan 23, 2024 •

edited

Loading

llvmbot commented Jan 23, 2024

github-actions bot commented Jan 26, 2024 •

edited

Loading

AreaZR commented Jan 26, 2024

aeubanks commented Mar 19, 2024 •

edited

Loading

AreaZR commented Mar 19, 2024

AreaZR commented Mar 19, 2024

AreaZR commented Mar 19, 2024

AreaZR commented Mar 19, 2024

AreaZR commented Mar 19, 2024

alexfh commented Mar 19, 2024

alexfh commented Mar 19, 2024

jrtc27 commented Mar 20, 2024

jrtc27 commented Mar 20, 2024

jrtc27 commented Mar 20, 2024

jrtc27 commented Mar 20, 2024

topperc commented Mar 20, 2024 •

edited

Loading

[Float2Int] Resolve FIXME: Pick the smallest legal type that fits #79158

[Float2Int] Resolve FIXME: Pick the smallest legal type that fits #79158

Conversation

AreaZR commented Jan 23, 2024 • edited Loading

llvmbot commented Jan 23, 2024

github-actions bot commented Jan 26, 2024 • edited Loading

AreaZR commented Jan 26, 2024

aeubanks commented Mar 19, 2024 • edited Loading

AreaZR commented Mar 19, 2024

AreaZR commented Mar 19, 2024

AreaZR commented Mar 19, 2024

AreaZR commented Mar 19, 2024

AreaZR commented Mar 19, 2024

alexfh commented Mar 19, 2024

alexfh commented Mar 19, 2024

jrtc27 commented Mar 20, 2024

jrtc27 commented Mar 20, 2024

jrtc27 commented Mar 20, 2024

jrtc27 commented Mar 20, 2024

topperc commented Mar 20, 2024 • edited Loading

AreaZR commented Jan 23, 2024 •

edited

Loading

github-actions bot commented Jan 26, 2024 •

edited

Loading

aeubanks commented Mar 19, 2024 •

edited

Loading

topperc commented Mar 20, 2024 •

edited

Loading