-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SimplifyCFG] Consider a cross signed max-min table in switchToLookupTable
#67885
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms ChangesMigration from D156612. I created a lookup table to find the shortest lookup table on "the circle based on the bit width all values". Closes #64231. Patch is 121.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67885.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 35fead111aa9666..dba20f3dca9f876 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6378,18 +6378,20 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
}
static bool ShouldUseSwitchConditionAsTableIndex(
- ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
+ ConstantInt &BeginCaseVal, const ConstantInt &EndCaseVal,
bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
const DataLayout &DL, const TargetTransformInfo &TTI) {
- if (MinCaseVal.isNullValue())
+ if (BeginCaseVal.isNullValue())
return true;
- if (MinCaseVal.isNegative() ||
- MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
+ if (BeginCaseVal.getValue().sge(EndCaseVal.getValue()))
+ return false;
+ if (BeginCaseVal.isNegative() ||
+ EndCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
!HasDefaultResults)
return false;
return all_of(ResultTypes, [&](const auto &KV) {
return SwitchLookupTable::WouldFitInRegister(
- DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
+ DL, EndCaseVal.getLimitedValue() + 1 /* TableSize */,
KV.second /* ResultType */);
});
}
@@ -6478,7 +6480,7 @@ static void reuseTableCompare(
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
-static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
+static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
DomTreeUpdater *DTU, const DataLayout &DL,
const TargetTransformInfo &TTI) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
@@ -6506,9 +6508,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Figure out the corresponding result for each case value and phi node in the
// common destination, as well as the min and max case values.
assert(!SI->cases().empty());
- SwitchInst::CaseIt CI = SI->case_begin();
- ConstantInt *MinCaseVal = CI->getCaseValue();
- ConstantInt *MaxCaseVal = CI->getCaseValue();
BasicBlock *CommonDest = nullptr;
@@ -6519,17 +6518,60 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
SmallDenseMap<PHINode *, Type *> ResultTypes;
SmallVector<PHINode *, 4> PHIs;
- for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
- ConstantInt *CaseVal = CI->getCaseValue();
- if (CaseVal->getValue().slt(MinCaseVal->getValue()))
- MinCaseVal = CaseVal;
- if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
- MaxCaseVal = CaseVal;
+ SmallVector<ConstantInt *, 8> CaseVals;
+ for (auto CI : SI->cases()) {
+ ConstantInt *CaseVal = CI.getCaseValue();
+ CaseVals.push_back(CaseVal);
+ }
+
+ // We want to find a range of indexes that will create the minimal table.
+ // We can treat all possible index values as a circle. For example, the i8 is
+ // [-128, -1] and [0, 127]. After that find the minimal range from this circle
+ // that can cover all exist values. First, create an incrementing sequence.
+ llvm::sort(CaseVals, [](const ConstantInt *A, const ConstantInt *B) {
+ return A->getValue().slt(B->getValue());
+ });
+ auto *CaseValIter = CaseVals.begin();
+ // We start by using the begin and end as the minimal table.
+ ConstantInt *BeginCaseVal = *CaseValIter;
+ ConstantInt *EndCaseVal = *CaseVals.rbegin();
+ bool RangeOverflow = false;
+ uint64_t MinTableSize = EndCaseVal->getValue()
+ .ssub_ov(BeginCaseVal->getValue(), RangeOverflow)
+ .getLimitedValue() +
+ 1;
+ // If there is no overflow, then this must be the minimal table.
+ if (RangeOverflow) {
+ auto MaxValue = APInt::getMaxValue(BeginCaseVal->getBitWidth());
+ while (CaseValIter != CaseVals.end()) {
+ auto *CurrentCaseVal = *CaseValIter++;
+ if (CaseValIter == CaseVals.end()) {
+ break;
+ }
+ ConstantInt *NextCaseVal = *CaseValIter;
+ auto NextVal = NextCaseVal->getValue();
+ auto CurVal = CurrentCaseVal->getValue();
+ uint64_t RequireTableSize =
+ (MaxValue - (NextVal - CurVal) + 1).getLimitedValue() + 1;
+ // FIXME: When there is more than one minimal table, we can choose the
+ // best one. The current simple strategy may not be the best.
+ if (((RequireTableSize < MinTableSize) ||
+ (RequireTableSize == MinTableSize &&
+ NextCaseVal->getValue().isZero()))) {
+ BeginCaseVal = NextCaseVal;
+ EndCaseVal = CurrentCaseVal;
+ MinTableSize = RequireTableSize;
+ }
+ }
+ }
+
+ for (const auto CI : SI->cases()) {
+ ConstantInt *CaseVal = CI.getCaseValue();
// Resulting value at phi nodes for this case value.
using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
ResultsTy Results;
- if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
+ if (!getCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
Results, DL, TTI))
return false;
@@ -6564,13 +6606,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
}
bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
- *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
+ *BeginCaseVal, *EndCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
uint64_t TableSize;
if (UseSwitchConditionAsTableIndex)
- TableSize = MaxCaseVal->getLimitedValue() + 1;
+ TableSize = EndCaseVal->getLimitedValue() + 1;
else
- TableSize =
- (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
+ TableSize = MinTableSize;
bool TableHasHoles = (NumResults < TableSize);
bool NeedMask = (TableHasHoles && !HasDefaultResults);
@@ -6589,7 +6630,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Compute the maximum table size representable by the integer type we are
// switching upon.
- unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
+ unsigned CaseSize = BeginCaseVal->getType()->getPrimitiveSizeInBits();
uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
assert(MaxTableSize >= TableSize &&
"It is impossible for a switch to have more entries than the max "
@@ -6612,15 +6653,17 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
Value *TableIndex;
ConstantInt *TableIndexOffset;
if (UseSwitchConditionAsTableIndex) {
- TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0);
+ TableIndexOffset = ConstantInt::get(EndCaseVal->getType(), 0);
TableIndex = SI->getCondition();
} else {
- TableIndexOffset = MinCaseVal;
+ TableIndexOffset = BeginCaseVal;
// If the default is unreachable, all case values are s>= MinCaseVal. Then
// we can try to attach nsw.
bool MayWrap = true;
- if (!DefaultIsReachable) {
- APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
+ if (!DefaultIsReachable &&
+ EndCaseVal->getValue().sge(BeginCaseVal->getValue())) {
+ APInt Res =
+ EndCaseVal->getValue().ssub_ov(BeginCaseVal->getValue(), MayWrap);
(void)Res;
}
@@ -6639,7 +6682,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// PHI value for the default case in case we're using a bit mask.
} else {
Value *Cmp = Builder.CreateICmpULT(
- TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
+ TableIndex, ConstantInt::get(BeginCaseVal->getType(), TableSize));
RangeCheckBranch =
Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
if (DTU)
@@ -6883,7 +6926,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// CVP. Therefore, only apply this transformation during late stages of the
// optimisation pipeline.
if (Options.ConvertSwitchToLookupTable &&
- SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
+ switchToLookupTable(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (ReduceSwitchRange(SI, Builder, DL, TTI))
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll b/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll
index 56fa249d23bba2d..6d72fdb7085159c 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll
@@ -9,12 +9,13 @@ target triple = "x86_64-apple-darwin12.0.0"
define i3 @coveredswitch_test(i3 %input) {
; CHECK-LABEL: @coveredswitch_test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[INPUT:%.*]], -4
-; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i24
-; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i24 [[SWITCH_CAST]], 3
-; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i24 7507338, [[SWITCH_SHIFTAMT]]
-; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i24 [[SWITCH_DOWNSHIFT]] to i3
-; CHECK-NEXT: ret i3 [[SWITCH_MASKED]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[INPUT:%.*]], -2
+; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i3 [[INPUT]] to i18
+; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i18 [[SWITCH_CAST]], 3
+; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i18 42792, [[SWITCH_SHIFTAMT]]
+; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i18 [[SWITCH_DOWNSHIFT]] to i3
+; CHECK-NEXT: [[RESULT:%.*]] = select i1 [[TMP0]], i3 [[SWITCH_MASKED]], i3 -2
+; CHECK-NEXT: ret i3 [[RESULT]]
;
entry:
switch i3 %input, label %bb8 [
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
index cf244e6b63457c3..8e291b025113d85 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
@@ -9,11 +9,17 @@ target triple = "x86_64-apple-darwin12.0.0"
define i64 @test(i3 %arg) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[ARG:%.*]], -4
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[ARG:%.*]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -2
+; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[DEFAULT:%.*]]
+; CHECK: switch.lookup:
; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i64], ptr @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i64], ptr @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, ptr [[SWITCH_GEP]], align 8
-; CHECK-NEXT: [[V3:%.*]] = add i64 [[SWITCH_LOAD]], 0
+; CHECK-NEXT: br label [[DEFAULT]]
+; CHECK: Default:
+; CHECK-NEXT: [[V1:%.*]] = phi i64 [ 8, [[ENTRY:%.*]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
+; CHECK-NEXT: [[V3:%.*]] = add i64 [[V1]], 0
; CHECK-NEXT: ret i64 [[V3]]
;
entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
index 37001f4fba2aa84..75cee53d230374d 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
@@ -9,11 +9,8 @@ target triple = "x86_64-apple-darwin12.0.0"
define i64 @_TFO6reduce1E5toRawfS0_FT_Si(i2) {
; CHECK-LABEL: @_TFO6reduce1E5toRawfS0_FT_Si(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TMP0:%.*]], -2
-; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i3
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i64], ptr @switch.table._TFO6reduce1E5toRawfS0_FT_Si, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
-; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, ptr [[SWITCH_GEP]], align 8
-; CHECK-NEXT: ret i64 [[SWITCH_LOAD]]
+; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[TMP0:%.*]] to i64
+; CHECK-NEXT: ret i64 [[SWITCH_IDX_CAST]]
;
entry:
switch i2 %0, label %1 [
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 3873f0c0ae0bbd5..b05b013a96f8454 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -115,6 +115,121 @@ return:
}
+; The minimal table range is [122, -128]([122, 128]).
+
+define i32 @f_i8_128(i8 %c) {
+; CHECK-LABEL: @f_i8_128(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i8 [[C:%.*]], 122
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[SWITCH_TABLEIDX]], 7
+; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK: switch.lookup:
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.f_i8_128, i32 0, i8 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ 15, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ switch i8 %c, label %sw.default [
+ i8 122, label %return
+ i8 123, label %sw.bb1
+ i8 124, label %sw.bb2
+ i8 125, label %sw.bb3
+ i8 126, label %sw.bb4
+ i8 127, label %sw.bb5
+ i8 -128, label %sw.bb6
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb5: br label %return
+sw.bb6: br label %return
+sw.default: br label %return
+return:
+ %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+ ret i32 %retval.0
+}
+
+; The minimal table range is [3, 0].
+
+define i32 @f_min_max(i3 %c) {
+; CHECK-LABEL: @f_min_max(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], 3
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -2
+; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK: switch.lookup:
+; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], ptr @switch.table.f_min_max, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ 15, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ switch i3 %c, label %sw.default [
+ i3 -4, label %return
+ i3 -3, label %sw.bb1
+ i3 -2, label %sw.bb2
+ i3 -1, label %sw.bb3
+ i3 0, label %sw.bb4
+ i3 3, label %sw.bb6
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb6: br label %return
+sw.default: br label %return
+return:
+ %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+ ret i32 %retval.0
+}
+
+; The minimal table range is [-1, -4].
+
+define i32 @f_min_max_2(i3 %c) {
+; CHECK-LABEL: @f_min_max_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], -1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -2
+; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK: switch.lookup:
+; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], ptr @switch.table.f_min_max_2, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ 15, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ switch i3 %c, label %sw.default [
+ i3 -1, label %return
+ i3 0, label %sw.bb1
+ i3 1, label %sw.bb2
+ i3 2, label %sw.bb3
+ i3 3, label %sw.bb4
+ i3 -4, label %sw.bb6
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb6: br label %return
+sw.default: br label %return
+return:
+ %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+ ret i32 %retval.0
+}
+
; A switch used to initialize two variables, an i8 and a float.
declare void @dummy(i8 signext, float)
@@ -1538,14 +1653,11 @@ end:
define i32 @covered_switch_with_bit_tests(i3) {
; CHECK-LABEL: @covered_switch_with_bit_tests(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[TMP0:%.*]], -4
-; CHECK-NEXT: [[SWITCH_MASKINDEX:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i8
-; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 -61, [[SWITCH_MASKINDEX]]
-; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
-; CHECK-NEXT: br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[L6:%.*]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[TMP0:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -4
+; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[L6:%.*]]
; CHECK: switch.lookup:
-; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.covered_switch_with_bit_tests, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.covered_switch_with_bit_tests, i32 0, i3 [[SWITCH_TABLEIDX]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; CHECK-NEXT: br label [[L6]]
; CHECK: l6:
@@ -1696,11 +1808,10 @@ define i32 @signed_overflow1(i8 %n) {
; CHECK-LABEL: @signed_overflow1(
; CHECK-NEXT: start:
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TRUNC]], -2
-; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i3
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.signed_overflow1, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
-; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
-; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[TRUNC]] to i32
+; CHECK-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[SWITCH_IDX_CAST]], 1111
+; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1111
+; CHECK-NEXT: ret i32 [[SWITCH_OFFSET]]
;
start:
%trunc = trunc i8 %n to i2
@@ -1732,20 +1843,11 @@ define i32 @signed_overflow2(i8 %n) {
; CHECK-LABEL: @signed_overflow2(
; CHECK-NEXT: start:
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
-; CHECK-NEXT: switch i2 [[TRUNC]], label [[BB1:%.*]] [
-; CHECK-NEXT: i2 1, label [[BB6:%.*]]
-; CHECK-NEXT: i2 -2, label [[BB4:%.*]]
-; CHECK-NEXT: i2 -1, label [[BB5:%.*]]
-; CHECK-NEXT: ]
-; CHECK: bb1:
-; CHECK-NEXT: unreachable
-; CHECK: bb4:
-; CHECK-NEXT: br label [[BB6]]
-; CHECK: bb5:
-; CHECK-NEXT: br label [[BB6]]
-; CHECK: bb6:
-; CHECK-NEXT: [[DOTSROA_0_0:%.*]] = phi i32 [ 4444, [[BB5]] ], [ 3333, [[BB4]] ], [ 2222, [[START:%.*]] ]
-; CHECK-NEXT: ret i32 [[DOTSROA_0_0]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TRUNC]], 1
+; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i32
+; CHECK-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[SWITCH_IDX_CAST]], 1111
+; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 2222
+; CHECK-NEXT: ret i32...
[truncated]
|
|
||
// We want to find a range of indexes that will create the minimal table. | ||
// We can treat all possible index values as a circle. For example, the i8 is | ||
// [-128, -1] and [0, 127]. After that find the minimal range from this circle |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand this comment. Especially what "the i8 is [-128, -1] and [0, 127]" has do with "circles".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated.
When considering overflow, the next indexed value of the maximum signed integer is the minimum signed integer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new comment is below:
We consider cases where the starting to the endpoint will cross the signed max and min. For example, for the i8 range [-128, -127, 126, 127]
, we choose from 126 to -127. The length of the lookup table is 4.
; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[ARG:%.*]], -4 | ||
; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[ARG:%.*]], 1 | ||
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -2 | ||
; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[DEFAULT:%.*]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems like a problematic regression, because it introduces a fallback branch. Why does this happen?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe. But I think it could also be a lucky result. We found the smallest jump table. The GEP has changed from [8 x i64]
to [6 x i64]
.
@f_min_max_2
,@f_min_max
,@test
, and@coveredswitch_test
look to become worse. But their table size did get smaller.
I rechecked@f_min_max_2
,@f_min_max
and@test
. They become fromtable with holes
totable without holes
. It's a "perfect" table?
I've also found that they luckily have a max and min value. Then we created a cover table.
If we want to deal with this situation, maybe we should add a condition to extend to a cover table (/revert the smallest table).
@coveredswitch_test
also luckily have a max and min value.
From https://reviews.llvm.org/D156612#4585083.
3aba918
to
6082276
Compare
; CHECK-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[SWITCH_IDX_CAST]], 1111 | ||
; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1111 | ||
; CHECK-NEXT: ret i32 [[SWITCH_OFFSET]] | ||
; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[TRUNC]] to i3 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regression here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
signed_overflow1
is the opposite example. So I think it was just a lucky result.
I think this should be another optimization. After this PR, the minimal table provides all the valid index values. Then select the optimal index value on the ShouldUseSwitchConditionAsTableIndex
and SwitchLookupTable::BuildLookup
.
switchToLookupTable
switchToLookupTable
MinCaseVal = CaseVal; | ||
if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) | ||
MaxCaseVal = CaseVal; | ||
SmallVector<ConstantInt *, 8> CaseVals; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you reserve num cases?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I changed to
SmallVector<ConstantInt *, 8> CaseVals(llvm::map_range(
SI->cases(), [](const auto &C) { return C.getCaseValue(); }));
.getLimitedValue() + | ||
1; | ||
// If there is no overflow, then this must be the minimal table. | ||
if (RangeOverflow) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should there be some concept of a good step value so that indexing can be efficient?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, this also introduces some regressions that I plan to address in this PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The selection of the index is no longer important.
When there exists more than one range satisfying the minimum lookup table, this must be a cover table. I will create another PR to address this issue.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I put it in #73269.
I apologize for the delayed response to the PR. I have resolved all regressions. |
e62ef9e
to
2920342
Compare
Resolve conflicts and ping. |
2920342
to
83c7304
Compare
Ping. |
83c7304
to
4eaa728
Compare
Rebase and ping. |
Ping. |
4eaa728
to
baaaec1
Compare
switchToLookupTable
switchToLookupTable
Ping. I improved the code a bit. |
LookupTableSize = RequireTableSize; | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIUC this is looking for max distance between any two values. If so think this can be simplified as:
ConstantInt *BeginCaseVal = nullptr;
ConstantInt *EndCaseVal = nullptr;
uint64_t LookupTableSize = 0;
APInt UnsignedDif = UnsignedMax(CaseVals) - UnsignedMin(CaseVals);
APInt SignedDif = SignedMax(CaseVals) - SignedMin(CaseVals);
if (ConsiderCrossSignedMaxMinTable && UnsignedDif.ult(SignedDif)) {
// Cross signed min case
BeginCaseVal = UnsignedMin(CaseVals);
EndCaseVal = UnsignedMax(CaseVals);
LookupTableSize = UnsignedDif.getLimitedValue() + 1;
} else {
// Crossing 0 case
BeginCaseVal = SignedMin(CaseVals);
EndCaseVal = SignedMax(CaseVals);
LookupTableSize = SignedDif.getLimitedValue() + 1;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It makes sense to me.
Fixes #64231.
Migration from D156612.
If we can't use the signed min-max to build a lookup table, then try a lookup table that crosses the signed min-max range, which can be a smaller table.