Skip to content

Commit

Permalink
Merged master:e56103d25016 into amd-gfx:33ce16c41ea2
Browse files Browse the repository at this point in the history
Local branch amd-gfx 33ce16c Merged master:7fa8b629208c into amd-gfx:3c4445307114
Remote branch master e56103d [InstCombine] add multi-use demanded bits fold for add with low-bit mask
  • Loading branch information
Sw authored and Sw committed Nov 15, 2020
2 parents 33ce16c + e56103d commit 1c08cad
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 11 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/AliasAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
unsigned ArgIdx = std::distance(Call->arg_begin(), AI);
MemoryLocation ArgLoc =
MemoryLocation::getForArgument(Call, ArgIdx, TLI);
AliasResult ArgAlias = alias(ArgLoc, Loc);
AliasResult ArgAlias = alias(ArgLoc, Loc, AAQI);
if (ArgAlias != NoAlias) {
ModRefInfo ArgMask = getArgModRefInfo(Call, ArgIdx);
AllArgsMask = unionModRef(AllArgsMask, ArgMask);
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,21 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
// do simplifications that apply to *just* the one user if we know that
// this instruction has a simpler value in that context.
switch (I->getOpcode()) {
case Instruction::Add: {
// TODO: Allow undefs and/or non-splat vectors.
const APInt *C;
if (match(I->getOperand(1), m_APInt(C))) {
// Right fill the demanded bits for this add to demand the most
// significant demanded bit and all those below it.
unsigned Ctlz = DemandedMask.countLeadingZeros();
APInt LowMask(APInt::getLowBitsSet(BitWidth, BitWidth - Ctlz));
// If we are adding zeros to every bit below the highest demanded bit,
// just return the add's variable operand.
if ((*C & LowMask).isNullValue())
return I->getOperand(0);
}
break;
}
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
Expand Down
38 changes: 36 additions & 2 deletions llvm/test/Transforms/InstCombine/and.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1049,11 +1049,13 @@ define <2 x i32> @lowmask_sext_in_reg_splat(<2 x i32> %x, <2 x i32>* %p) {
ret <2 x i32> %and
}

; Multi-use demanded bits - 'add' doesn't change 'and'

define i8 @lowmask_add(i8 %x) {
; CHECK-LABEL: @lowmask_add(
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], -64
; CHECK-NEXT: call void @use8(i8 [[A]])
; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], 32
; CHECK-NEXT: [[R:%.*]] = and i8 [[X]], 32
; CHECK-NEXT: ret i8 [[R]]
;
%a = add i8 %x, -64 ; 0xc0
Expand All @@ -1062,6 +1064,8 @@ define i8 @lowmask_add(i8 %x) {
ret i8 %r
}

; Negative test - mask overlaps low bit of add

define i8 @not_lowmask_add(i8 %x) {
; CHECK-LABEL: @not_lowmask_add(
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], -64
Expand All @@ -1075,6 +1079,8 @@ define i8 @not_lowmask_add(i8 %x) {
ret i8 %r
}

; Negative test - mask overlaps low bit of add

define i8 @not_lowmask_add2(i8 %x) {
; CHECK-LABEL: @not_lowmask_add2(
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], -96
Expand All @@ -1088,15 +1094,43 @@ define i8 @not_lowmask_add2(i8 %x) {
ret i8 %r
}

; Multi-use demanded bits - 'add' doesn't change 'and'

define <2 x i8> @lowmask_add_splat(<2 x i8> %x, <2 x i8>* %p) {
; CHECK-LABEL: @lowmask_add_splat(
; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], <i8 -64, i8 -64>
; CHECK-NEXT: store <2 x i8> [[A]], <2 x i8>* [[P:%.*]], align 2
; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[A]], <i8 32, i8 32>
; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X]], <i8 32, i8 32>
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%a = add <2 x i8> %x, <i8 -64, i8 -64> ; 0xc0
store <2 x i8> %a, <2 x i8>* %p
%r = and <2 x i8> %a, <i8 32, i8 32> ; 0x20
ret <2 x i8> %r
}

; Negative test - the 'add' constant and the 'and' mask each contain an undef
; element. The CHECK lines expect the 'and' to keep using the 'add' result
; (the multi-use demanded bits fold has a TODO for allowing undefs).

define <2 x i8> @lowmask_add_splat_undef(<2 x i8> %x, <2 x i8>* %p) {
; CHECK-LABEL: @lowmask_add_splat_undef(
; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], <i8 -64, i8 undef>
; CHECK-NEXT: store <2 x i8> [[A]], <2 x i8>* [[P:%.*]], align 2
; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[A]], <i8 undef, i8 32>
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%a = add <2 x i8> %x, <i8 -64, i8 undef> ; 0xc0
store <2 x i8> %a, <2 x i8>* %p
%r = and <2 x i8> %a, <i8 undef, i8 32> ; 0x20
ret <2 x i8> %r
}

; Negative test - the 'add' constant and the 'and' mask are non-splat vectors.
; The CHECK lines expect the 'and' to keep using the 'add' result
; (the multi-use demanded bits fold has a TODO for allowing non-splat vectors).

define <2 x i8> @lowmask_add_vec(<2 x i8> %x, <2 x i8>* %p) {
; CHECK-LABEL: @lowmask_add_vec(
; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], <i8 -96, i8 -64>
; CHECK-NEXT: store <2 x i8> [[A]], <2 x i8>* [[P:%.*]], align 2
; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[A]], <i8 16, i8 32>
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%a = add <2 x i8> %x, <i8 -96, i8 -64> ; 0xe0, 0xc0
store <2 x i8> %a, <2 x i8>* %p
%r = and <2 x i8> %a, <i8 16, i8 32> ; 0x10, 0x20
ret <2 x i8> %r
}
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
; AUTO_VEC-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP1]], 96
; AUTO_VEC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
; AUTO_VEC: vector.ph.new:
; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], 1152921504606846972
; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], -4
; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]]
; AUTO_VEC: vector.body:
; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ]
Expand Down Expand Up @@ -306,7 +306,7 @@ define double @external_use_with_fast_math(double* %a, i64 %n) {
; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP2]], 48
; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
; AUTO_VEC: vector.ph.new:
; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP4]], 2305843009213693948
; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP4]], -4
; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]]
; AUTO_VEC: vector.body:
; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/runtime-check.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function foo
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -disable-basic-aa -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s -check-prefix=FORCED_OPTSIZE

Expand Down Expand Up @@ -32,7 +32,7 @@ define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtab
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]], [[DBG9]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], [[DBG9]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588, [[DBG9]]
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4, [[DBG9]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]], [[DBG9]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [[DBG9]]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ define void @vdiv(double* %x, double* %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], 12
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
; CHECK: vector.ph.new:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP2]], 9223372036854775804
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP2]], -4
; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
Expand Down
4 changes: 2 additions & 2 deletions openmp/runtime/tools/generate-def.pl
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ (\%)
foreach my $entry ( keys( %$entries ) ) {
if ( not $entries->{ $entry }->{ obsolete } ) {
my $ordinal = $entries->{ $entry }->{ ordinal };
# omp_alloc and omp_free are C/C++ only functions, skip "1000+ordinal" for them
if ( $entry =~ m{\A[ok]mp_} and $entry ne "omp_alloc" and $entry ne "omp_free" ) {
# omp_alloc, omp_calloc and omp_free are C/C++ only functions, skip "1000+ordinal" for them
if ( $entry =~ m{\A[ok]mp_} and $entry ne "omp_alloc" and $entry ne "omp_calloc" and $entry ne "omp_free" ) {
if ( not defined( $ordinal ) ) {
runtime_error(
"Bad entry \"$entry\": ordinal number is not specified."
Expand Down

0 comments on commit 1c08cad

Please sign in to comment.