[InstCombine] matchRotate - add support for matching general funnel shifts with constant shift amounts (PR46896)

First step towards extending the existing rotation support to full funnel shift handling now that the backend legalization support has improved.

This enables us to match the shift-by-constant cases, which are trivial to expand again if necessary.

D88420 will add non-uniform support for funnel shifts as well once it has been finalized.

Differential Revision: https://reviews.llvm.org/D88834
RKSimon committed Oct 8, 2020
1 parent c1fd430 commit e1d4ca0
Showing 2 changed files with 31 additions and 40 deletions.
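
For context, the llvm.fshl intrinsic concatenates its first operand (high half) with its second (low half), shifts the double-wide value left by the third operand modulo the bitwidth, and returns the high half; or (shl X, C), (lshr Y, W - C) is therefore exactly fshl(X, Y, C) when the constant shift amounts sum to the width W. A minimal standalone sketch of that identity at a fixed 32-bit width (illustrative only, not part of the commit):

#include <cassert>
#include <cstdint>

// Reference semantics of llvm.fshl.i32 for constant shift amounts:
// the high 32 bits of concat(X:Y) shifted left by C.
static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t C) {
  C %= 32; // the intrinsic takes the shift amount modulo the bitwidth
  return C == 0 ? X : (X << C) | (Y >> (32 - C));
}

int main() {
  uint32_t X = 0x12345678, Y = 0x9ABCDEF0;
  // shl by 11 and lshr by 21 sum to the bitwidth (32), so the or'd pair
  // is a funnel shift -- the pattern this commit starts matching.
  assert(((X << 11) | (Y >> 21)) == fshl32(X, Y, 11));
  return 0;
}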
33 changes: 20 additions & 13 deletions llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2052,39 +2052,45 @@ Instruction *InstCombinerImpl::matchBSwap(BinaryOperator &Or) {
return LastInst;
}

-/// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic.
-static Instruction *matchRotate(Instruction &Or) {
+/// Match UB-safe variants of the funnel shift intrinsic.
+static Instruction *matchFunnelShift(Instruction &Or) {
// TODO: Can we reduce the code duplication between this and the related
// rotate matching code under visitSelect and visitTrunc?
unsigned Width = Or.getType()->getScalarSizeInBits();

-// First, find an or'd pair of opposite shifts with the same shifted operand:
-// or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
+// First, find an or'd pair of opposite shifts:
+// or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
BinaryOperator *Or0, *Or1;
if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
!match(Or.getOperand(1), m_BinOp(Or1)))
return nullptr;

-Value *ShVal, *ShAmt0, *ShAmt1;
-if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
-    !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
+Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
+if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
+    !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))))
return nullptr;

BinaryOperator::BinaryOps ShiftOpcode0 = Or0->getOpcode();
BinaryOperator::BinaryOps ShiftOpcode1 = Or1->getOpcode();
if (ShiftOpcode0 == ShiftOpcode1)
return nullptr;

-// Match the shift amount operands for a rotate pattern. This always matches
-// a subtraction on the R operand.
-auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+// Match the shift amount operands for a funnel shift pattern. This always
+// matches a subtraction on the R operand.
+auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
// Check for constant shift amounts that sum to the bitwidth.
// TODO: Support non-uniform shift amounts.
const APInt *LC, *RC;
if (match(L, m_APIntAllowUndef(LC)) && match(R, m_APIntAllowUndef(RC)))
if (LC->ult(Width) && RC->ult(Width) && (*LC + *RC) == Width)
return ConstantInt::get(L->getType(), *LC);

+// For non-constant cases, the following patterns currently only work for
+// rotation patterns.
+// TODO: Add general funnel-shift compatible patterns.
+if (ShVal0 != ShVal1)
+  return nullptr;

// For non-constant cases we don't support non-pow2 shift masks.
// TODO: Is it worth matching urem as well?
if (!isPowerOf2_32(Width))
@@ -2121,7 +2127,8 @@ static Instruction *matchRotate(Instruction &Or) {
(SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
-return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt });
+return IntrinsicInst::Create(
+    F, {IsFshl ? ShVal0 : ShVal1, IsFshl ? ShVal1 : ShVal0, ShAmt});
}

/// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
@@ -2574,8 +2581,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *BSwap = matchBSwap(I))
return BSwap;

-if (Instruction *Rotate = matchRotate(I))
-  return Rotate;
+if (Instruction *Funnel = matchFunnelShift(I))
+  return Funnel;

if (Instruction *Concat = matchOrConcat(I, Builder))
return replaceInstUsesWith(I, Concat);
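
The core of the new matching is the constant case in matchShiftAmount: both amounts must be strictly less than the bitwidth and sum to it, and the ShVal0 != ShVal1 early-out restricts the non-constant (sub-based) patterns to true rotates. A rough standalone sketch of the constant check, using plain integers in place of APInt (the helper name here is hypothetical, not from the patch):

#include <cstdint>
#include <optional>

// Hypothetical stand-in for the APInt-based check: an
// or (shl ShVal0, L), (lshr ShVal1, R) pair is a funnel shift by L iff
// both constant amounts are in range and sum to the bitwidth.
static std::optional<uint64_t>
matchConstantFunnelShiftAmount(uint64_t L, uint64_t R, unsigned Width) {
  if (L < Width && R < Width && L + R == Width)
    return L; // shift amount of the equivalent fshl
  return std::nullopt;
}

Whether the result becomes fshl or fshr then depends only on which side the shl matched, mirroring the IsFshl operand selection in the return statement above.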
38 changes: 11 additions & 27 deletions llvm/test/Transforms/InstCombine/funnel.ll
@@ -3,16 +3,14 @@

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"

-; TODO: Canonicalize or(shl,lshr) by constant to funnel shift intrinsics.
+; Canonicalize or(shl,lshr) by constant to funnel shift intrinsics.
; This should help cost modeling for vectorization, inlining, etc.
; If a target does not have a fshl instruction, the expansion will
; be exactly these same 3 basic ops (shl/lshr/or).

define i32 @fshl_i32_constant(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_i32_constant(
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 11
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[Y:%.*]], 21
-; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 11)
; CHECK-NEXT: ret i32 [[R]]
;
%shl = shl i32 %x, 11
@@ -23,9 +21,7 @@ define i32 @fshl_i32_constant(i32 %x, i32 %y) {

define i42 @fshr_i42_constant(i42 %x, i42 %y) {
; CHECK-LABEL: @fshr_i42_constant(
-; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[X:%.*]], 31
-; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[Y:%.*]], 11
-; CHECK-NEXT: [[R:%.*]] = or i42 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[Y:%.*]], i42 [[X:%.*]], i42 11)
; CHECK-NEXT: ret i42 [[R]]
;
%shr = lshr i42 %x, 31
@@ -34,13 +30,11 @@ define i42 @fshr_i42_constant(i42 %x, i42 %y) {
ret i42 %r
}

-; TODO: Vector types are allowed.
+; Vector types are allowed.

define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) {
; CHECK-LABEL: @fshl_v2i16_constant_splat(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 1, i16 1>
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], <i16 15, i16 15>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -51,9 +45,7 @@ define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) {

define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) {
; CHECK-LABEL: @fshl_v2i16_constant_splat_undef0(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 undef, i16 1>
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], <i16 15, i16 15>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%shl = shl <2 x i16> %x, <i16 undef, i16 1>
@@ -64,9 +56,7 @@ define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) {

define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) {
; CHECK-LABEL: @fshl_v2i16_constant_splat_undef1(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 1, i16 1>
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], <i16 15, i16 undef>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -75,13 +65,11 @@ define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) {
ret <2 x i16> %r
}

-; TODO: Non-power-of-2 vector types are allowed.
+; Non-power-of-2 vector types are allowed.

define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) {
; CHECK-LABEL: @fshr_v2i17_constant_splat(
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], <i17 12, i17 12>
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], <i17 5, i17 5>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
; CHECK-NEXT: ret <2 x i17> [[R]]
;
%shr = lshr <2 x i17> %x, <i17 12, i17 12>
@@ -92,9 +80,7 @@ define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) {

define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) {
; CHECK-LABEL: @fshr_v2i17_constant_splat_undef0(
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], <i17 12, i17 undef>
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], <i17 undef, i17 5>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
; CHECK-NEXT: ret <2 x i17> [[R]]
;
%shr = lshr <2 x i17> %x, <i17 12, i17 undef>
@@ -105,9 +91,7 @@ define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) {

define <2 x i17> @fshr_v2i17_constant_splat_undef1(<2 x i17> %x, <2 x i17> %y) {
; CHECK-LABEL: @fshr_v2i17_constant_splat_undef1(
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], <i17 12, i17 undef>
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], <i17 5, i17 undef>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
; CHECK-NEXT: ret <2 x i17> [[R]]
;
%shr = lshr <2 x i17> %x, <i17 12, i17 undef>
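
The fshr-style tests now expect an @llvm.fshl call with swapped operands and the complementary shift amount, which follows from the identity or (lshr X, W - C), (shl Y, C) == fshl(Y, X, C). A quick self-contained check of that identity at a 32-bit width (illustrative; the tests above use i42 and i17):

#include <cassert>
#include <cstdint>

// fshl reference semantics as before: high half of concat(Hi:Lo) << C.
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t C) {
  C %= 32;
  return C == 0 ? Hi : (Hi << C) | (Lo >> (32 - C));
}

int main() {
  uint32_t X = 0xCAFEBABE, Y = 0x0D15EA5E;
  for (uint32_t C = 1; C < 32; ++C)
    assert(((X >> (32 - C)) | (Y << C)) == fshl32(Y, X, C));
  return 0;
}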