Skip to content

Commit

Permalink
[AMDGPU] Implement IR variant of isFMAFasterThanFMulAndFAdd
Browse files Browse the repository at this point in the history
Change-Id: I2484db303227da9aa53cc8842283c4ba6a332b3a
  • Loading branch information
chinmaydd committed Jan 6, 2025
1 parent 8f17c90 commit 3d97f5c
Show file tree
Hide file tree
Showing 3 changed files with 246 additions and 0 deletions.
58 changes: 58 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5731,6 +5731,33 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
return false;
}

// Refer to comments added to the MIR variant of isFMAFasterThanFMulAndFAdd for
// specific details.
bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
Type *Ty) const {
SIModeRegisterDefaults Mode = SIModeRegisterDefaults(F, *Subtarget);
switch (Ty->getScalarSizeInBits()) {
case 32: {
if (!Subtarget->hasMadMacF32Insts())
return Subtarget->hasFastFMAF32();

if (Mode.FP32Denormals != DenormalMode::getPreserveSign())
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();

return Subtarget->hasFastFMAF32() && Subtarget->hasDLInsts();
}
case 64:
return true;
case 16:
return Subtarget->has16BitInsts() &&
Mode.FP64FP16Denormals != DenormalMode::getPreserveSign();
default:
break;
}

return false;
}

bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
if (!Ty.isScalar())
return false;
Expand Down Expand Up @@ -16945,6 +16972,37 @@ bool SITargetLowering::checkForPhysRegDependency(
return false;
}

/// Check if it is profitable to hoist instruction in then/else to if.
/// Not profitable if I and its user can form a FMA instruction
/// because we prefer FMSUB/FMADD.
///
/// \param I Candidate instruction for hoisting.
/// \returns false only when \p I is an fmul with a single user that is an
///          fadd/fsub, FMA is legal or custom for the user's operand type,
///          fusion is permitted by the target options, and FMA is reported as
///          faster than fmul + fadd for that type; true otherwise.
bool SITargetLowering::isProfitableToHoist(Instruction *I) const {
  // With several users (or none) there is no single fmul/fadd pair to keep
  // together, so hoisting is not blocked.
  if (!I->hasOneUse())
    return true;

  Instruction *User = I->user_back();
  // TODO: Add more patterns that are not profitable to hoist
  switch (I->getOpcode()) {
  case Instruction::FMul: {
    if (User->getOpcode() != Instruction::FSub &&
        User->getOpcode() != Instruction::FAdd)
      return true;

    const TargetOptions &Options = getTargetMachine().Options;
    const Function *F = I->getFunction();
    const DataLayout &DL = F->getDataLayout();
    Type *Ty = User->getOperand(0)->getType();

    // Hoisting stays profitable if any precondition for forming an FMA fails.
    return !isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) ||
           (Options.AllowFPOpFusion != FPOpFusion::Fast &&
            !Options.UnsafeFPMath) ||
           !isFMAFasterThanFMulAndFAdd(*F, Ty);
  }
  default:
    break;
  }

  // No unprofitable pattern matched.
  return true;
}

void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
Instruction *AI) const {
// Given: atomicrmw fadd ptr %addr, float %val ordering
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
EVT VT) const override;
// Overload queried with a MachineFunction and an LLT.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
const LLT Ty) const override;
// IR-level overload queried with a Function and an IR Type. The implementation
// in SIISelLowering.cpp switches on the scalar bit width of Ty and reads the
// function's denormal modes via SIModeRegisterDefaults.
bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override;

Expand Down Expand Up @@ -535,6 +536,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
const TargetInstrInfo *TII, unsigned &PhysReg,
int &Cost) const override;

/// Returns false when \p I is an fmul whose single user is an fadd/fsub that
/// could be fused with it into an FMA (FMA legal/custom, fusion allowed by the
/// target options, and FMA reported faster than fmul + fadd); returns true in
/// every other case.
bool isProfitableToHoist(Instruction *I) const override;

bool isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN = false,
Expand Down
185 changes: 185 additions & 0 deletions llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -fp-contract=fast < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-FP-CONTRACT %s
; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -enable-unsafe-fp-math --denormal-fp-math=ieee < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-UNSAFE-FP-IEEE %s
; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -enable-unsafe-fp-math --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-UNSAFE-FP-PRESERVE %s

; f64 case. Both arms compute the same 'fmul fast' from values available
; before the branch; in the else arm the product's only user is an fsub.
; The assertions below use the prefix shared by every run line, so all
; configurations expect the fmul to remain in each arm (not hoisted into the
; entry block), while the common load of %a is expected to be hoisted.
define double @_branch(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
; GFX-LABEL: define double @_branch(
; GFX-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX-NEXT: [[ENTRY:.*:]]
; GFX-NEXT: [[TMP0:%.*]] = load double, ptr [[Y]], align 8
; GFX-NEXT: [[CMP:%.*]] = fcmp oeq double [[TMP0]], 0.000000e+00
; GFX-NEXT: [[TMP1:%.*]] = load double, ptr [[X]], align 8
; GFX-NEXT: [[TMP2:%.*]] = load double, ptr [[A]], align 8
; GFX-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
; GFX: [[COMMON_RET:.*]]:
; GFX-NEXT: [[COMMON_RET_OP:%.*]] = phi double [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
; GFX-NEXT: ret double [[COMMON_RET_OP]]
; GFX: [[IF_THEN]]:
; GFX-NEXT: [[MUL]] = fmul fast double [[TMP1]], [[TMP2]]
; GFX-NEXT: [[ADD:%.*]] = fadd fast double 1.000000e+00, [[MUL]]
; GFX-NEXT: br label %[[COMMON_RET]]
; GFX: [[IF_ELSE]]:
; GFX-NEXT: [[MUL1:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
; GFX-NEXT: [[SUB]] = fsub fast double [[MUL1]], [[TMP0]]
; GFX-NEXT: br label %[[COMMON_RET]]
;
entry:
%0 = load double, ptr %y, align 8
%cmp = fcmp oeq double %0, 0.000000e+00
%1 = load double, ptr %x, align 8
br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry
%2 = load double, ptr %a, align 8
%mul = fmul fast double %1, %2
%add = fadd fast double 1.000000e+00, %mul
ret double %mul

if.else: ; preds = %entry
%3 = load double, ptr %a, align 8
%mul1 = fmul fast double %1, %3
%sub = fsub fast double %mul1, %0
ret double %sub
}

; f32 case. Same shape as the f64 test: each arm has an 'fmul fast' and, in the
; else arm, the product's only user is an fsub. The assertions use the prefix
; shared by every run line, so all configurations expect the fmul to remain in
; each arm, while the common load of %a is expected to be hoisted.
define float @_branch2(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
; GFX-LABEL: define float @_branch2(
; GFX-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
; GFX-NEXT: [[ENTRY:.*:]]
; GFX-NEXT: [[TMP0:%.*]] = load float, ptr [[Y]], align 8
; GFX-NEXT: [[CMP:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
; GFX-NEXT: [[TMP1:%.*]] = load float, ptr [[X]], align 8
; GFX-NEXT: [[TMP2:%.*]] = load float, ptr [[A]], align 8
; GFX-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
; GFX: [[COMMON_RET:.*]]:
; GFX-NEXT: [[COMMON_RET_OP:%.*]] = phi float [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
; GFX-NEXT: ret float [[COMMON_RET_OP]]
; GFX: [[IF_THEN]]:
; GFX-NEXT: [[MUL]] = fmul fast float [[TMP1]], [[TMP2]]
; GFX-NEXT: [[ADD:%.*]] = fadd fast float 1.000000e+00, [[MUL]]
; GFX-NEXT: br label %[[COMMON_RET]]
; GFX: [[IF_ELSE]]:
; GFX-NEXT: [[MUL1:%.*]] = fmul fast float [[TMP1]], [[TMP2]]
; GFX-NEXT: [[SUB]] = fsub fast float [[MUL1]], [[TMP0]]
; GFX-NEXT: br label %[[COMMON_RET]]
;
entry:
%0 = load float, ptr %y, align 8
%cmp = fcmp oeq float %0, 0.000000e+00
%1 = load float, ptr %x, align 8
br i1 %cmp, label %if.then, label %if.else


if.then: ; preds = %entry
%2 = load float, ptr %a, align 8
%mul = fmul fast float %1, %2
%add = fadd fast float 1.000000e+00, %mul
ret float %mul

if.else: ; preds = %entry
%3 = load float, ptr %a, align 8
%mul1 = fmul fast float %1, %3
%sub = fsub fast float %mul1, %0
ret float %sub
}

; f16 case. Unlike the f64/f32 tests, the expected output depends on the run
; configuration, so each run line has its own assertion group:
;  - fp-contract=fast and unsafe-fp-math with ieee denormals: the fmul is
;    expected to stay in each arm (not hoisted);
;  - unsafe-fp-math with preserve-sign denormals: a single fmul is expected in
;    the entry block, i.e. it is hoisted.
define half @_branch3(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
; GFX-FP-CONTRACT-LABEL: define half @_branch3(
; GFX-FP-CONTRACT-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
; GFX-FP-CONTRACT-NEXT: [[ENTRY:.*:]]
; GFX-FP-CONTRACT-NEXT: [[TMP0:%.*]] = load half, ptr [[Y]], align 8
; GFX-FP-CONTRACT-NEXT: [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
; GFX-FP-CONTRACT-NEXT: [[TMP1:%.*]] = load half, ptr [[X]], align 8
; GFX-FP-CONTRACT-NEXT: [[TMP2:%.*]] = load half, ptr [[A]], align 8
; GFX-FP-CONTRACT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
; GFX-FP-CONTRACT: [[COMMON_RET:.*]]:
; GFX-FP-CONTRACT-NEXT: [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
; GFX-FP-CONTRACT-NEXT: ret half [[COMMON_RET_OP]]
; GFX-FP-CONTRACT: [[IF_THEN]]:
; GFX-FP-CONTRACT-NEXT: [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
; GFX-FP-CONTRACT-NEXT: [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
; GFX-FP-CONTRACT-NEXT: br label %[[COMMON_RET]]
; GFX-FP-CONTRACT: [[IF_ELSE]]:
; GFX-FP-CONTRACT-NEXT: [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
; GFX-FP-CONTRACT-NEXT: [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
; GFX-FP-CONTRACT-NEXT: br label %[[COMMON_RET]]
;
; GFX-UNSAFE-FP-IEEE-LABEL: define half @_branch3(
; GFX-UNSAFE-FP-IEEE-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
; GFX-UNSAFE-FP-IEEE-NEXT: [[ENTRY:.*:]]
; GFX-UNSAFE-FP-IEEE-NEXT: [[TMP0:%.*]] = load half, ptr [[Y]], align 8
; GFX-UNSAFE-FP-IEEE-NEXT: [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
; GFX-UNSAFE-FP-IEEE-NEXT: [[TMP1:%.*]] = load half, ptr [[X]], align 8
; GFX-UNSAFE-FP-IEEE-NEXT: [[TMP2:%.*]] = load half, ptr [[A]], align 8
; GFX-UNSAFE-FP-IEEE-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
; GFX-UNSAFE-FP-IEEE: [[COMMON_RET:.*]]:
; GFX-UNSAFE-FP-IEEE-NEXT: [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
; GFX-UNSAFE-FP-IEEE-NEXT: ret half [[COMMON_RET_OP]]
; GFX-UNSAFE-FP-IEEE: [[IF_THEN]]:
; GFX-UNSAFE-FP-IEEE-NEXT: [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
; GFX-UNSAFE-FP-IEEE-NEXT: [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
; GFX-UNSAFE-FP-IEEE-NEXT: br label %[[COMMON_RET]]
; GFX-UNSAFE-FP-IEEE: [[IF_ELSE]]:
; GFX-UNSAFE-FP-IEEE-NEXT: [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
; GFX-UNSAFE-FP-IEEE-NEXT: [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
; GFX-UNSAFE-FP-IEEE-NEXT: br label %[[COMMON_RET]]
;
; GFX-UNSAFE-FP-PRESERVE-LABEL: define half @_branch3(
; GFX-UNSAFE-FP-PRESERVE-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
; GFX-UNSAFE-FP-PRESERVE-NEXT: [[ENTRY:.*:]]
; GFX-UNSAFE-FP-PRESERVE-NEXT: [[TMP0:%.*]] = load half, ptr [[Y]], align 8
; GFX-UNSAFE-FP-PRESERVE-NEXT: [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
; GFX-UNSAFE-FP-PRESERVE-NEXT: [[TMP1:%.*]] = load half, ptr [[X]], align 8
; GFX-UNSAFE-FP-PRESERVE-NEXT: [[TMP2:%.*]] = load half, ptr [[A]], align 8
; GFX-UNSAFE-FP-PRESERVE-NEXT: [[MUL:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
; GFX-UNSAFE-FP-PRESERVE-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
; GFX-UNSAFE-FP-PRESERVE: [[COMMON_RET:.*]]:
; GFX-UNSAFE-FP-PRESERVE-NEXT: [[COMMON_RET_OP:%.*]] = phi half [ [[MUL]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
; GFX-UNSAFE-FP-PRESERVE-NEXT: ret half [[COMMON_RET_OP]]
; GFX-UNSAFE-FP-PRESERVE: [[IF_THEN]]:
; GFX-UNSAFE-FP-PRESERVE-NEXT: [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
; GFX-UNSAFE-FP-PRESERVE-NEXT: br label %[[COMMON_RET]]
; GFX-UNSAFE-FP-PRESERVE: [[IF_ELSE]]:
; GFX-UNSAFE-FP-PRESERVE-NEXT: [[SUB]] = fsub fast half [[MUL]], [[TMP0]]
; GFX-UNSAFE-FP-PRESERVE-NEXT: br label %[[COMMON_RET]]
;
entry:
%0 = load half, ptr %y, align 8
%cmp = fcmp oeq half %0, 0.000000e+00
%1 = load half, ptr %x, align 8
br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry
%2 = load half, ptr %a, align 8
%mul = fmul fast half %1, %2
%add = fadd fast half 1.000000e+00, %mul
ret half %mul

if.else: ; preds = %entry
%3 = load half, ptr %a, align 8
%mul1 = fmul fast half %1, %3
%sub = fsub fast half %mul1, %0
ret half %sub
}

0 comments on commit 3d97f5c

Please sign in to comment.