Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Implement IR variant of isFMAFasterThanFMulAndFAdd #121465

Merged
merged 1 commit into from
Jan 10, 2025

Conversation

chinmaydd
Copy link
Contributor

Fixes #108751 . Thanks @Shoreshen for helping out with the test case.

@llvmbot
Copy link
Member

llvmbot commented Jan 2, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Chinmay Deshpande (chinmaydd)

Changes

Fixes #108751 . Thanks @Shoreshen for helping out with the test case.


Full diff: https://github.com/llvm/llvm-project/pull/121465.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+58)
  • (modified) llvm/lib/Target/AMDGPU/SIISelLowering.h (+3)
  • (added) llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll (+185)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 58b061f5c1af0d..e610f2627d2cd8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5728,6 +5728,33 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
   return false;
 }
 
+// Refer to comments added to the MIR variant of isFMAFasterThanFMulAndFAdd for
+// specific details.
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
+                                                  Type *Ty) const {
+  SIModeRegisterDefaults Mode = SIModeRegisterDefaults(F, *Subtarget);
+  switch (Ty->getScalarSizeInBits()) {
+  case 32: {
+    if (!Subtarget->hasMadMacF32Insts())
+      return Subtarget->hasFastFMAF32();
+
+    if (Mode.FP32Denormals != DenormalMode::getPreserveSign())
+      return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
+
+    return Subtarget->hasFastFMAF32() && Subtarget->hasDLInsts();
+  }
+  case 64:
+    return true;
+  case 16:
+    return Subtarget->has16BitInsts() &&
+           Mode.FP64FP16Denormals != DenormalMode::getPreserveSign();
+  default:
+    break;
+  }
+
+  return false;
+}
+
 bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
   if (!Ty.isScalar())
     return false;
@@ -16942,6 +16969,37 @@ bool SITargetLowering::checkForPhysRegDependency(
   return false;
 }
 
+/// Check if it is profitable to hoist instruction in then/else to if.
+/// Not profitable if I and it's user can form a FMA instruction
+/// because we prefer FMSUB/FMADD.
+bool SITargetLowering::isProfitableToHoist(Instruction *I) const {
+  if (!I->hasOneUse())
+    return true;
+
+  Instruction *User = I->user_back();
+  // TODO: Add more patterns that are not profitable to hoist
+  switch (I->getOpcode()) {
+  case Instruction::FMul: {
+    if (User->getOpcode() != Instruction::FSub &&
+        User->getOpcode() != Instruction::FAdd)
+      return true;
+
+    const TargetOptions &Options = getTargetMachine().Options;
+    const Function *F = I->getFunction();
+    const DataLayout &DL = F->getDataLayout();
+    Type *Ty = User->getOperand(0)->getType();
+
+    return !isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) ||
+           (Options.AllowFPOpFusion != FPOpFusion::Fast &&
+            !Options.UnsafeFPMath) ||
+           !isFMAFasterThanFMulAndFAdd(*F, Ty);
+  }
+  default:
+    return true;
+  }
+  return true;
+}
+
 void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
     Instruction *AI) const {
   // Given: atomicrmw fadd ptr %addr, float %val ordering
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 631f26542bbe6d..731fb5d79a90d4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -457,6 +457,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
                                   EVT VT) const override;
   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                   const LLT Ty) const override;
+  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
   bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
   bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override;
 
@@ -536,6 +537,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
                                  const TargetInstrInfo *TII, unsigned &PhysReg,
                                  int &Cost) const override;
 
+  bool isProfitableToHoist(Instruction *I) const override;
+
   bool isKnownNeverNaNForTargetNode(SDValue Op,
                                     const SelectionDAG &DAG,
                                     bool SNaN = false,
diff --git a/llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll b/llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll
new file mode 100644
index 00000000000000..3c204fda38d458
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -fp-contract=fast < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-FP-CONTRACT %s
+; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -enable-unsafe-fp-math --denormal-fp-math=ieee < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-UNSAFE-FP-IEEE %s
+; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -enable-unsafe-fp-math --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-UNSAFE-FP-PRESERVE %s
+
+define double @_branch(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-LABEL: define double @_branch(
+; GFX-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX-NEXT:  [[ENTRY:.*:]]
+; GFX-NEXT:    [[TMP0:%.*]] = load double, ptr [[Y]], align 8
+; GFX-NEXT:    [[CMP:%.*]] = fcmp oeq double [[TMP0]], 0.000000e+00
+; GFX-NEXT:    [[TMP1:%.*]] = load double, ptr [[X]], align 8
+; GFX-NEXT:    [[TMP2:%.*]] = load double, ptr [[A]], align 8
+; GFX-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX:       [[COMMON_RET:.*]]:
+; GFX-NEXT:    [[COMMON_RET_OP:%.*]] = phi double [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-NEXT:    ret double [[COMMON_RET_OP]]
+; GFX:       [[IF_THEN]]:
+; GFX-NEXT:    [[MUL]] = fmul fast double [[TMP1]], [[TMP2]]
+; GFX-NEXT:    [[ADD:%.*]] = fadd fast double 1.000000e+00, [[MUL]]
+; GFX-NEXT:    br label %[[COMMON_RET]]
+; GFX:       [[IF_ELSE]]:
+; GFX-NEXT:    [[MUL1:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
+; GFX-NEXT:    [[SUB]] = fsub fast double [[MUL1]], [[TMP0]]
+; GFX-NEXT:    br label %[[COMMON_RET]]
+;
+entry:
+  %0 = load double, ptr %y, align 8
+  %cmp = fcmp oeq double %0, 0.000000e+00
+  %1 = load double, ptr %x, align 8
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %2 = load double, ptr %a, align 8
+  %mul = fmul fast double %1, %2
+  %add = fadd fast double 1.000000e+00, %mul
+  ret double %mul
+
+if.else:                                          ; preds = %entry
+  %3 = load double, ptr %a, align 8
+  %mul1 = fmul fast double %1, %3
+  %sub = fsub fast double %mul1, %0
+  ret double %sub
+}
+
+define float @_branch2(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-LABEL: define float @_branch2(
+; GFX-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-NEXT:  [[ENTRY:.*:]]
+; GFX-NEXT:    [[TMP0:%.*]] = load float, ptr [[Y]], align 8
+; GFX-NEXT:    [[CMP:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
+; GFX-NEXT:    [[TMP1:%.*]] = load float, ptr [[X]], align 8
+; GFX-NEXT:    [[TMP2:%.*]] = load float, ptr [[A]], align 8
+; GFX-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX:       [[COMMON_RET:.*]]:
+; GFX-NEXT:    [[COMMON_RET_OP:%.*]] = phi float [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-NEXT:    ret float [[COMMON_RET_OP]]
+; GFX:       [[IF_THEN]]:
+; GFX-NEXT:    [[MUL]] = fmul fast float [[TMP1]], [[TMP2]]
+; GFX-NEXT:    [[ADD:%.*]] = fadd fast float 1.000000e+00, [[MUL]]
+; GFX-NEXT:    br label %[[COMMON_RET]]
+; GFX:       [[IF_ELSE]]:
+; GFX-NEXT:    [[MUL1:%.*]] = fmul fast float [[TMP1]], [[TMP2]]
+; GFX-NEXT:    [[SUB]] = fsub fast float [[MUL1]], [[TMP0]]
+; GFX-NEXT:    br label %[[COMMON_RET]]
+;
+entry:
+  %0 = load float, ptr %y, align 8
+  %cmp = fcmp oeq float %0, 0.000000e+00
+  %1 = load float, ptr %x, align 8
+  br i1 %cmp, label %if.then, label %if.else
+
+
+if.then:                                          ; preds = %entry
+  %2 = load float, ptr %a, align 8
+  %mul = fmul fast float %1, %2
+  %add = fadd fast float 1.000000e+00, %mul
+  ret float %mul
+
+if.else:                                          ; preds = %entry
+  %3 = load float, ptr %a, align 8
+  %mul1 = fmul fast float %1, %3
+  %sub = fsub fast float %mul1, %0
+  ret float %sub
+}
+
+define half @_branch3(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-CONTRACT-LABEL: define half @_branchr32(
+; GFX-CONTRACT-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-CONTRACT-NEXT:  [[ENTRY:.*:]]
+; GFX-CONTRACT-NEXT:    [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-CONTRACT-NEXT:    [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-CONTRACT-NEXT:    [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-CONTRACT-NEXT:    [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-CONTRACT-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-CONTRACT:       [[COMMON_RET:.*]]:
+; GFX-CONTRACT-NEXT:    [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-CONTRACT-NEXT:    ret half [[COMMON_RET_OP]]
+; GFX-CONTRACT:       [[IF_THEN]]:
+; GFX-CONTRACT-NEXT:    [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-CONTRACT-NEXT:    [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-CONTRACT-NEXT:    br label %[[COMMON_RET]]
+; GFX-CONTRACT:       [[IF_ELSE]]:
+; GFX-CONTRACT-NEXT:    [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-CONTRACT-NEXT:    [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
+; GFX-CONTRACT-NEXT:    br label %[[COMMON_RET]]
+;
+; GFX-FP-CONTRACT-LABEL: define half @_branch3(
+; GFX-FP-CONTRACT-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-FP-CONTRACT-NEXT:  [[ENTRY:.*:]]
+; GFX-FP-CONTRACT-NEXT:    [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-FP-CONTRACT-NEXT:    [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-FP-CONTRACT-NEXT:    [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-FP-CONTRACT-NEXT:    [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-FP-CONTRACT-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-FP-CONTRACT:       [[COMMON_RET:.*]]:
+; GFX-FP-CONTRACT-NEXT:    [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-FP-CONTRACT-NEXT:    ret half [[COMMON_RET_OP]]
+; GFX-FP-CONTRACT:       [[IF_THEN]]:
+; GFX-FP-CONTRACT-NEXT:    [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-FP-CONTRACT-NEXT:    [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-FP-CONTRACT-NEXT:    br label %[[COMMON_RET]]
+; GFX-FP-CONTRACT:       [[IF_ELSE]]:
+; GFX-FP-CONTRACT-NEXT:    [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-FP-CONTRACT-NEXT:    [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
+; GFX-FP-CONTRACT-NEXT:    br label %[[COMMON_RET]]
+;
+; GFX-UNSAFE-FP-IEEE-LABEL: define half @_branch3(
+; GFX-UNSAFE-FP-IEEE-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-UNSAFE-FP-IEEE-NEXT:  [[ENTRY:.*:]]
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-UNSAFE-FP-IEEE:       [[COMMON_RET:.*]]:
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-UNSAFE-FP-IEEE-NEXT:    ret half [[COMMON_RET_OP]]
+; GFX-UNSAFE-FP-IEEE:       [[IF_THEN]]:
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-UNSAFE-FP-IEEE-NEXT:    br label %[[COMMON_RET]]
+; GFX-UNSAFE-FP-IEEE:       [[IF_ELSE]]:
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
+; GFX-UNSAFE-FP-IEEE-NEXT:    br label %[[COMMON_RET]]
+;
+; GFX-UNSAFE-FP-PRESERVE-LABEL: define half @_branch3(
+; GFX-UNSAFE-FP-PRESERVE-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-UNSAFE-FP-PRESERVE-NEXT:  [[ENTRY:.*:]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[MUL:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-UNSAFE-FP-PRESERVE:       [[COMMON_RET:.*]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[COMMON_RET_OP:%.*]] = phi half [ [[MUL]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    ret half [[COMMON_RET_OP]]
+; GFX-UNSAFE-FP-PRESERVE:       [[IF_THEN]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    br label %[[COMMON_RET]]
+; GFX-UNSAFE-FP-PRESERVE:       [[IF_ELSE]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[SUB]] = fsub fast half [[MUL]], [[TMP0]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    br label %[[COMMON_RET]]
+;
+entry:
+  %0 = load half, ptr %y, align 8
+  %cmp = fcmp oeq half %0, 0.000000e+00
+  %1 = load half, ptr %x, align 8
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %2 = load half, ptr %a, align 8
+  %mul = fmul fast half %1, %2
+  %add = fadd fast half 1.000000e+00, %mul
+  ret half %mul
+
+if.else:                                          ; preds = %entry
+  %3 = load half, ptr %a, align 8
+  %mul1 = fmul fast half %1, %3
+  %sub = fsub fast half %mul1, %0
+  ret half %sub
+}
+

llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll Outdated Show resolved Hide resolved
llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll Outdated Show resolved Hide resolved
llvm/lib/Target/AMDGPU/SIISelLowering.cpp Outdated Show resolved Hide resolved
llvm/lib/Target/AMDGPU/SIISelLowering.cpp Outdated Show resolved Hide resolved
llvm/lib/Target/AMDGPU/SIISelLowering.cpp Outdated Show resolved Hide resolved
llvm/lib/Target/AMDGPU/SIISelLowering.cpp Outdated Show resolved Hide resolved
llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll Outdated Show resolved Hide resolved
llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll Outdated Show resolved Hide resolved
llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll Outdated Show resolved Hide resolved
Copy link
Contributor

@arsenm arsenm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Diff isn't showing the test changes

@chinmaydd
Copy link
Contributor Author

I'll update the test once the pre-commit PR lands.

@arsenm
Copy link
Contributor

arsenm commented Jan 9, 2025

I'll update the test once the pre-commit PR lands.

Should use stacked PRs instead of manually referring to dependent PRs

@chinmaydd
Copy link
Contributor Author

Right, I'll keep that in mind. Thanks @arsenm .

llvm/lib/Target/AMDGPU/SIISelLowering.cpp Outdated Show resolved Hide resolved
llvm/lib/Target/AMDGPU/SIISelLowering.cpp Outdated Show resolved Hide resolved
Change-Id: I4e515a1ca6c792500ea8a946e17dc6145e0ecedc
Type *Ty) const {
switch (Ty->getScalarSizeInBits()) {
case 16: {
SIModeRegisterDefaults Mode = SIModeRegisterDefaults(F, *Subtarget);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should probably try to defer parsing the attribute as late as possible (i.e. check the features first, and only check this if the mode matters for the subtarget)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will handle this in a follow-up PR (handling modifiers) soon

@chinmaydd chinmaydd merged commit 211bcf6 into llvm:main Jan 10, 2025
8 checks passed
@chinmaydd chinmaydd deleted the chinmaydd/fma-fix branch January 10, 2025 03:35
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jan 10, 2025

LLVM Buildbot has detected a new failure on builder clang-cmake-x86_64-avx512-win running on avx512-intel64-win while building llvm at step 4 "cmake stage 1".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/81/builds/3489

Here is the relevant piece of the build log for the reference
Step 4 (cmake stage 1) failure: 'cmake -G ...' (failure)
'cmake' is not recognized as an internal or external command,
operable program or batch file.

@llvm-ci
Copy link
Collaborator

llvm-ci commented Jan 10, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-expensive-checks-debian running on gribozavr4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/11743

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
Input file: /b/1/llvm-clang-x86_64-expensive-checks-debian/build/test/tools/llvm-gsymutil/ARM_AArch64/Output/macho-merged-funcs-dwarf.yaml.tmp.dSYM
Output file (aarch64): /b/1/llvm-clang-x86_64-expensive-checks-debian/build/test/tools/llvm-gsymutil/ARM_AArch64/Output/macho-merged-funcs-dwarf.yaml.tmp.default.gSYM
Loaded 3 functions from DWARF.
Loaded 3 functions from symbol table.
warning: same address range contains different debug info. Removing:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000047
addr=0x0000000000000248, file=  3, line=  5
addr=0x0000000000000254, file=  3, line=  7
addr=0x0000000000000258, file=  3, line=  9
addr=0x000000000000025c, file=  3, line=  8
addr=0x0000000000000260, file=  3, line= 11
addr=0x0000000000000264, file=  3, line= 10
addr=0x0000000000000268, file=  3, line=  6


In favor of this one:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000001
addr=0x0000000000000248, file=  1, line=  5
addr=0x0000000000000254, file=  1, line=  7
addr=0x0000000000000258, file=  1, line=  9
addr=0x000000000000025c, file=  1, line=  8
addr=0x0000000000000260, file=  1, line= 11
addr=0x0000000000000264, file=  1, line= 10
addr=0x0000000000000268, file=  1, line=  6


warning: same address range contains different debug info. Removing:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000001
addr=0x0000000000000248, file=  1, line=  5
addr=0x0000000000000254, file=  1, line=  7
addr=0x0000000000000258, file=  1, line=  9
addr=0x000000000000025c, file=  1, line=  8
addr=0x0000000000000260, file=  1, line= 11
addr=0x0000000000000264, file=  1, line= 10
addr=0x0000000000000268, file=  1, line=  6


In favor of this one:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000030
addr=0x0000000000000248, file=  2, line=  5
addr=0x0000000000000254, file=  2, line=  7
addr=0x0000000000000258, file=  2, line=  9
addr=0x000000000000025c, file=  2, line=  8
addr=0x0000000000000260, file=  2, line= 11
addr=0x0000000000000264, file=  2, line= 10
...

BaiXilin pushed a commit to BaiXilin/llvm-fix-vnni-instr-types that referenced this pull request Jan 12, 2025
Mel-Chen pushed a commit to Mel-Chen/llvm-project that referenced this pull request Jan 13, 2025
DKLoehr pushed a commit to DKLoehr/llvm-project that referenced this pull request Jan 17, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

AMDGPU should implement IR version of isFMAFasterThanFMulAndFAdd
4 participants