[llvm-mca][AArch64] Add AArch64 version of clearsSuperRegisters. (llv…

…m#92548) This patch overrides the clearsSuperRegisters method defined in MCInstrAnalysis to identify register writes that clear the upper portion of all super-registers on the AArch64 architecture. On AArch64, a write to a general-purpose register of 32-bit data size is defined to use the lower 32 bits of the register and zero-extend the upper 32 bits. Similarly, SIMD and FP instructions operating on scalar data only access the lower bits of the SIMD&FP register. The unused upper bits are cleared to zero on a write. This also applies to SIMD vector registers when the element size in bits multiplied by the number of lanes is less than 128. The upper 64 bits of the vector register are cleared to zero on a write.
Xilinx · May 22, 2024 · 267de85 · 267de85
1 parent 7d9634e
commit 267de85
Show file tree

Hide file tree

Showing 3 changed files with 1,652 additions and 0 deletions.
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -430,6 +430,55 @@ class AArch64MCInstrAnalysis : public MCInstrAnalysis {
     return false;
   }
 
+  /// Report which register definitions of \p Inst clear the upper portion of
+  /// their super-registers.
+  ///
+  /// \param MRI  Register info used to look up the AArch64 register classes.
+  /// \param Inst Instruction whose register definitions are inspected.
+  /// \param Mask One bit per definition: explicit definitions occupy bits
+  ///             [0, NumDefs) and implicit definitions follow them. All bits
+  ///             are cleared first; a bit is then set when the matching
+  ///             register belongs to one of the classes checked below.
+  /// \returns true if at least one bit of \p Mask has been set.
+  bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst,
+                            APInt &Mask) const override {
+    const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
+    const unsigned NumDefs = Desc.getNumDefs();
+    // Fetch the implicit definitions once instead of on every loop iteration.
+    const ArrayRef<MCPhysReg> ImplicitDefs = Desc.implicit_defs();
+    const unsigned NumImplicitDefs = ImplicitDefs.size();
+    assert(Mask.getBitWidth() == NumDefs + NumImplicitDefs &&
+           "Unexpected number of bits in the mask!");
+    // 32-bit General Purpose Register class.
+    const MCRegisterClass &GPR32RC = MRI.getRegClass(AArch64::GPR32RegClassID);
+    // Floating Point Register classes.
+    const MCRegisterClass &FPR8RC = MRI.getRegClass(AArch64::FPR8RegClassID);
+    const MCRegisterClass &FPR16RC = MRI.getRegClass(AArch64::FPR16RegClassID);
+    const MCRegisterClass &FPR32RC = MRI.getRegClass(AArch64::FPR32RegClassID);
+    const MCRegisterClass &FPR64RC = MRI.getRegClass(AArch64::FPR64RegClassID);
+    const MCRegisterClass &FPR128RC =
+        MRI.getRegClass(AArch64::FPR128RegClassID);
+
+    // Capture by reference: the lambda never outlives this function, so there
+    // is no need to copy the register-class descriptors into the closure.
+    auto ClearsSuperReg = [&](unsigned RegID) {
+      // An update to the lower 32 bits of a 64 bit integer register is
+      // architecturally defined to zero extend the upper 32 bits on a write.
+      if (GPR32RC.contains(RegID))
+        return true;
+      // SIMD&FP instructions operating on scalar data only access the lower
+      // bits of a register; the upper bits are zeroed on a write. For SIMD
+      // vector registers smaller than 128 bits, the upper 64 bits of the
+      // register are zeroed on a write.
+      // When VL is higher than 128 bits, any write to a SIMD&FP register sets
+      // bits higher than 128 to zero.
+      return FPR8RC.contains(RegID) || FPR16RC.contains(RegID) ||
+             FPR32RC.contains(RegID) || FPR64RC.contains(RegID) ||
+             FPR128RC.contains(RegID);
+    };
+
+    Mask.clearAllBits();
+    // Explicit definitions are the first NumDefs operands of the instruction.
+    for (unsigned I = 0; I < NumDefs; ++I) {
+      const MCOperand &Op = Inst.getOperand(I);
+      if (ClearsSuperReg(Op.getReg()))
+        Mask.setBit(I);
+    }
+
+    // Implicit definitions are recorded after the explicit ones in the mask.
+    for (unsigned I = 0; I < NumImplicitDefs; ++I) {
+      if (ClearsSuperReg(ImplicitDefs[I]))
+        Mask.setBit(NumDefs + I);
+    }
+
+    return Mask.getBoolValue();
+  }
+
   std::vector<std::pair<uint64_t, uint64_t>>
   findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
                  const Triple &TargetTriple) const override {