[RISCV] fix SP recovery in a function epilogue #110809

dlav-sc · 2024-10-02T09:34:58Z

Currently, when the frame pointer (fp) is present and sp is adjusted a second time, sp recovery in the function epilogue is suboptimal. For example:

lui a0, 2
sub sp, s0, a0
addi a0, a0, -2044
add sp, sp, a0

This patch improves sp register recovery in such cases and the code snippet above becomes:

addi sp, s0, -2044

llvmbot · 2024-10-02T09:35:35Z

@llvm/pr-subscribers-backend-risc-v

Author: None (dlav-sc)

Changes

This patch fixes SP register recovery in the function epilogue.

Full diff: https://github.com/llvm/llvm-project/pull/110809.diff

8 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVFrameLowering.cpp (+68-38)
(modified) llvm/lib/Target/RISCV/RISCVFrameLowering.h (+6)
(modified) llvm/test/CodeGen/RISCV/branch-relaxation.ll (+4-16)
(modified) llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir (+1-3)
(modified) llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll (+1-3)
(modified) llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir (+1-3)
(modified) llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir (+1-3)
(modified) llvm/test/CodeGen/RISCV/stack-realignment.ll (+12-40)

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index b0c525ea8c2996..7cbd1a35b25839 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -755,6 +755,19 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   }
 }
 
+void RISCVFrameLowering::deallocateStack(MachineFunction &MF,
+                                         MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MBBI,
+                                         const DebugLoc &DL, uint64_t StackSize,
+                                         int64_t CFAOffset) const {
+  const RISCVRegisterInfo *RI = STI.getRegisterInfo();
+
+  Register SPReg = getSPReg(STI);
+
+  RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
+                MachineInstr::FrameDestroy, getStackAlign());
+}
+
 void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
   const RISCVRegisterInfo *RI = STI.getRegisterInfo();
@@ -786,59 +799,70 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
       --MBBI;
   }
 
-  const auto &CSI = getUnmanagedCSI(MF, MFI.getCalleeSavedInfo());
+  const auto &CSI = MFI.getCalleeSavedInfo();
 
   // Skip to before the restores of scalar callee-saved registers
   // FIXME: assumes exactly one instruction is used to restore each
   // callee-saved register.
-  auto LastFrameDestroy = MBBI;
-  if (!CSI.empty())
-    LastFrameDestroy = std::prev(MBBI, CSI.size());
+  auto LastFrameDestroy = std::prev(MBBI, getUnmanagedCSI(MF, CSI).size());
 
-  uint64_t RealStackSize = getStackSizeWithRVVPadding(MF);
-  uint64_t StackSize = RealStackSize - RVFI->getReservedSpillsSize();
-  uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
+  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
+  uint64_t RealStackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
+                                               : getStackSizeWithRVVPadding(MF);
+  uint64_t StackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
+                                           : getStackSizeWithRVVPadding(MF) -
+                                                 RVFI->getReservedSpillsSize();
+  uint64_t FPOffset = FirstSPAdjustAmount ? FirstSPAdjustAmount
+                                          : getStackSizeWithRVVPadding(MF) -
+                                                RVFI->getVarArgsSaveSize();
   uint64_t RVVStackSize = RVFI->getRVVStackSize();
 
-  // Restore the stack pointer using the value of the frame pointer. Only
-  // necessary if the stack pointer was modified, meaning the stack size is
-  // unknown.
-  //
-  // In order to make sure the stack point is right through the EH region,
-  // we also need to restore stack pointer from the frame pointer if we
-  // don't preserve stack space within prologue/epilogue for outgoing variables,
-  // normally it's just checking the variable sized object is present or not
-  // is enough, but we also don't preserve that at prologue/epilogue when
-  // have vector objects in stack.
-  if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
-      !hasReservedCallFrame(MF)) {
-    assert(hasFP(MF) && "frame pointer should not have been eliminated");
-    RI->adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg,
-                  StackOffset::getFixed(-FPOffset),
-                  MachineInstr::FrameDestroy, getStackAlign());
-  } else {
-    if (RVVStackSize)
+  bool RestoreFP = RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
+                   !hasReservedCallFrame(MF);
+
+  if (RVVStackSize) {
+    // If restoreFP the stack pointer will be restored using the frame pointer
+    // value.
+    if (!RestoreFP) {
       adjustStackForRVV(MF, MBB, LastFrameDestroy, DL, RVVStackSize,
                         MachineInstr::FrameDestroy);
+    }
   }
 
-  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
   if (FirstSPAdjustAmount) {
     uint64_t SecondSPAdjustAmount =
         getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
     assert(SecondSPAdjustAmount > 0 &&
            "SecondSPAdjustAmount should be greater than zero");
 
-    RI->adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg,
-                  StackOffset::getFixed(SecondSPAdjustAmount),
-                  MachineInstr::FrameDestroy, getStackAlign());
+    // If restoreFP the stack pointer will be restored using the frame pointer
+    // value.
+    if (!RestoreFP) {
+      RI->adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg,
+                    StackOffset::getFixed(SecondSPAdjustAmount),
+                    MachineInstr::FrameDestroy, getStackAlign());
+    }
   }
 
-  if (FirstSPAdjustAmount)
-    StackSize = FirstSPAdjustAmount;
+  // Restore the stack pointer using the value of the frame pointer. Only
+  // necessary if the stack pointer was modified, meaning the stack size is
+  // unknown.
+  //
+  // In order to make sure the stack point is right through the EH region,
+  // we also need to restore stack pointer from the frame pointer if we
+  // don't preserve stack space within prologue/epilogue for outgoing variables,
+  // normally it's just checking the variable sized object is present or not
+  // is enough, but we also don't preserve that at prologue/epilogue when
+  // have vector objects in stack.
+  if (RestoreFP) {
+    RI->adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg,
+                  StackOffset::getFixed(-FPOffset), MachineInstr::FrameDestroy,
+                  getStackAlign());
+  }
 
-  if (RVFI->isPushable(MF) && MBBI != MBB.end() &&
-      MBBI->getOpcode() == RISCV::CM_POP) {
+  bool ApplyPop = RVFI->isPushable(MF) && MBBI != MBB.end() &&
+                  MBBI->getOpcode() == RISCV::CM_POP;
+  if (ApplyPop) {
     // Use available stack adjustment in pop instruction to deallocate stack
     // space. Align the stack size down to a multiple of 16. This is needed for
     // RVE.
@@ -846,14 +870,19 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
     uint64_t Spimm = std::min(alignDown(StackSize, 16), (uint64_t)48);
     MBBI->getOperand(1).setImm(Spimm);
     StackSize -= Spimm;
-  }
 
-  // Deallocate stack
-  if (StackSize != 0) {
-    RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
-                  MachineInstr::FrameDestroy, getStackAlign());
+    if (StackSize != 0)
+      deallocateStack(MF, MBB, MBBI, DL, StackSize,
+                      /*stack_adj of cm.pop instr*/ RealStackSize - StackSize);
+
+    MBBI = std::next(MBBI);
   }
 
+  // Deallocate stack if StackSize isn't a zero and if we didn't already do it
+  // during cm.pop handling.
+  if (StackSize != 0 && !ApplyPop)
+    deallocateStack(MF, MBB, MBBI, DL, StackSize, 0);
+
   // Emit epilogue for shadow call stack.
   emitSCSEpilogue(MF, MBB, MBBI, DL);
 }
@@ -1566,6 +1595,7 @@ void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
     int FI = CS.getFrameIdx();
     if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) {
       MCRegister BaseReg = TRI.getSubReg(CS.getReg(), RISCV::sub_vrm1_0);
+
       // If it's not a grouped vector register, it doesn't have subregister, so
       // the base register is just itself.
       if (BaseReg == RISCV::NoRegister)
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 28ab4aff3b9d51..89f95f2aa04aa6 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -88,9 +88,15 @@ class RISCVFrameLowering : public TargetFrameLowering {
   void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                          int64_t Amount, MachineInstr::MIFlag Flag) const;
+
   void emitCalleeSavedRVVPrologCFI(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    bool HasFP) const;
+
+  void deallocateStack(MachineFunction &MF, MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+                       uint64_t StackSize, int64_t CFAOffset) const;
+
   std::pair<int64_t, Align>
   assignRVVStackObjectOffsets(MachineFunction &MF) const;
 };
diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation.ll b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
index 3d48dc9637eaed..ec77d54da116d3 100644
--- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll
+++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
@@ -824,10 +824,7 @@ define void @relax_jal_spill_32_adjust_spill_slot() {
 ; CHECK-RV32-NEXT:    #APP
 ; CHECK-RV32-NEXT:    # reg use t6
 ; CHECK-RV32-NEXT:    #NO_APP
-; CHECK-RV32-NEXT:    lui a0, 2
-; CHECK-RV32-NEXT:    sub sp, s0, a0
-; CHECK-RV32-NEXT:    addi a0, a0, -2032
-; CHECK-RV32-NEXT:    add sp, sp, a0
+; CHECK-RV32-NEXT:    addi sp, s0, -2032
 ; CHECK-RV32-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw s1, 2020(sp) # 4-byte Folded Reload
@@ -1073,10 +1070,7 @@ define void @relax_jal_spill_32_adjust_spill_slot() {
 ; CHECK-RV64-NEXT:    #APP
 ; CHECK-RV64-NEXT:    # reg use t6
 ; CHECK-RV64-NEXT:    #NO_APP
-; CHECK-RV64-NEXT:    lui a0, 2
-; CHECK-RV64-NEXT:    sub sp, s0, a0
-; CHECK-RV64-NEXT:    addiw a0, a0, -2032
-; CHECK-RV64-NEXT:    add sp, sp, a0
+; CHECK-RV64-NEXT:    addi sp, s0, -2032
 ; CHECK-RV64-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
 ; CHECK-RV64-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; CHECK-RV64-NEXT:    ld s1, 2008(sp) # 8-byte Folded Reload
@@ -2323,10 +2317,7 @@ define void @relax_jal_spill_64_adjust_spill_slot() {
 ; CHECK-RV32-NEXT:    #APP
 ; CHECK-RV32-NEXT:    # reg use t6
 ; CHECK-RV32-NEXT:    #NO_APP
-; CHECK-RV32-NEXT:    lui a0, 2
-; CHECK-RV32-NEXT:    sub sp, s0, a0
-; CHECK-RV32-NEXT:    addi a0, a0, -2032
-; CHECK-RV32-NEXT:    add sp, sp, a0
+; CHECK-RV32-NEXT:    addi sp, s0, -2032
 ; CHECK-RV32-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw s1, 2020(sp) # 4-byte Folded Reload
@@ -2560,10 +2551,7 @@ define void @relax_jal_spill_64_adjust_spill_slot() {
 ; CHECK-RV64-NEXT:    #APP
 ; CHECK-RV64-NEXT:    # reg use t6
 ; CHECK-RV64-NEXT:    #NO_APP
-; CHECK-RV64-NEXT:    lui a0, 2
-; CHECK-RV64-NEXT:    sub sp, s0, a0
-; CHECK-RV64-NEXT:    addiw a0, a0, -2032
-; CHECK-RV64-NEXT:    add sp, sp, a0
+; CHECK-RV64-NEXT:    addi sp, s0, -2032
 ; CHECK-RV64-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
 ; CHECK-RV64-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; CHECK-RV64-NEXT:    ld s1, 2008(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
index 5f0e1a9b9aa24c..43fb0c10ca46f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
@@ -46,9 +46,7 @@ body: |
     ; CHECK-NEXT: $x10 = ADDI killed $x10, -2048
     ; CHECK-NEXT: $x10 = ADDI killed $x10, -224
     ; CHECK-NEXT: VS1R_V killed renamable $v8, killed renamable $x10
-    ; CHECK-NEXT: $x2 = frame-destroy ADDI $x8, -2048
-    ; CHECK-NEXT: $x2 = frame-destroy ADDI killed $x2, -224
-    ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 240
+    ; CHECK-NEXT: $x2 = frame-destroy ADDI $x8, -2032
     ; CHECK-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.3)
     ; CHECK-NEXT: $x8 = LD $x2, 2016 :: (load (s64) from %stack.4)
     ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 2032
diff --git a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
index c1ce2e988fc511..c0b10be847d1ff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
@@ -109,9 +109,7 @@ define riscv_vector_cc void @local_stack_allocation_frame_pointer() "frame-point
 ; SPILL-O2-NEXT:    addi sp, sp, -480
 ; SPILL-O2-NEXT:    lbu a0, -1912(s0)
 ; SPILL-O2-NEXT:    sb a0, -1912(s0)
-; SPILL-O2-NEXT:    addi sp, s0, -2048
-; SPILL-O2-NEXT:    addi sp, sp, -464
-; SPILL-O2-NEXT:    addi sp, sp, 480
+; SPILL-O2-NEXT:    addi sp, s0, -2032
 ; SPILL-O2-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
 ; SPILL-O2-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; SPILL-O2-NEXT:    addi sp, sp, 2032
diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
index c4bc794b8aeb38..1b9ce12af01f96 100644
--- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
@@ -150,9 +150,7 @@ body:             |
   ; CHECK-NEXT:   PseudoBR %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   $x2 = frame-destroy ADDI $x8, -2048
-  ; CHECK-NEXT:   $x2 = frame-destroy ADDI killed $x2, -256
-  ; CHECK-NEXT:   $x2 = frame-destroy ADDI $x2, 272
+  ; CHECK-NEXT:   $x2 = frame-destroy ADDI $x8, -2032
   ; CHECK-NEXT:   $x1 = LD $x2, 2024 :: (load (s64) from %stack.3)
   ; CHECK-NEXT:   $x8 = LD $x2, 2016 :: (load (s64) from %stack.4)
   ; CHECK-NEXT:   $x18 = LD $x2, 2008 :: (load (s64) from %stack.5)
diff --git a/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir b/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
index b4d8805b65bd8f..22a7425bf98b8e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
@@ -33,9 +33,7 @@
   ; CHECK-NEXT:    vs1r.v v25, (a0) # Unknown-size Folded Spill
   ; CHECK-NEXT:    ld a0, 8(sp)
   ; CHECK-NEXT:    call spillslot
-  ; CHECK-NEXT:    addi sp, s0, -2048
-  ; CHECK-NEXT:    addi sp, sp, -256
-  ; CHECK-NEXT:    addi sp, sp, 272
+  ; CHECK-NEXT:    addi sp, s0, -2032
   ; CHECK-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
   ; CHECK-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
   ; CHECK-NEXT:    addi sp, sp, 2032
diff --git a/llvm/test/CodeGen/RISCV/stack-realignment.ll b/llvm/test/CodeGen/RISCV/stack-realignment.ll
index 034ebadc76af26..58876e4888a913 100644
--- a/llvm/test/CodeGen/RISCV/stack-realignment.ll
+++ b/llvm/test/CodeGen/RISCV/stack-realignment.ll
@@ -815,8 +815,7 @@ define void @caller1024() {
 ; RV32I-NEXT:    andi sp, sp, -1024
 ; RV32I-NEXT:    addi a0, sp, 1024
 ; RV32I-NEXT:    call callee
-; RV32I-NEXT:    addi sp, s0, -2048
-; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    addi sp, s0, -2032
 ; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 2032
@@ -836,8 +835,7 @@ define void @caller1024() {
 ; RV32I-ILP32E-NEXT:    andi sp, sp, -1024
 ; RV32I-ILP32E-NEXT:    addi a0, sp, 1024
 ; RV32I-ILP32E-NEXT:    call callee
-; RV32I-ILP32E-NEXT:    addi sp, s0, -2048
-; RV32I-ILP32E-NEXT:    addi sp, sp, 4
+; RV32I-ILP32E-NEXT:    addi sp, s0, -2044
 ; RV32I-ILP32E-NEXT:    lw ra, 2040(sp) # 4-byte Folded Reload
 ; RV32I-ILP32E-NEXT:    lw s0, 2036(sp) # 4-byte Folded Reload
 ; RV32I-ILP32E-NEXT:    addi sp, sp, 2044
@@ -857,8 +855,7 @@ define void @caller1024() {
 ; RV64I-NEXT:    andi sp, sp, -1024
 ; RV64I-NEXT:    addi a0, sp, 1024
 ; RV64I-NEXT:    call callee
-; RV64I-NEXT:    addi sp, s0, -2048
-; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    addi sp, s0, -2032
 ; RV64I-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 2032
@@ -878,8 +875,7 @@ define void @caller1024() {
 ; RV64I-LP64E-NEXT:    andi sp, sp, -1024
 ; RV64I-LP64E-NEXT:    addi a0, sp, 1024
 ; RV64I-LP64E-NEXT:    call callee
-; RV64I-LP64E-NEXT:    addi sp, s0, -2048
-; RV64I-LP64E-NEXT:    addi sp, sp, 8
+; RV64I-LP64E-NEXT:    addi sp, s0, -2040
 ; RV64I-LP64E-NEXT:    ld ra, 2032(sp) # 8-byte Folded Reload
 ; RV64I-LP64E-NEXT:    ld s0, 2024(sp) # 8-byte Folded Reload
 ; RV64I-LP64E-NEXT:    addi sp, sp, 2040
@@ -959,10 +955,7 @@ define void @caller2048() {
 ; RV32I-NEXT:    addi a0, sp, 2047
 ; RV32I-NEXT:    addi a0, a0, 1
 ; RV32I-NEXT:    call callee
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    sub sp, s0, a0
-; RV32I-NEXT:    addi sp, sp, 2032
-; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    addi sp, s0, -2032
 ; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 2032
@@ -984,10 +977,7 @@ define void @caller2048() {
 ; RV32I-ILP32E-NEXT:    addi a0, sp, 2047
 ; RV32I-ILP32E-NEXT:    addi a0, a0, 1
 ; RV32I-ILP32E-NEXT:    call callee
-; RV32I-ILP32E-NEXT:    lui a0, 1
-; RV32I-ILP32E-NEXT:    sub sp, s0, a0
-; RV32I-ILP32E-NEXT:    addi sp, sp, 2044
-; RV32I-ILP32E-NEXT:    addi sp, sp, 8
+; RV32I-ILP32E-NEXT:    addi sp, s0, -2044
 ; RV32I-ILP32E-NEXT:    lw ra, 2040(sp) # 4-byte Folded Reload
 ; RV32I-ILP32E-NEXT:    lw s0, 2036(sp) # 4-byte Folded Reload
 ; RV32I-ILP32E-NEXT:    addi sp, sp, 2044
@@ -1009,10 +999,7 @@ define void @caller2048() {
 ; RV64I-NEXT:    addi a0, sp, 2047
 ; RV64I-NEXT:    addi a0, a0, 1
 ; RV64I-NEXT:    call callee
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    sub sp, s0, a0
-; RV64I-NEXT:    addi sp, sp, 2032
-; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    addi sp, s0, -2032
 ; RV64I-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 2032
@@ -1034,10 +1021,7 @@ define void @caller2048() {
 ; RV64I-LP64E-NEXT:    addi a0, sp, 2047
 ; RV64I-LP64E-NEXT:    addi a0, a0, 1
 ; RV64I-LP64E-NEXT:    call callee
-; RV64I-LP64E-NEXT:    lui a0, 1
-; RV64I-LP64E-NEXT:    sub sp, s0, a0
-; RV64I-LP64E-NEXT:    addi sp, sp, 2040
-; RV64I-LP64E-NEXT:    addi sp, sp, 16
+; RV64I-LP64E-NEXT:    addi sp, s0, -2040
 ; RV64I-LP64E-NEXT:    ld ra, 2032(sp) # 8-byte Folded Reload
 ; RV64I-LP64E-NEXT:    ld s0, 2024(sp) # 8-byte Folded Reload
 ; RV64I-LP64E-NEXT:    addi sp, sp, 2040
@@ -1119,10 +1103,7 @@ define void @caller4096() {
 ; RV32I-NEXT:    lui a0, 1
 ; RV32I-NEXT:    add a0, sp, a0
 ; RV32I-NEXT:    call callee
-; RV32I-NEXT:    lui a0, 2
-; RV32I-NEXT:    sub sp, s0, a0
-; RV32I-NEXT:    addi a0, a0, -2032
-; RV32I-NEXT:    add sp, sp, a0
+; RV32I-NEXT:    addi sp, s0, -2032
 ; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 2032
@@ -1146,10 +1127,7 @@ define void @caller4096() {
 ; RV32I-ILP32E-NEXT:    lui a0, 1
 ; RV32I-ILP32E-NEXT:    add a0, sp, a0
 ; RV32I-ILP32E-NEXT:    call callee
-; RV32I-ILP32E-NEXT:    lui a0, 2
-; RV32I-ILP32E-NEXT:    sub sp, s0, a0
-; RV32I-ILP32E-NEXT:    addi a0, a0, -2044
-; RV32I-ILP32E-NEXT:    add sp, sp, a0
+; RV32I-ILP32E-NEXT:    addi sp, s0, -2044
 ; RV32I-ILP32E-NEXT:    lw ra, 2040(sp) # 4-byte Folded Reload
 ; RV32I-ILP32E-NEXT:    lw s0, 2036(sp) # 4-byte Folded Reload
 ; RV32I-ILP32E-NEXT:    addi sp, sp, 2044
@@ -1173,10 +1151,7 @@ define void @caller4096() {
 ; RV64I-NEXT:    lui a0, 1
 ; RV64I-NEXT:    add a0, sp, a0
 ; RV64I-NEXT:    call callee
-; RV64I-NEXT:    lui a0, 2
-; RV64I-NEXT:    sub sp, s0, a0
-; RV64I-NEXT:    addiw a0, a0, -2032
-; RV64I-NEXT:    add sp, sp, a0
+; RV64I-NEXT:    addi sp, s0, -2032
 ; RV64I-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 2032
@@ -1200,10 +1175,7 @@ define void @caller4096() {
 ; RV64I-LP64E-NEXT:    lui a0, 1
 ; RV64I-LP64E-NEXT:    add a0, sp, a0
 ; RV64I-LP64E-NEXT:    call callee
-; RV64I-LP64E-NEXT:    lui a0, 2
-; RV64I-LP64E-NEXT:    sub sp, s0, a0
-; RV64I-LP64E-NEXT:    addiw a0, a0, -2040
-; RV64I-LP64E-NEXT:    add sp, sp, a0
+; RV64I-LP64E-NEXT:    addi sp, s0, -2040
 ; RV64I-LP64E-NEXT:    ld ra, 2032(sp) # 8-byte Folded Reload
 ; RV64I-LP64E-NEXT:    ld s0, 2024(sp) # 8-byte Folded Reload
 ; RV64I-LP64E-NEXT:    addi sp, sp, 2040

dlav-sc · 2024-10-02T09:49:40Z

@topperc @kito-cheng FYI

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

llvm/lib/Target/RISCV/RISCVFrameLowering.h

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

kito-cheng

LGTM :)

topperc · 2024-10-02T16:25:50Z

This patch needs a better description. Is it fixing a correctness issue or just making code more optimal?

dlav-sc · 2024-10-02T20:33:36Z

This patch needs a better description. Is it fixing a correctness issue or just making code more optimal?

I've updated the description

topperc · 2024-10-03T00:11:35Z

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

+                   !hasReservedCallFrame(MF);
+
+  if (RVVStackSize) {
+    // If restoreFP the stack pointer will be restored using the frame pointer


restoreFP -> RestoreFP

topperc · 2024-10-03T00:11:47Z

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

+    assert(SecondSPAdjustAmount > 0 &&
+           "SecondSPAdjustAmount should be greater than zero");
+
+    // If restoreFP the stack pointer will be restored using the frame pointer


restoreFP -> RestoreFP

topperc · 2024-10-03T00:14:14Z

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

+  if (RVVStackSize) {
+    // If restoreFP the stack pointer will be restored using the frame pointer
+    // value.
+    if (!RestoreFP) {


Drop curly braces

topperc · 2024-10-03T00:14:17Z

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

+
+    // If restoreFP the stack pointer will be restored using the frame pointer
+    // value.
+    if (!RestoreFP) {


Drop curly braces

This patch fixes SP register recovery in the function epilogue.

topperc

LGTM

dlav-sc · 2024-10-04T09:21:47Z

Thanks for taking a look at the patch

Currently, when the frame pointer (fp) is present and sp is adjusted a second time, sp recovery in the function epilogue is suboptimal. For example:

```
lui a0, 2
sub sp, s0, a0
addi a0, a0, -2044
add sp, sp, a0
```

This patch improves sp recovery in such cases, and the code snippet above becomes:

```
addi sp, s0, -2044
```

bscarlet · 2024-10-23T19:49:55Z

I believe this change is the cause of a miscompilation I'm seeing (automatically linked above). Please take a look, and kindly revert the change until that issue is resolved.

joanahalili · 2024-10-25T14:13:54Z

Hello,
This commit is causing a large number of targets to fail on our end; a reproducer is also posted on the issue above (#113488). Could you please have a look and possibly revert?
If there is no activity on the commit, we will have to revert it ourselves to unblock our work.

dlav-sc · 2024-10-28T12:54:54Z

@bscarlet, @joanahalili
Hi, I couldn't reproduce the issue.

Here is what I've tried to do:

clone https://github.com/llvm/llvm-project.git
git checkout 7be2ce7
compile your program, save resulting assembly
git revert 7be2ce7
compile your program, save resulting assembly
in both instances the code is the same: assembly.txt

It does not look like my patch is the reason for the failure. Besides,
my patch should not even touch function prologue.

Could you please elaborate on how I can reproduce the issue using the existing codebase?
Or is my attempt to reproduce it incorrect?

bscarlet · 2024-10-28T14:37:34Z

Could you please include the complete flags you use to build LLVM so that I can reproduce your entire procedure.

Did you use the flags I included in my reproduction case in your steps 3 & 5?

dlav-sc · 2024-10-28T16:09:53Z

Oh, I'm sorry, I've compiled your example using the wrong clang, my bad.

Anyway, using the right version this time, I got almost identical outputs: with_patch.txt, without_patch.txt. In fact, they differ only in the clang commit hash, so you can verify that the first one was compiled with my patch and the second one without it.

Could you please include the complete flags you use to build LLVM so that I can reproduce your entire procedure.

$ cmake -S llvm -B build -GNinja -DLLVM_ENABLE_PROJECTS='clang' -DCMAKE_BUILD_TYPE=Release
$ cd build
$ cmake --build ./ -t clang

Did you use the flags I included in my reproduction case in your steps 3 & 5?

Yep

$ bin/clang -S -O1 -mrvv-vector-bits=512 -march=rv64gcv1p0 --sysroot=<path>/<to>/riscv-gcc/sysroot \
--gcc-toolchain=<path>/<to>/riscv-gcc --target=riscv64-unknown-linux riscv-varargs-crash.cc \
-o riscv-varargs-crash.s

bscarlet · 2024-10-28T16:32:20Z

We can now confirm that despite initial appearances, this patch does not seem to be the culprit. I've closed the corresponding issue. Apologies for the false alarm, and thank you for helping investigate.

eaeltsin · 2024-10-29T21:51:26Z

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

+  uint64_t StackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
+                                           : getStackSizeWithRVVPadding(MF) -
+                                                 RVFI->getReservedSpillsSize();
+  uint64_t FPOffset = FirstSPAdjustAmount ? FirstSPAdjustAmount


Can you please double-check that this line is correct?

When I change it to

uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();

our tests pass.

(Looks like FP offset should always depend on getVarArgsSaveSize(), also if FirstSPAdjustAmount != 0?)

Could you provide a reproduction, please?

Could you follow the steps from my comment #110809 (comment) and ensure that the first executable crashes and the second one doesn't.

eaeltsin · 2024-10-30T17:12:27Z

I'm not sure about crashing, but looking at disassembler at https://godbolt.org/z/z8Kehr5ob:

The new code for function foo has prologue (line 68):

        addi    sp, sp, -496
        sd      ra, 424(sp)
        sd      s0, 416(sp)
        addi    s0, sp, 432

and epilogue (line 91):

        addi    sp, s0, -496  <---- should be -432
        ld      ra, 424(sp)
        ld      s0, 416(sp)
        addi    sp, sp, 496
        ret

eaeltsin · 2024-10-30T19:37:48Z

@dlav-sc is the above sufficient?

Can you please take a look soon? This is blocking our internal testing.

dlav-sc · 2024-10-30T21:33:08Z

@dlav-sc is the above sufficient?

Yes, I've reproduced your problem and am preparing a fix now; I hope to open a PR within an hour.

Thank you for your example, I'll make a test from it.

This patch fixes sp recovery in the epilogue of varargs functions when the fp register is present and a second sp adjustment is applied. Source of the issue: #110809

llvmbot added the backend:RISC-V label Oct 2, 2024

dlav-sc mentioned this pull request Oct 2, 2024

[RISCV][CFI] add function epilogue cfi information #110234

Closed

kito-cheng reviewed Oct 2, 2024

View reviewed changes

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp Outdated Show resolved Hide resolved

llvm/lib/Target/RISCV/RISCVFrameLowering.h Outdated Show resolved Hide resolved

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp Show resolved Hide resolved

dlav-sc force-pushed the users/dlav-sc/riscv-sp-recovery branch 5 times, most recently from a58cf3f to ac123f9 Compare October 2, 2024 11:40

kito-cheng approved these changes Oct 2, 2024

View reviewed changes

topperc reviewed Oct 3, 2024

View reviewed changes

dlav-sc force-pushed the users/dlav-sc/riscv-sp-recovery branch from ac123f9 to 536cbea Compare October 3, 2024 00:25

dlav-sc added 2 commits October 3, 2024 03:25

[RISCV] fix SP recovery in the function epilogue

b68703c

This patch fixes SP register recovery in the function epilogue.

[RISCV] update tests

536cbea

topperc approved these changes Oct 3, 2024

View reviewed changes

dlav-sc merged commit 7be2ce7 into main Oct 4, 2024
8 checks passed

dlav-sc deleted the users/dlav-sc/riscv-sp-recovery branch October 4, 2024 09:22

bscarlet mentioned this pull request Oct 23, 2024

riscv64 apparent miscompilation causing segfault #113488

Closed

eaeltsin reviewed Oct 29, 2024

View reviewed changes

dlav-sc mentioned this pull request Oct 30, 2024

[RISCV] fix SP recovery in varargs functions #114316

Merged

dlav-sc added a commit that referenced this pull request Nov 6, 2024

[RISCV] fix SP recovery in varargs functions (#114316)

83f92c3

This patch fixes sp recovery in the epilogue of varargs functions when the fp register is present and a second sp adjustment is applied. Source of the issue: #110809

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[RISCV] fix SP recovery in a function epilogue #110809

[RISCV] fix SP recovery in a function epilogue #110809

dlav-sc commented Oct 2, 2024 •

edited

Loading

llvmbot commented Oct 2, 2024

dlav-sc commented Oct 2, 2024

kito-cheng left a comment

topperc commented Oct 2, 2024

dlav-sc commented Oct 2, 2024

topperc Oct 3, 2024

dlav-sc Oct 3, 2024

topperc Oct 3, 2024

dlav-sc Oct 3, 2024

topperc Oct 3, 2024

dlav-sc Oct 3, 2024

topperc Oct 3, 2024

dlav-sc Oct 3, 2024

topperc left a comment

dlav-sc commented Oct 4, 2024

bscarlet commented Oct 23, 2024

joanahalili commented Oct 25, 2024

dlav-sc commented Oct 28, 2024

bscarlet commented Oct 28, 2024

dlav-sc commented Oct 28, 2024 •

edited

Loading

bscarlet commented Oct 28, 2024

eaeltsin Oct 29, 2024 •

edited

Loading

dlav-sc Oct 30, 2024

eaeltsin commented Oct 30, 2024 •

edited

Loading

eaeltsin commented Oct 30, 2024

dlav-sc commented Oct 30, 2024

[RISCV] fix SP recovery in a function epilogue #110809

[RISCV] fix SP recovery in a function epilogue #110809

Conversation

dlav-sc commented Oct 2, 2024 • edited Loading

llvmbot commented Oct 2, 2024

dlav-sc commented Oct 2, 2024

kito-cheng left a comment

Choose a reason for hiding this comment

topperc commented Oct 2, 2024

dlav-sc commented Oct 2, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

topperc left a comment

Choose a reason for hiding this comment

dlav-sc commented Oct 4, 2024

bscarlet commented Oct 23, 2024

joanahalili commented Oct 25, 2024

dlav-sc commented Oct 28, 2024

bscarlet commented Oct 28, 2024

dlav-sc commented Oct 28, 2024 • edited Loading

bscarlet commented Oct 28, 2024

eaeltsin Oct 29, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

eaeltsin commented Oct 30, 2024 • edited Loading

eaeltsin commented Oct 30, 2024

dlav-sc commented Oct 30, 2024

dlav-sc commented Oct 2, 2024 •

edited

Loading

dlav-sc commented Oct 28, 2024 •

edited

Loading

eaeltsin Oct 29, 2024 •

edited

Loading

eaeltsin commented Oct 30, 2024 •

edited

Loading