From 152838793e2179abbd46444df10d035bfa499709 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 1 Feb 2024 20:50:20 +0800 Subject: [PATCH] [RISCV][NFC] Simplify calls.ll and autogenerate checks for tail-calls.ll Split out from #78417. Reviewers: topperc, asb, kito-cheng Reviewed By: asb Pull Request: https://github.com/llvm/llvm-project/pull/79248 --- llvm/test/CodeGen/RISCV/calls.ll | 357 +++++++++----------------- llvm/test/CodeGen/RISCV/tail-calls.ll | 228 ++++++++++++---- 2 files changed, 294 insertions(+), 291 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/calls.ll b/llvm/test/CodeGen/RISCV/calls.ll index 365f255dd82447..58b10cf53971f2 100644 --- a/llvm/test/CodeGen/RISCV/calls.ll +++ b/llvm/test/CodeGen/RISCV/calls.ll @@ -1,29 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s ; RUN: llc -relocation-model=pic -mtriple=riscv32 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV32I-PIC %s +; RUN: | FileCheck -check-prefixes=CHECK,RV32I-PIC %s declare i32 @external_function(i32) define i32 @test_call_external(i32 %a) nounwind { -; RV32I-LABEL: test_call_external: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call external_function -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV32I-PIC-LABEL: test_call_external: -; RV32I-PIC: # %bb.0: -; RV32I-PIC-NEXT: addi sp, sp, -16 -; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-PIC-NEXT: call external_function -; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-PIC-NEXT: addi sp, sp, 16 -; RV32I-PIC-NEXT: ret +; CHECK-LABEL: test_call_external: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: call external_function +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %1 = call i32 @external_function(i32 %a) ret i32 %1 } @@ -31,85 +22,51 @@ define i32 @test_call_external(i32 %a) nounwind { declare dso_local i32 @dso_local_function(i32) define i32 @test_call_dso_local(i32 %a) nounwind { -; RV32I-LABEL: test_call_dso_local: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call dso_local_function -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV32I-PIC-LABEL: test_call_dso_local: -; RV32I-PIC: # %bb.0: -; RV32I-PIC-NEXT: addi sp, sp, -16 -; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-PIC-NEXT: call dso_local_function -; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-PIC-NEXT: addi sp, sp, 16 -; RV32I-PIC-NEXT: ret +; CHECK-LABEL: test_call_dso_local: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: call dso_local_function +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %1 = call i32 @dso_local_function(i32 %a) ret i32 %1 } define i32 @defined_function(i32 %a) nounwind { -; RV32I-LABEL: defined_function: -; RV32I: # %bb.0: -; RV32I-NEXT: addi a0, a0, 1 -; RV32I-NEXT: ret -; -; RV32I-PIC-LABEL: defined_function: -; RV32I-PIC: # %bb.0: -; RV32I-PIC-NEXT: addi a0, a0, 1 -; RV32I-PIC-NEXT: ret +; CHECK-LABEL: defined_function: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: ret %1 = add i32 %a, 1 ret i32 %1 } define i32 @test_call_defined(i32 %a) nounwind { -; RV32I-LABEL: test_call_defined: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call defined_function -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV32I-PIC-LABEL: test_call_defined: -; RV32I-PIC: # %bb.0: -; RV32I-PIC-NEXT: addi sp, sp, -16 -; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-PIC-NEXT: call defined_function -; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-PIC-NEXT: addi sp, sp, 16 -; RV32I-PIC-NEXT: ret +; CHECK-LABEL: test_call_defined: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: call defined_function +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %1 = call i32 @defined_function(i32 %a) ret i32 %1 } define i32 @test_call_indirect(ptr %a, i32 %b) nounwind { -; RV32I-LABEL: test_call_indirect: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: jalr a2 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV32I-PIC-LABEL: test_call_indirect: -; RV32I-PIC: # %bb.0: -; RV32I-PIC-NEXT: addi sp, sp, -16 -; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-PIC-NEXT: mv a2, a0 -; RV32I-PIC-NEXT: mv a0, a1 -; RV32I-PIC-NEXT: jalr a2 -; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-PIC-NEXT: addi sp, sp, 16 -; RV32I-PIC-NEXT: ret +; CHECK-LABEL: test_call_indirect: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: jalr a2 +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %1 = call i32 %a(i32 %b) ret i32 %1 } @@ -117,39 +74,22 @@ define i32 @test_call_indirect(ptr %a, i32 %b) nounwind { ; Make sure we don't use t0 as the source for jalr as that is a hint to pop the ; return address stack on some microarchitectures. define i32 @test_call_indirect_no_t0(ptr %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) nounwind { -; RV32I-LABEL: test_call_indirect_no_t0: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv t1, a0 -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: mv a1, a2 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: mv a3, a4 -; RV32I-NEXT: mv a4, a5 -; RV32I-NEXT: mv a5, a6 -; RV32I-NEXT: mv a6, a7 -; RV32I-NEXT: jalr t1 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV32I-PIC-LABEL: test_call_indirect_no_t0: -; RV32I-PIC: # %bb.0: -; RV32I-PIC-NEXT: addi sp, sp, -16 -; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-PIC-NEXT: mv t1, a0 -; RV32I-PIC-NEXT: mv a0, a1 -; RV32I-PIC-NEXT: mv a1, a2 -; RV32I-PIC-NEXT: mv a2, a3 -; RV32I-PIC-NEXT: mv a3, a4 -; RV32I-PIC-NEXT: mv a4, a5 -; RV32I-PIC-NEXT: mv a5, a6 -; RV32I-PIC-NEXT: mv a6, a7 -; RV32I-PIC-NEXT: jalr t1 -; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-PIC-NEXT: addi sp, sp, 16 -; RV32I-PIC-NEXT: ret +; CHECK-LABEL: test_call_indirect_no_t0: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: mv t1, a0 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: mv a3, a4 +; CHECK-NEXT: mv a4, a5 +; CHECK-NEXT: mv a5, a6 +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: jalr t1 +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %1 = call i32 %a(i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) ret i32 %1 } @@ -158,45 +98,27 @@ define i32 @test_call_indirect_no_t0(ptr %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 ; introduced when compiling with optimisation. define fastcc i32 @fastcc_function(i32 %a, i32 %b) nounwind { -; RV32I-LABEL: fastcc_function: -; RV32I: # %bb.0: -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: ret -; -; RV32I-PIC-LABEL: fastcc_function: -; RV32I-PIC: # %bb.0: -; RV32I-PIC-NEXT: add a0, a0, a1 -; RV32I-PIC-NEXT: ret +; CHECK-LABEL: fastcc_function: +; CHECK: # %bb.0: +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: ret %1 = add i32 %a, %b ret i32 %1 } define i32 @test_call_fastcc(i32 %a, i32 %b) nounwind { -; RV32I-LABEL: test_call_fastcc: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: call fastcc_function -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV32I-PIC-LABEL: test_call_fastcc: -; RV32I-PIC: # %bb.0: -; RV32I-PIC-NEXT: addi sp, sp, -16 -; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-PIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-PIC-NEXT: mv s0, a0 -; RV32I-PIC-NEXT: call fastcc_function -; RV32I-PIC-NEXT: mv a0, s0 -; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-PIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32I-PIC-NEXT: addi sp, sp, 16 -; RV32I-PIC-NEXT: ret +; CHECK-LABEL: test_call_fastcc: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: call fastcc_function +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %1 = call fastcc i32 @fastcc_function(i32 %a, i32 %b) ret i32 %a } @@ -204,107 +126,64 @@ define i32 @test_call_fastcc(i32 %a, i32 %b) nounwind { declare i32 @external_many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind define i32 @test_call_external_many_args(i32 %a) nounwind { -; RV32I-LABEL: test_call_external_many_args: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mv a4, a0 -; RV32I-NEXT: mv a5, a0 -; RV32I-NEXT: mv a6, a0 -; RV32I-NEXT: mv a7, a0 -; RV32I-NEXT: call external_many_args -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV32I-PIC-LABEL: test_call_external_many_args: -; RV32I-PIC: # %bb.0: -; RV32I-PIC-NEXT: addi sp, sp, -16 -; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-PIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-PIC-NEXT: mv s0, a0 -; RV32I-PIC-NEXT: sw a0, 4(sp) -; RV32I-PIC-NEXT: sw a0, 0(sp) -; RV32I-PIC-NEXT: mv a1, a0 -; RV32I-PIC-NEXT: mv a2, a0 -; RV32I-PIC-NEXT: mv a3, a0 -; RV32I-PIC-NEXT: mv a4, a0 -; RV32I-PIC-NEXT: mv a5, a0 -; RV32I-PIC-NEXT: mv a6, a0 -; RV32I-PIC-NEXT: mv a7, a0 -; RV32I-PIC-NEXT: call external_many_args -; RV32I-PIC-NEXT: mv a0, s0 -; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-PIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32I-PIC-NEXT: addi sp, sp, 16 -; RV32I-PIC-NEXT: ret +; CHECK-LABEL: test_call_external_many_args: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: sw a0, 4(sp) +; CHECK-NEXT: sw a0, 0(sp) +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: mv a5, a0 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a0 +; CHECK-NEXT: call external_many_args +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %1 = call i32 @external_many_args(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a) ret i32 %a } define i32 @defined_many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 %j) nounwind { -; RV32I-LABEL: defined_many_args: -; RV32I: # %bb.0: -; RV32I-NEXT: lw a0, 4(sp) -; RV32I-NEXT: addi a0, a0, 1 -; RV32I-NEXT: ret -; -; RV32I-PIC-LABEL: defined_many_args: -; RV32I-PIC: # %bb.0: -; RV32I-PIC-NEXT: lw a0, 4(sp) -; RV32I-PIC-NEXT: addi a0, a0, 1 -; RV32I-PIC-NEXT: ret +; CHECK-LABEL: defined_many_args: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a0, 4(sp) +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: ret %added = add i32 %j, 1 ret i32 %added } define i32 @test_call_defined_many_args(i32 %a) nounwind { -; RV32I-LABEL: test_call_defined_many_args: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mv a4, a0 -; RV32I-NEXT: mv a5, a0 -; RV32I-NEXT: mv a6, a0 -; RV32I-NEXT: mv a7, a0 -; RV32I-NEXT: call defined_many_args -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV32I-PIC-LABEL: test_call_defined_many_args: -; RV32I-PIC: # %bb.0: -; RV32I-PIC-NEXT: addi sp, sp, -16 -; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-PIC-NEXT: sw a0, 4(sp) -; RV32I-PIC-NEXT: sw a0, 0(sp) -; RV32I-PIC-NEXT: mv a1, a0 -; RV32I-PIC-NEXT: mv a2, a0 -; RV32I-PIC-NEXT: mv a3, a0 -; RV32I-PIC-NEXT: mv a4, a0 -; RV32I-PIC-NEXT: mv a5, a0 -; RV32I-PIC-NEXT: mv a6, a0 -; RV32I-PIC-NEXT: mv a7, a0 -; RV32I-PIC-NEXT: call defined_many_args -; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-PIC-NEXT: addi sp, sp, 16 -; RV32I-PIC-NEXT: ret +; CHECK-LABEL: test_call_defined_many_args: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a0, 4(sp) +; CHECK-NEXT: sw a0, 0(sp) +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: mv a5, a0 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a0 +; CHECK-NEXT: call defined_many_args +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %1 = call i32 @defined_many_args(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a) ret i32 %1 } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32I: {{.*}} +; RV32I-PIC: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll index e3079424230bcc..87d69bfad38c2b 100644 --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -1,11 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s ; RUN: llc -mtriple riscv32-unknown-elf -o - %s | FileCheck %s ; Perform tail call optimization for global address. declare i32 @callee_tail(i32 %i) define i32 @caller_tail(i32 %i) nounwind { -; CHECK-LABEL: caller_tail -; CHECK: tail callee_tail +; CHECK-LABEL: caller_tail: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tail callee_tail entry: %r = tail call i32 @callee_tail(i32 %i) ret i32 %r @@ -15,10 +17,16 @@ entry: @dest = global [2 x i8] zeroinitializer declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1) define void @caller_extern(ptr %src) optsize { +; CHECK-LABEL: caller_extern: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a1, %hi(dest) +; CHECK-NEXT: addi a1, a1, %lo(dest) +; CHECK-NEXT: li a2, 7 +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: mv a1, a3 +; CHECK-NEXT: tail memcpy entry: -; CHECK: caller_extern -; CHECK-NOT: call memcpy -; CHECK: tail memcpy tail call void @llvm.memcpy.p0.p0.i32(ptr @dest, ptr %src, i32 7, i1 false) ret void } @@ -26,10 +34,16 @@ entry: ; Perform tail call optimization for external symbol. @dest_pgso = global [2 x i8] zeroinitializer define void @caller_extern_pgso(ptr %src) !prof !14 { +; CHECK-LABEL: caller_extern_pgso: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a1, %hi(dest_pgso) +; CHECK-NEXT: addi a1, a1, %lo(dest_pgso) +; CHECK-NEXT: li a2, 7 +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: mv a1, a3 +; CHECK-NEXT: tail memcpy entry: -; CHECK: caller_extern_pgso -; CHECK-NOT: call memcpy -; CHECK: tail memcpy tail call void @llvm.memcpy.p0.p0.i32(ptr @dest_pgso, ptr %src, i32 7, i1 false) ret void } @@ -38,19 +52,19 @@ entry: declare void @callee_indirect1() declare void @callee_indirect2() define void @caller_indirect_tail(i32 %a) nounwind { -; CHECK-LABEL: caller_indirect_tail -; CHECK-NOT: call callee_indirect1 -; CHECK-NOT: call callee_indirect2 -; CHECK-NOT: tail callee_indirect1 -; CHECK-NOT: tail callee_indirect2 - -; CHECK: lui a0, %hi(callee_indirect2) -; CHECK-NEXT: addi t1, a0, %lo(callee_indirect2) -; CHECK-NEXT: jr t1 - -; CHECK: lui a0, %hi(callee_indirect1) -; CHECK-NEXT: addi t1, a0, %lo(callee_indirect1) -; CHECK-NEXT: jr t1 +; CHECK-LABEL: caller_indirect_tail: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: beqz a0, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: lui a0, %hi(callee_indirect2) +; CHECK-NEXT: addi t1, a0, %lo(callee_indirect2) +; CHECK-NEXT: jr t1 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: lui a0, %hi(callee_indirect1) +; CHECK-NEXT: addi t1, a0, %lo(callee_indirect1) +; CHECK-NEXT: jr t1 + + entry: %tobool = icmp eq i32 %a, 0 %callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2 @@ -79,9 +93,21 @@ define i32 @caller_indirect_no_t0(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5 ; Do not tail call optimize functions with varargs passed by stack. declare i32 @callee_varargs(i32, ...) define void @caller_varargs(i32 %a, i32 %b) nounwind { -; CHECK-LABEL: caller_varargs -; CHECK-NOT: tail callee_varargs -; CHECK: call callee_varargs +; CHECK-LABEL: caller_varargs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a0, 0(sp) +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: mv a7, a0 +; CHECK-NEXT: call callee_varargs +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret entry: %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a) ret void @@ -90,9 +116,26 @@ entry: ; Do not tail call optimize if stack is used to pass parameters. declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) nounwind { -; CHECK-LABEL: caller_args -; CHECK-NOT: tail callee_args -; CHECK: call callee_args +; CHECK-LABEL: caller_args: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 32(sp) +; CHECK-NEXT: lw t1, 36(sp) +; CHECK-NEXT: lw t2, 40(sp) +; CHECK-NEXT: lw t3, 44(sp) +; CHECK-NEXT: lw t4, 48(sp) +; CHECK-NEXT: lw t5, 52(sp) +; CHECK-NEXT: sw t5, 20(sp) +; CHECK-NEXT: sw t4, 16(sp) +; CHECK-NEXT: sw t3, 12(sp) +; CHECK-NEXT: sw t2, 8(sp) +; CHECK-NEXT: sw t1, 4(sp) +; CHECK-NEXT: sw t0, 0(sp) +; CHECK-NEXT: call callee_args +; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret entry: %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) ret i32 %r @@ -101,9 +144,20 @@ entry: ; Do not tail call optimize if parameters need to be passed indirectly. declare i32 @callee_indirect_args(fp128 %a) define void @caller_indirect_args() nounwind { -; CHECK-LABEL: caller_indirect_args -; CHECK-NOT: tail callee_indirect_args -; CHECK: call callee_indirect_args +; CHECK-LABEL: caller_indirect_args: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; CHECK-NEXT: lui a0, 262128 +; CHECK-NEXT: sw a0, 12(sp) +; CHECK-NEXT: sw zero, 8(sp) +; CHECK-NEXT: sw zero, 4(sp) +; CHECK-NEXT: mv a0, sp +; CHECK-NEXT: sw zero, 0(sp) +; CHECK-NEXT: call callee_indirect_args +; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret entry: %call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000) ret void @@ -112,8 +166,9 @@ entry: ; Perform tail call optimization for external weak symbol. declare extern_weak void @callee_weak() define void @caller_weak() nounwind { -; CHECK-LABEL: caller_weak -; CHECK: tail callee_weak +; CHECK-LABEL: caller_weak: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tail callee_weak entry: tail call void @callee_weak() ret void @@ -123,24 +178,66 @@ entry: ; return to the hardware. Tail-calling another function would probably break ; this. declare void @callee_irq() -define void @caller_irq() #0 { -; CHECK-LABEL: caller_irq -; CHECK-NOT: tail callee_irq -; CHECK: call callee_irq +define void @caller_irq() nounwind "interrupt"="machine" { +; CHECK-LABEL: caller_irq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw t0, 56(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw t1, 52(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw t2, 48(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a3, 32(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a5, 24(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a6, 20(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a7, 16(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw t3, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw t5, 4(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw t6, 0(sp) # 4-byte Folded Spill +; CHECK-NEXT: call callee_irq +; CHECK-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw t0, 56(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw t1, 52(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw t2, 48(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw a1, 40(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw a2, 36(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw a3, 32(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw a4, 28(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw a5, 24(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw a6, 20(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw a7, 16(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw t3, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw t6, 0(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: mret entry: tail call void @callee_irq() ret void } -attributes #0 = { "interrupt"="machine" } ; Byval parameters hand the function a pointer directly into the stack area ; we want to reuse during a tail call. Do not tail call optimize functions with ; byval parameters. declare i32 @callee_byval(ptr byval(ptr) %a) define i32 @caller_byval() nounwind { -; CHECK-LABEL: caller_byval -; CHECK-NOT: tail callee_byval -; CHECK: call callee_byval +; CHECK-LABEL: caller_byval: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw a0, 8(sp) +; CHECK-NEXT: sw a0, 4(sp) +; CHECK-NEXT: addi a0, sp, 4 +; CHECK-NEXT: call callee_byval +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret entry: %a = alloca ptr %r = tail call i32 @callee_byval(ptr byval(ptr) %a) @@ -153,9 +250,16 @@ entry: declare void @callee_struct(ptr sret(%struct.A) %a) define void @caller_nostruct() nounwind { -; CHECK-LABEL: caller_nostruct -; CHECK-NOT: tail callee_struct -; CHECK: call callee_struct +; CHECK-LABEL: caller_nostruct: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: lui a0, %hi(a) +; CHECK-NEXT: addi a0, a0, %lo(a) +; CHECK-NEXT: call callee_struct +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret entry: tail call void @callee_struct(ptr sret(%struct.A) @a) ret void @@ -164,9 +268,14 @@ entry: ; Do not tail call optimize if caller uses structret semantics. declare void @callee_nostruct() define void @caller_struct(ptr sret(%struct.A) %a) nounwind { -; CHECK-LABEL: caller_struct -; CHECK-NOT: tail callee_nostruct -; CHECK: call callee_nostruct +; CHECK-LABEL: caller_struct: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: call callee_nostruct +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret entry: tail call void @callee_nostruct() ret void @@ -175,8 +284,13 @@ entry: ; Do not tail call optimize if disabled. define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" { ; CHECK-LABEL: disable_tail_calls: -; CHECK-NOT: tail callee_nostruct -; CHECK: call callee_tail +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: call callee_tail +; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret entry: %rv = tail call i32 @callee_tail(i32 %i) ret i32 %rv @@ -189,10 +303,20 @@ declare i32 @test2() declare i32 @test3() define i32 @duplicate_returns(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: duplicate_returns: -; CHECK: tail test2 -; CHECK: tail test -; CHECK: tail test1 -; CHECK: tail test3 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: beqz a0, .LBB14_4 +; CHECK-NEXT: # %bb.1: # %if.else +; CHECK-NEXT: beqz a1, .LBB14_5 +; CHECK-NEXT: # %bb.2: # %if.else4 +; CHECK-NEXT: bge a1, a0, .LBB14_6 +; CHECK-NEXT: # %bb.3: # %if.then6 +; CHECK-NEXT: tail test2 +; CHECK-NEXT: .LBB14_4: # %if.then +; CHECK-NEXT: tail test +; CHECK-NEXT: .LBB14_5: # %if.then2 +; CHECK-NEXT: tail test1 +; CHECK-NEXT: .LBB14_6: # %if.else8 +; CHECK-NEXT: tail test3 entry: %cmp = icmp eq i32 %a, 0 br i1 %cmp, label %if.then, label %if.else