Skip to content

Commit

Permalink
x86: use SSE registers to return float values
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfJung committed Jan 12, 2025
1 parent 1a6422e commit 1c0655d
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 58 deletions.
9 changes: 5 additions & 4 deletions compiler/rustc_target/src/callconv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
/// Pass this argument directly instead. Should NOT be used!
/// Only exists because of past ABI mistakes that will take time to fix
/// (see <https://github.com/rust-lang/rust/issues/115666>).
#[track_caller]
pub fn make_direct_deprecated(&mut self) {
match self.mode {
PassMode::Indirect { .. } => {
Expand All @@ -401,6 +402,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {

/// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead.
/// This is valid for both sized and unsized arguments.
#[track_caller]
pub fn make_indirect(&mut self) {
match self.mode {
PassMode::Direct(_) | PassMode::Pair(_, _) => {
Expand All @@ -415,6 +417,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {

/// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass
/// ZSTs indirectly.
#[track_caller]
pub fn make_indirect_from_ignore(&mut self) {
match self.mode {
PassMode::Ignore => {
Expand Down Expand Up @@ -773,9 +776,9 @@ impl<'a, Ty> FnAbi<'a, Ty> {

if arg_idx.is_none()
&& arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2
&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
&& arg.layout.is_aggregate()
{
// Return values larger than 2 registers using a return area
// Return aggregate values larger than 2 registers using a return area
// pointer. LLVM and Cranelift disagree about how to return
// values that don't fit in the registers designated for return
// values. LLVM will force the entire return value to be passed
Expand Down Expand Up @@ -813,8 +816,6 @@ impl<'a, Ty> FnAbi<'a, Ty> {
// rustc_target already ensure any return value which doesn't
// fit in the available amount of return registers is passed in
// the right way for the current target.
// The adjustment is also not necessary nor desired for types with
// a vector representation; those are handled below.
arg.make_indirect();
continue;
}
Expand Down
12 changes: 9 additions & 3 deletions compiler/rustc_target/src/callconv/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use crate::abi::call::{ArgAttribute, FnAbi, PassMode, Reg, RegKind};
use crate::abi::{
AddressSpace, Align, BackendRepr, Float, HasDataLayout, Pointer, TyAbiInterface, TyAndLayout,
};
use crate::spec::HasTargetSpec;
use crate::spec::abi::Abi as SpecAbi;
use crate::spec::{HasTargetSpec, RustAbi};

#[derive(PartialEq)]
pub(crate) enum Flavor {
Expand Down Expand Up @@ -234,8 +234,14 @@ where
_ => false, // anyway not passed via registers on x86
};
if has_float {
if fn_abi.ret.layout.size <= Pointer(AddressSpace::DATA).size(cx) {
// Same size or smaller than pointer, return in a register.
if cx.target_spec().rust_abi == Some(RustAbi::X86Sse2)
&& fn_abi.ret.layout.backend_repr.is_scalar()
&& fn_abi.ret.layout.size.bits() <= 128
{
// This is a single scalar that fits into an SSE register.
fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size });
} else if fn_abi.ret.layout.size <= Pointer(AddressSpace::DATA).size(cx) {
// Same size or smaller than pointer, return in an integer register.
fn_abi.ret.cast_to(Reg { kind: RegKind::Integer, size: fn_abi.ret.layout.size });
} else {
// Larger than a pointer, return indirectly.
Expand Down
110 changes: 59 additions & 51 deletions tests/assembly/x86-return-float.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
//@ assembly-output: emit-asm
//@ only-x86
// FIXME(#114479): LLVM miscompiles loading and storing `f32` and `f64` when SSE is disabled.
// There's no compiletest directive to ignore a test on i586 only, so just always explicitly enable
// SSE2.
// Use the same target CPU as `i686` so that LLVM orders the instructions in the same order.
//@ compile-flags: -Ctarget-feature=+sse2 -Ctarget-cpu=pentium4
//@ needs-llvm-components: x86
//@ revisions: sse nosse
//@[sse] compile-flags: --target i686-unknown-linux-gnu
// We make SSE available but don't use it for the ABI.
//@[nosse] compile-flags: --target i586-unknown-linux-gnu -Ctarget-feature=+sse2 -Ctarget-cpu=pentium4

// Force frame pointers to make ASM more consistent between targets
//@ compile-flags: -O -C force-frame-pointers
//@ filecheck-flags: --implicit-check-not fld --implicit-check-not fst
//@ revisions: normal win
//@[normal] ignore-windows
//@[win] only-windows

#![crate_type = "lib"]
#![feature(f16, f128)]
#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
#![no_core]
#![crate_type = "lib"]

#[lang = "sized"]
trait Sized {}

#[lang = "copy"]
trait Copy {}

impl Copy for f16 {}
impl Copy for f32 {}
impl Copy for f64 {}
impl Copy for f128 {}
impl Copy for usize {}

// Tests that returning `f32` and `f64` with the "Rust" ABI on 32-bit x86 doesn't use the x87
// floating point stack, as loading and storing `f32`s and `f64`s to and from the x87 stack quietens
Expand All @@ -24,7 +35,8 @@
// CHECK-LABEL: return_f32:
#[no_mangle]
pub fn return_f32(x: f32) -> f32 {
// CHECK: movl {{.*}}(%ebp), %eax
// sse: movss {{.*}}(%ebp), %xmm0
// nosse: movl {{.*}}(%ebp), %eax
// CHECK-NOT: ax
// CHECK: retl
x
Expand All @@ -33,9 +45,11 @@ pub fn return_f32(x: f32) -> f32 {
// CHECK-LABEL: return_f64:
#[no_mangle]
pub fn return_f64(x: f64) -> f64 {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
// CHECK-NEXT: movsd %[[VAL]], (%[[PTR]])
// nosse: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// nosse-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
// nosse-NEXT: movsd %[[VAL]], (%[[PTR]])
// sse: movsd {{.*}}(%ebp), %xmm0
// sse-NOT: ax
// CHECK: retl
x
}
Expand Down Expand Up @@ -148,7 +162,8 @@ pub unsafe fn call_f32(x: &mut f32) {
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f32
// CHECK-NEXT: movl %eax, (%[[PTR]])
// sse-NEXT: movss %xmm0, (%[[PTR]])
// nosse-NEXT: movl %eax, (%[[PTR]])
*x = get_f32();
}

Expand All @@ -160,8 +175,9 @@ pub unsafe fn call_f64(x: &mut f64) {
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f64
// CHECK: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
// CHECK-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
// sse: movlps %xmm0, (%[[PTR]])
// nosse: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
// nosse-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
*x = get_f64();
}

Expand Down Expand Up @@ -190,10 +206,8 @@ pub unsafe fn call_f64_f64(x: &mut (f64, f64)) {
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f64_f64
// normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// normal-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// win: movsd (%esp), %[[VAL1:.*]]
// win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
// CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
*x = get_f64_f64();
Expand All @@ -207,13 +221,10 @@ pub unsafe fn call_f32_f64(x: &mut (f32, f64)) {
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f32_f64
// normal: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// normal-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
// win: movss (%esp), %[[VAL1:.*]]
// win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
// CHECK: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
// normal-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
// win-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
// CHECK-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
*x = get_f32_f64();
}

Expand All @@ -225,10 +236,8 @@ pub unsafe fn call_f64_f32(x: &mut (f64, f32)) {
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f64_f32
// normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// normal-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// win: movsd (%esp), %[[VAL1:.*]]
// win-NEXT: movss 8(%esp), %[[VAL2:.*]]
// CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movss %[[VAL2]], 8(%[[PTR]])
*x = get_f64_f32();
Expand Down Expand Up @@ -257,10 +266,8 @@ pub unsafe fn call_f64_other(x: &mut (f64, usize)) {
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f64_other
// normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// normal-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// win: movsd (%esp), %[[VAL1:.*]]
// win-NEXT: movl 8(%esp), %[[VAL2:.*]]
// CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movl %[[VAL2]], 8(%[[PTR]])
*x = get_f64_other();
Expand Down Expand Up @@ -289,13 +296,10 @@ pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_other_f64
// normal: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// normal-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
// win: movl (%esp), %[[VAL1:.*]]
// win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
// normal-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
// win-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
// CHECK-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
*x = get_other_f64();
}

Expand All @@ -307,7 +311,8 @@ pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
pub fn return_f16(x: f16) -> f16 {
// CHECK: pushl %ebp
// CHECK: movl %esp, %ebp
// CHECK: movzwl 8(%ebp), %eax
// nosse: movzwl 8(%ebp), %eax
// sse: pinsrw $0, 8(%ebp), %xmm0
// CHECK: popl %ebp
// CHECK: retl
x
Expand All @@ -316,15 +321,18 @@ pub fn return_f16(x: f16) -> f16 {
// CHECK-LABEL: return_f128:
#[no_mangle]
pub fn return_f128(x: f128) -> f128 {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
// CHECK-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
// CHECK-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
// CHECK-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
// CHECK-NEXT: movl %[[VAL1:.*]] (%[[PTR]])
// CHECK: pushl %ebp
// sse: movaps [[#%d,OFFSET:]](%ebp), %xmm0
// nosse: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// nosse-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// nosse-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// nosse-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
// nosse-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
// nosse-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
// nosse-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
// nosse-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
// nosse-NEXT: movl %[[VAL1:.*]] (%[[PTR]])
// CHECK: popl %ebp
// CHECK: retl
x
}

0 comments on commit 1c0655d

Please sign in to comment.