Don't over-optimize the abi layout #93405

Closed · wants to merge 5 commits
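Summary, as far as the diff below shows: this PR deletes the `raw_eq` intrinsic end to end — its type-check rule, const-eval implementation, the cranelift/gcc/LLVM lowerings, its symbol, and the `[T; N]: PartialEq` specialization in `core` that used it — and changes `FnAbi::adjust_for_abi` so small aggregates are no longer cast to an integer register: they stay by-value up to two pointers in size and are only made indirect beyond that.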
34 changes: 0 additions & 34 deletions compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -1101,40 +1101,6 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             ret.write_cvalue(fx, CValue::by_val(res, ret.layout()));
         };
 
-        raw_eq, <T>(v lhs_ref, v rhs_ref) {
-            fn type_by_size(size: Size) -> Option<Type> {
-                Type::int(size.bits().try_into().ok()?)
-            }
-
-            let size = fx.layout_of(T).layout.size;
-            // FIXME add and use emit_small_memcmp
-            let is_eq_value =
-                if size == Size::ZERO {
-                    // No bytes means they're trivially equal
-                    fx.bcx.ins().iconst(types::I8, 1)
-                } else if let Some(clty) = type_by_size(size) {
-                    // Can't use `trusted` for these loads; they could be unaligned.
-                    let mut flags = MemFlags::new();
-                    flags.set_notrap();
-                    let lhs_val = fx.bcx.ins().load(clty, flags, lhs_ref, 0);
-                    let rhs_val = fx.bcx.ins().load(clty, flags, rhs_ref, 0);
-                    let eq = fx.bcx.ins().icmp(IntCC::Equal, lhs_val, rhs_val);
-                    fx.bcx.ins().bint(types::I8, eq)
-                } else {
-                    // Just call `memcmp` (like slices do in core) when the
-                    // size is too large or it's not a power-of-two.
-                    let signed_bytes = i64::try_from(size.bytes()).unwrap();
-                    let bytes_val = fx.bcx.ins().iconst(fx.pointer_type, signed_bytes);
-                    let params = vec![AbiParam::new(fx.pointer_type); 3];
-                    let returns = vec![AbiParam::new(types::I32)];
-                    let args = &[lhs_ref, rhs_ref, bytes_val];
-                    let cmp = fx.lib_call("memcmp", params, returns, args)[0];
-                    let eq = fx.bcx.ins().icmp_imm(IntCC::Equal, cmp, 0);
-                    fx.bcx.ins().bint(types::I8, eq)
-                };
-            ret.write_cvalue(fx, CValue::by_val(is_eq_value, ret.layout()));
-        };
-
         black_box, (c a) {
             // FIXME implement black_box semantics
             ret.write_cvalue(fx, a);
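For reference, the removed `raw_eq` lowerings all implemented plain bitwise equality. A minimal Rust sketch of those semantics (a hypothetical helper, not rustc code):

```rust
use core::mem::size_of;
use core::slice;

/// Bitwise equality over two values' underlying bytes.
///
/// Safety: every byte of `T` must be initialized (no padding), which is
/// exactly what callers of `raw_eq` had to guarantee.
unsafe fn bitwise_eq<T>(a: &T, b: &T) -> bool {
    let a = slice::from_raw_parts(a as *const T as *const u8, size_of::<T>());
    let b = slice::from_raw_parts(b as *const T as *const u8, size_of::<T>());
    a == b
}

fn main() {
    // Sound here: `u32` has no padding bytes.
    assert!(unsafe { bitwise_eq(&7u32, &7u32) });
}
```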
43 changes: 1 addition & 42 deletions compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -4,15 +4,14 @@ mod simd;
 use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp};
 use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::base::wants_msvc_seh;
-use rustc_codegen_ssa::common::{IntPredicate, span_invalid_monomorphization_error};
+use rustc_codegen_ssa::common::span_invalid_monomorphization_error;
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::{ArgAbiMethods, BaseTypeMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods};
 use rustc_middle::bug;
 use rustc_middle::ty::{self, Instance, Ty};
 use rustc_middle::ty::layout::LayoutOf;
 use rustc_span::{Span, Symbol, symbol::kw, sym};
-use rustc_target::abi::HasDataLayout;
 use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode};
 use rustc_target::spec::PanicStrategy;
 
@@ -268,46 +267,6 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                 }
             }
 
-            sym::raw_eq => {
-                use rustc_target::abi::Abi::*;
-                let tp_ty = substs.type_at(0);
-                let layout = self.layout_of(tp_ty).layout;
-                let _use_integer_compare = match layout.abi {
-                    Scalar(_) | ScalarPair(_, _) => true,
-                    Uninhabited | Vector { .. } => false,
-                    Aggregate { .. } => {
-                        // For rusty ABIs, small aggregates are actually passed
-                        // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
-                        // so we re-use that same threshold here.
-                        layout.size <= self.data_layout().pointer_size * 2
-                    }
-                };
-
-                let a = args[0].immediate();
-                let b = args[1].immediate();
-                if layout.size.bytes() == 0 {
-                    self.const_bool(true)
-                }
-                /*else if use_integer_compare {
-                    let integer_ty = self.type_ix(layout.size.bits()); // FIXME(antoyo): LLVM creates an integer of 96 bits for [i32; 3], but gcc doesn't support this, so it creates an integer of 128 bits.
-                    let ptr_ty = self.type_ptr_to(integer_ty);
-                    let a_ptr = self.bitcast(a, ptr_ty);
-                    let a_val = self.load(integer_ty, a_ptr, layout.align.abi);
-                    let b_ptr = self.bitcast(b, ptr_ty);
-                    let b_val = self.load(integer_ty, b_ptr, layout.align.abi);
-                    self.icmp(IntPredicate::IntEQ, a_val, b_val)
-                }*/
-                else {
-                    let void_ptr_type = self.context.new_type::<*const ()>();
-                    let a_ptr = self.bitcast(a, void_ptr_type);
-                    let b_ptr = self.bitcast(b, void_ptr_type);
-                    let n = self.context.new_cast(None, self.const_usize(layout.size.bytes()), self.sizet_type);
-                    let builtin = self.context.get_builtin_function("memcmp");
-                    let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
-                    self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
-                }
-            }
-
             sym::black_box => {
                 args[0].val.store(self, result);
 
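Note that the gcc path above always took the `memcmp` fallback: its integer-compare branch was commented out because gccjit lacks arbitrary-width integer types such as i96. A sketch of that fallback expressed in Rust, assuming the `libc` crate (hypothetical helper, for illustration only):

```rust
use core::mem::size_of;

/// Byte comparison via C `memcmp`, as the removed gcc lowering emitted.
unsafe fn memcmp_eq<T>(a: &T, b: &T) -> bool {
    libc::memcmp(
        a as *const T as *const libc::c_void,
        b as *const T as *const libc::c_void,
        size_of::<T>(),
    ) == 0
}
```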
5 changes: 0 additions & 5 deletions compiler/rustc_codegen_llvm/src/context.rs
@@ -604,7 +604,6 @@ impl<'ll> CodegenCx<'ll, '_> {
         let t_i32 = self.type_i32();
         let t_i64 = self.type_i64();
         let t_i128 = self.type_i128();
-        let t_isize = self.type_isize();
         let t_f32 = self.type_f32();
         let t_f64 = self.type_f64();
 
@@ -816,10 +815,6 @@ impl<'ll> CodegenCx<'ll, '_> {
         ifn!("llvm.assume", fn(i1) -> void);
         ifn!("llvm.prefetch", fn(i8p, t_i32, t_i32, t_i32) -> void);
 
-        // This isn't an "LLVM intrinsic", but LLVM's optimization passes
-        // recognize it like one and we assume it exists in `core::slice::cmp`
-        ifn!("memcmp", fn(i8p, i8p, t_isize) -> t_i32);
-
         // variadic intrinsics
         ifn!("llvm.va_start", fn(i8p) -> void);
         ifn!("llvm.va_end", fn(i8p) -> void);
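The deleted `ifn!` line registered C's `memcmp` with LLVM. In Rust terms the declaration it referred to looks roughly like this (a sketch; the real declaration lived in LLVM IR, and `int` is `i32` on common targets):

```rust
extern "C" {
    // int memcmp(const void *s1, const void *s2, size_t n);
    fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32;
}
```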
37 changes: 0 additions & 37 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -297,43 +297,6 @@ impl<'ll, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 }
             }
 
-            sym::raw_eq => {
-                use abi::Abi::*;
-                let tp_ty = substs.type_at(0);
-                let layout = self.layout_of(tp_ty).layout;
-                let use_integer_compare = match layout.abi {
-                    Scalar(_) | ScalarPair(_, _) => true,
-                    Uninhabited | Vector { .. } => false,
-                    Aggregate { .. } => {
-                        // For rusty ABIs, small aggregates are actually passed
-                        // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
-                        // so we re-use that same threshold here.
-                        layout.size <= self.data_layout().pointer_size * 2
-                    }
-                };
-
-                let a = args[0].immediate();
-                let b = args[1].immediate();
-                if layout.size.bytes() == 0 {
-                    self.const_bool(true)
-                } else if use_integer_compare {
-                    let integer_ty = self.type_ix(layout.size.bits());
-                    let ptr_ty = self.type_ptr_to(integer_ty);
-                    let a_ptr = self.bitcast(a, ptr_ty);
-                    let a_val = self.load(integer_ty, a_ptr, layout.align.abi);
-                    let b_ptr = self.bitcast(b, ptr_ty);
-                    let b_val = self.load(integer_ty, b_ptr, layout.align.abi);
-                    self.icmp(IntPredicate::IntEQ, a_val, b_val)
-                } else {
-                    let i8p_ty = self.type_i8p();
-                    let a_ptr = self.bitcast(a, i8p_ty);
-                    let b_ptr = self.bitcast(b, i8p_ty);
-                    let n = self.const_usize(layout.size.bytes());
-                    let cmp = self.call_intrinsic("memcmp", &[a_ptr, b_ptr, n]);
-                    self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
-                }
-            }
-
             sym::black_box => {
                 args[0].val.store(self, result);
 
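For small layouts the removed LLVM lowering loaded each value as one integer of `size.bits()` and compared those, instead of calling `memcmp`. An illustration of the idea, specialized to a fixed 16-byte layout (rustc emitted the equivalent LLVM IR directly; this helper is hypothetical):

```rust
/// Load both 16-byte values as a single `u128` and compare,
/// avoiding a `memcmp` call.
fn eq_as_u128(a: &[u8; 16], b: &[u8; 16]) -> bool {
    u128::from_ne_bytes(*a) == u128::from_ne_bytes(*b)
}
```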
19 changes: 0 additions & 19 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
@@ -478,10 +478,6 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
                     throw_ub_format!("`assume` intrinsic called with `false`");
                 }
             }
-            sym::raw_eq => {
-                let result = self.raw_eq_intrinsic(&args[0], &args[1])?;
-                self.write_scalar(result, dest)?;
-            }
             _ => return Ok(false),
         }
 
@@ -590,19 +586,4 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         let bytes = std::iter::repeat(byte).take(len.bytes_usize());
         self.memory.write_bytes(dst, bytes)
     }
-
-    pub(crate) fn raw_eq_intrinsic(
-        &mut self,
-        lhs: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::PointerTag>,
-        rhs: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::PointerTag>,
-    ) -> InterpResult<'tcx, Scalar<M::PointerTag>> {
-        let layout = self.layout_of(lhs.layout.ty.builtin_deref(true).unwrap().ty)?;
-        assert!(!layout.is_unsized());
-
-        let lhs = self.read_pointer(lhs)?;
-        let rhs = self.read_pointer(rhs)?;
-        let lhs_bytes = self.memory.read_bytes(lhs, layout.size)?;
-        let rhs_bytes = self.memory.read_bytes(rhs, layout.size)?;
-        Ok(Scalar::from_bool(lhs_bytes == rhs_bytes))
-    }
 }
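At compile time the removed `raw_eq_intrinsic` read both ranges of bytes from interpreter memory and compared them. In effect (illustrative Rust, with plain slices standing in for the interpreter's `read_bytes`, which additionally raises a const-eval error on uninitialized or pointer bytes):

```rust
fn const_raw_eq(lhs_bytes: &[u8], rhs_bytes: &[u8]) -> bool {
    lhs_bytes == rhs_bytes
}
```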
31 changes: 13 additions & 18 deletions compiler/rustc_middle/src/ty/layout.rs
@@ -13,7 +13,7 @@ use rustc_session::{config::OptLevel, DataTypeKind, FieldInfo, SizeKind, Variant
 use rustc_span::symbol::Symbol;
 use rustc_span::{Span, DUMMY_SP};
 use rustc_target::abi::call::{
-    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode, Reg, RegKind,
+    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode,
 };
 use rustc_target::abi::*;
 use rustc_target::spec::{abi::Abi as SpecAbi, HasTargetSpec, PanicStrategy, Target};
@@ -3182,7 +3182,17 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
             }
 
             match arg.layout.abi {
-                Abi::Aggregate { .. } => {}
+                Abi::Aggregate { .. } => {
+                    // Pass and return structures up to 2 pointers in size by value,
+                    // matching `ScalarPair`. LLVM will usually pass these in 2 registers
+                    // which is more efficient than by-ref.
+                    let max_by_val_size = Pointer.size(self) * 2;
+                    let size = arg.layout.size;
+
+                    if arg.layout.is_unsized() || size > max_by_val_size {
+                        arg.make_indirect();
+                    }
+                }
 
                 // This is a fun case! The gist of what this is doing is
                 // that we want callers and callees to always agree on the
@@ -3208,24 +3218,9 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                     && self.tcx.sess.target.simd_types_indirect =>
                 {
                     arg.make_indirect();
-                    return;
                 }
 
-                _ => return,
-            }
-
-            // Pass and return structures up to 2 pointers in size by value, matching `ScalarPair`.
-            // LLVM will usually pass these in 2 registers, which is more efficient than by-ref.
-            let max_by_val_size = Pointer.size(self) * 2;
-            let size = arg.layout.size;
-
-            if arg.layout.is_unsized() || size > max_by_val_size {
-                arg.make_indirect();
-            } else {
-                // We want to pass small aggregates as immediates, but using
-                // a LLVM aggregate type for this leads to bad optimizations,
-                // so we pick an appropriately sized integer type instead.
-                arg.cast_to(Reg { kind: RegKind::Integer, size });
+                _ => {}
             }
         };
         fixup(&mut fn_abi.ret);
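The new `Abi::Aggregate` arm keeps aggregates of up to two pointers by value and no longer casts them to `RegKind::Integer`. On a 64-bit target the cutoff works out as follows (illustrative types, not from the PR):

```rust
use std::mem::size_of;

#[repr(C)]
struct TwoWords(u64, u64); // 16 bytes = 2 pointers: stays by-value
#[repr(C)]
struct ThreeWords(u64, u64, u64); // 24 bytes: `arg.make_indirect()` applies

fn main() {
    // `Pointer.size(self) * 2` on a 64-bit target:
    let max_by_val_size = size_of::<usize>() * 2;
    assert!(size_of::<TwoWords>() <= max_by_val_size);
    assert!(size_of::<ThreeWords>() > max_by_val_size);
}
```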
1 change: 0 additions & 1 deletion compiler/rustc_span/src/symbol.rs
@@ -1069,7 +1069,6 @@ symbols! {
         quote,
         range_inclusive_new,
         raw_dylib,
-        raw_eq,
         raw_identifiers,
         raw_ref_op,
         re_rebalance_coherence,
7 changes: 0 additions & 7 deletions compiler/rustc_typeck/src/check/intrinsic.rs
@@ -387,13 +387,6 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
 
         sym::nontemporal_store => (1, vec![tcx.mk_mut_ptr(param(0)), param(0)], tcx.mk_unit()),
 
-        sym::raw_eq => {
-            let br = ty::BoundRegion { var: ty::BoundVar::from_u32(0), kind: ty::BrAnon(0) };
-            let param_ty =
-                tcx.mk_imm_ref(tcx.mk_region(ty::ReLateBound(ty::INNERMOST, br)), param(0));
-            (1, vec![param_ty; 2], tcx.types.bool)
-        }
-
         sym::black_box => (1, vec![param(0)], param(0)),
 
         sym::const_eval_select => (4, vec![param(0), param(1), param(2)], param(3)),
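The removed arm checked `raw_eq` against a signature with one type parameter, two references sharing a higher-ranked lifetime, and a `bool` result; that is, `for<'a> fn(&'a T, &'a T) -> bool`. A sketch of the corresponding core-side declaration (nightly-only syntax requiring `#![feature(intrinsics)]`):

```rust
extern "rust-intrinsic" {
    fn raw_eq<T>(a: &T, b: &T) -> bool;
}
```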
91 changes: 2 additions & 89 deletions library/core/src/array/equality.rs
@@ -1,6 +1,4 @@
 use crate::convert::TryInto;
-use crate::num::{NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroIsize};
-use crate::num::{NonZeroU128, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize};
 
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<A, B, const N: usize> PartialEq<[B; N]> for [A; N]
@@ -9,11 +7,11 @@ where
 {
     #[inline]
     fn eq(&self, other: &[B; N]) -> bool {
-        SpecArrayEq::spec_eq(self, other)
+        self[..] == other[..]
     }
     #[inline]
     fn ne(&self, other: &[B; N]) -> bool {
-        SpecArrayEq::spec_ne(self, other)
+        self[..] != other[..]
     }
 }
 
@@ -129,88 +127,3 @@ where
 
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<T: Eq, const N: usize> Eq for [T; N] {}
-
-trait SpecArrayEq<Other, const N: usize>: Sized {
-    fn spec_eq(a: &[Self; N], b: &[Other; N]) -> bool;
-    fn spec_ne(a: &[Self; N], b: &[Other; N]) -> bool;
-}
-
-impl<T: PartialEq<Other>, Other, const N: usize> SpecArrayEq<Other, N> for T {
-    default fn spec_eq(a: &[Self; N], b: &[Other; N]) -> bool {
-        a[..] == b[..]
-    }
-    default fn spec_ne(a: &[Self; N], b: &[Other; N]) -> bool {
-        a[..] != b[..]
-    }
-}
-
-impl<T: IsRawEqComparable<U>, U, const N: usize> SpecArrayEq<U, N> for T {
-    fn spec_eq(a: &[T; N], b: &[U; N]) -> bool {
-        // SAFETY: This is why `IsRawEqComparable` is an `unsafe trait`.
-        unsafe {
-            let b = &*b.as_ptr().cast::<[T; N]>();
-            crate::intrinsics::raw_eq(a, b)
-        }
-    }
-    fn spec_ne(a: &[T; N], b: &[U; N]) -> bool {
-        !Self::spec_eq(a, b)
-    }
-}
-
-/// `U` exists on here mostly because `min_specialization` didn't let me
-/// repeat the `T` type parameter in the above specialization, so instead
-/// the `T == U` constraint comes from the impls on this.
-/// # Safety
-/// - Neither `Self` nor `U` has any padding.
-/// - `Self` and `U` have the same layout.
-/// - `Self: PartialEq<U>` is byte-wise (this means no floats, among other things)
-#[rustc_specialization_trait]
-unsafe trait IsRawEqComparable<U>: PartialEq<U> {}
-
-macro_rules! is_raw_eq_comparable {
-    ($($t:ty),+ $(,)?) => {$(
-        unsafe impl IsRawEqComparable<$t> for $t {}
-    )+};
-}
-
-// SAFETY: All the ordinary integer types allow all bit patterns as distinct values
-is_raw_eq_comparable!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize);
-
-// SAFETY: bool and char have *niches*, but no *padding*, so this is sound
-is_raw_eq_comparable!(bool, char);
-
-// SAFETY: Similarly, the non-zero types have a niche, but no undef,
-// and they compare like their underlying numeric type.
-is_raw_eq_comparable!(
-    NonZeroU8,
-    NonZeroU16,
-    NonZeroU32,
-    NonZeroU64,
-    NonZeroU128,
-    NonZeroUsize,
-    NonZeroI8,
-    NonZeroI16,
-    NonZeroI32,
-    NonZeroI64,
-    NonZeroI128,
-    NonZeroIsize,
-);
-
-// SAFETY: The NonZero types have the "null" optimization guaranteed, and thus
-// are also safe to equality-compare bitwise inside an `Option`.
-// The way `PartialOrd` is defined for `Option` means that this wouldn't work
-// for `<` or `>` on the signed types, but since we only do `==` it's fine.
-is_raw_eq_comparable!(
-    Option<NonZeroU8>,
-    Option<NonZeroU16>,
-    Option<NonZeroU32>,
-    Option<NonZeroU64>,
-    Option<NonZeroU128>,
-    Option<NonZeroUsize>,
-    Option<NonZeroI8>,
-    Option<NonZeroI16>,
-    Option<NonZeroI32>,
-    Option<NonZeroI64>,
-    Option<NonZeroI128>,
-    Option<NonZeroIsize>,
-);
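With the specialization machinery gone, `[A; N] == [B; N]` simply defers to slice equality. The `IsRawEqComparable` guard existed because `raw_eq` compared padding bytes too, which is why it was restricted to padding-free types. A small demonstration of the surviving behavior:

```rust
fn main() {
    let a = [1u32, 2, 3];
    let b = [1u32, 2, 3];
    // Arrays now compare through `self[..] == other[..]`,
    // i.e. ordinary slice equality.
    assert!(a == b);
    assert!(a[..] == b[..]);
}
```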