De-LLVM the unchecked shifts [MCP#693]
This is just one part of the MCP, but it's the one that IMHO removes the most noise from the standard library code.

Seems net simpler this way, since MIR already supported heterogeneous shifts anyway, and thus it's not more work for backends than before.
scottmcm committed Mar 30, 2024
1 parent 69fa40c commit 122df8a
Showing 36 changed files with 433 additions and 575 deletions.
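For a concrete sense of what the heterogeneous signatures buy, here is a minimal sketch (nightly-only; `core_intrinsics` is an internal, unstable API and this snippet is illustrative rather than part of the commit). With `unchecked_shl<T: Copy, U: Copy>(x: T, y: U) -> T`, the shift amount can stay `u32` even when the shifted value is narrower, so callers no longer need to cast the RHS to the self type and re-`assume` its range.

```rust
#![feature(core_intrinsics)]
#![allow(internal_features)]
use std::intrinsics;

fn main() {
    let x: u8 = 0b0000_0011;
    let shift: u32 = 2;
    // SAFETY: `shift` is less than u8::BITS, so the shift amount is in range.
    let y = unsafe { intrinsics::unchecked_shl(x, shift) };
    assert_eq!(y, 0b0000_1100);
}
```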
32 changes: 28 additions & 4 deletions compiler/rustc_codegen_ssa/src/base.rs
@@ -5,7 +5,7 @@ use crate::back::write::{
compute_per_cgu_lto_type, start_async_codegen, submit_codegened_module_to_llvm,
submit_post_lto_module_to_llvm, submit_pre_lto_module_to_llvm, ComputedLtoType, OngoingCodegen,
};
use crate::common::{IntPredicate, RealPredicate, TypeKind};
use crate::common::{self, IntPredicate, RealPredicate, TypeKind};
use crate::errors;
use crate::meth;
use crate::mir;
@@ -33,7 +33,7 @@ use rustc_middle::mir::mono::{CodegenUnit, CodegenUnitNameBuilder, MonoItem};
use rustc_middle::query::Providers;
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf, TyAndLayout};
use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
use rustc_session::config::{self, CrateType, EntryFnType, OutputType};
use rustc_session::config::{self, CrateType, EntryFnType, OptLevel, OutputType};
use rustc_session::Session;
use rustc_span::symbol::sym;
use rustc_span::Symbol;
@@ -300,14 +300,32 @@ pub fn coerce_unsized_into<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
}
}

pub fn cast_shift_expr_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
/// Shifts in MIR are all allowed to have mismatched LHS & RHS types.
///
/// This does all the appropriate conversions needed to pass it to the builder's
/// shift methods, which are UB for out-of-range shifts.
///
/// If `is_unchecked` is false, this masks the RHS to ensure it stays in-bounds.
/// For 32- and 64-bit types, this matches the semantics
/// of Java. (See related discussion on #1877 and #10183.)
///
/// If `is_unchecked` is true, this does no masking, and adds sufficient `assume`
/// calls or operation flags to preserve as much freedom to optimize as possible.
pub fn build_shift_expr_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
bx: &mut Bx,
lhs: Bx::Value,
rhs: Bx::Value,
mut rhs: Bx::Value,
is_unchecked: bool,
) -> Bx::Value {
// Shifts may have any size int on the rhs
let mut rhs_llty = bx.cx().val_ty(rhs);
let mut lhs_llty = bx.cx().val_ty(lhs);

let mask = common::shift_mask_val(bx, lhs_llty, rhs_llty, false);
if !is_unchecked {
rhs = bx.and(rhs, mask);
}

if bx.cx().type_kind(rhs_llty) == TypeKind::Vector {
rhs_llty = bx.cx().element_type(rhs_llty)
}
@@ -317,6 +335,12 @@ pub fn cast_shift_expr_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
let rhs_sz = bx.cx().int_width(rhs_llty);
let lhs_sz = bx.cx().int_width(lhs_llty);
if lhs_sz < rhs_sz {
if is_unchecked && bx.sess().opts.optimize != OptLevel::No {
// FIXME: Use `trunc nuw` once that's available
let inrange = bx.icmp(IntPredicate::IntULE, rhs, mask);
bx.assume(inrange);
}

bx.trunc(rhs, lhs_llty)
} else if lhs_sz > rhs_sz {
// We zero-extend even if the RHS is signed. So e.g. `(x: i32) << -1i8` will zero-extend the
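In effect, `build_shift_expr_rhs` now gives the backend one of two lowerings depending on `is_unchecked`. A rough model in plain Rust (illustrative only; the real code operates on backend values, and the `assume` is only emitted when the RHS is wider than the LHS and optimizations are enabled):

```rust
// MIR `Shl`: the RHS is masked, so the backend shift is never out of range.
fn shl_masked(lhs: u32, rhs: u32) -> u32 {
    lhs << (rhs & (u32::BITS - 1))
}

// MIR `ShlUnchecked`: no mask; the backend is instead told that the RHS is
// already in range (modeled here with `unreachable_unchecked`), which keeps
// the optimizer's freedom to exploit the out-of-range case being UB.
unsafe fn shl_unchecked_model(lhs: u32, rhs: u32) -> u32 {
    if rhs >= u32::BITS {
        // SAFETY: the caller promises `rhs < 32`, so this branch is never taken.
        unsafe { std::hint::unreachable_unchecked() }
    }
    lhs << rhs
}

fn main() {
    assert_eq!(shl_masked(1, 35), 8); // 35 & 31 == 3
    assert_eq!(unsafe { shl_unchecked_model(1, 3) }, 8);
}
```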
41 changes: 1 addition & 40 deletions compiler/rustc_codegen_ssa/src/common.rs
@@ -3,10 +3,9 @@
use rustc_hir::LangItem;
use rustc_middle::mir;
use rustc_middle::ty::Instance;
use rustc_middle::ty::{self, layout::TyAndLayout, Ty, TyCtxt};
use rustc_middle::ty::{self, layout::TyAndLayout, TyCtxt};
use rustc_span::Span;

use crate::base;
use crate::traits::*;

#[derive(Copy, Clone)]
@@ -128,44 +127,6 @@ pub fn build_langcall<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
(bx.fn_abi_of_instance(instance, ty::List::empty()), bx.get_fn_addr(instance), instance)
}

// To avoid UB from LLVM, these two functions mask RHS with an
// appropriate mask unconditionally (i.e., the fallback behavior for
// all shifts). For 32- and 64-bit types, this matches the semantics
// of Java. (See related discussion on #1877 and #10183.)

pub fn build_masked_lshift<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
bx: &mut Bx,
lhs: Bx::Value,
rhs: Bx::Value,
) -> Bx::Value {
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
// #1877, #10183: Ensure that input is always valid
let rhs = shift_mask_rhs(bx, rhs);
bx.shl(lhs, rhs)
}

pub fn build_masked_rshift<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
bx: &mut Bx,
lhs_t: Ty<'tcx>,
lhs: Bx::Value,
rhs: Bx::Value,
) -> Bx::Value {
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
// #1877, #10183: Ensure that input is always valid
let rhs = shift_mask_rhs(bx, rhs);
let is_signed = lhs_t.is_signed();
if is_signed { bx.ashr(lhs, rhs) } else { bx.lshr(lhs, rhs) }
}

fn shift_mask_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
bx: &mut Bx,
rhs: Bx::Value,
) -> Bx::Value {
let rhs_llty = bx.val_ty(rhs);
let shift_val = shift_mask_val(bx, rhs_llty, rhs_llty, false);
bx.and(rhs, shift_val)
}

pub fn shift_mask_val<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
bx: &mut Bx,
llty: Bx::Type,
12 changes: 5 additions & 7 deletions compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -3,7 +3,7 @@ use super::place::PlaceRef;
use super::{FunctionCx, LocalRef};

use crate::base;
use crate::common::{self, IntPredicate};
use crate::common::IntPredicate;
use crate::traits::*;
use crate::MemFlags;

@@ -860,14 +860,12 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
bx.inbounds_gep(llty, lhs, &[rhs])
}
}
mir::BinOp::Shl => common::build_masked_lshift(bx, lhs, rhs),
mir::BinOp::ShlUnchecked => {
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
mir::BinOp::Shl | mir::BinOp::ShlUnchecked => {
let rhs = base::build_shift_expr_rhs(bx, lhs, rhs, op == mir::BinOp::ShlUnchecked);
bx.shl(lhs, rhs)
}
mir::BinOp::Shr => common::build_masked_rshift(bx, input_ty, lhs, rhs),
mir::BinOp::ShrUnchecked => {
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
mir::BinOp::Shr | mir::BinOp::ShrUnchecked => {
let rhs = base::build_shift_expr_rhs(bx, lhs, rhs, op == mir::BinOp::ShrUnchecked);
if is_signed { bx.ashr(lhs, rhs) } else { bx.lshr(lhs, rhs) }
}
mir::BinOp::Ne
5 changes: 3 additions & 2 deletions compiler/rustc_hir_analysis/src/check/intrinsic.rs
@@ -454,9 +454,10 @@ pub fn check_intrinsic_type(
sym::unchecked_div | sym::unchecked_rem | sym::exact_div => {
(1, 0, vec![param(0), param(0)], param(0))
}
sym::unchecked_shl | sym::unchecked_shr | sym::rotate_left | sym::rotate_right => {
(1, 0, vec![param(0), param(0)], param(0))
sym::unchecked_shl | sym::unchecked_shr => {
(2, 0, vec![param(0), param(1)], param(0))
}
sym::rotate_left | sym::rotate_right => (1, 0, vec![param(0), param(0)], param(0)),
sym::unchecked_add | sym::unchecked_sub | sym::unchecked_mul => {
(1, 0, vec![param(0), param(0)], param(0))
}
16 changes: 16 additions & 0 deletions compiler/rustc_middle/src/mir/syntax.rs
@@ -1442,6 +1442,22 @@ pub enum BinOp {
Offset,
}

impl BinOp {
/// If there's an `Unchecked` version of this `BinOp`, return that.
/// Otherwise return `None`.
pub fn to_unchecked(self) -> Option<Self> {
use BinOp::*;
Some(match self {
Add => AddUnchecked,
Sub => SubUnchecked,
Mul => MulUnchecked,
Shl => ShlUnchecked,
Shr => ShrUnchecked,
_ => return None,
})
}
}

// Some nodes are used a lot. Make sure they don't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
mod size_asserts {
@@ -200,6 +200,14 @@ impl<'tcx, 'body> ParseCtxt<'tcx, 'body> {
)),
)
},
@call(mir_unchecked, args) => {
parse_by_kind!(self, args[0], _, "binary op",
ExprKind::Binary { op, lhs, rhs } => Ok(Rvalue::BinaryOp(
op.to_unchecked().unwrap_or_else(|| bug!("No unchecked version of {op:?}")),
Box::new((self.parse_operand(*lhs)?, self.parse_operand(*rhs)?)),
)),
)
},
@call(mir_offset, args) => {
let ptr = self.parse_operand(args[0])?;
let offset = self.parse_operand(args[1])?;
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
@@ -1168,6 +1168,7 @@ symbols! {
mir_static_mut,
mir_storage_dead,
mir_storage_live,
mir_unchecked,
mir_unreachable,
mir_unwind_cleanup,
mir_unwind_continue,
6 changes: 4 additions & 2 deletions library/core/src/intrinsics.rs
@@ -2224,18 +2224,20 @@ extern "rust-intrinsic" {
/// Safe wrappers for this intrinsic are available on the integer
/// primitives via the `checked_shl` method. For example,
/// [`u32::checked_shl`]
#[cfg(not(bootstrap))]
#[rustc_const_stable(feature = "const_int_unchecked", since = "1.40.0")]
#[rustc_nounwind]
pub fn unchecked_shl<T: Copy>(x: T, y: T) -> T;
pub fn unchecked_shl<T: Copy, U: Copy>(x: T, y: U) -> T;
/// Performs an unchecked right shift, resulting in undefined behavior when
/// `y < 0` or `y >= N`, where N is the width of T in bits.
///
/// Safe wrappers for this intrinsic are available on the integer
/// primitives via the `checked_shr` method. For example,
/// [`u32::checked_shr`]
#[cfg(not(bootstrap))]
#[rustc_const_stable(feature = "const_int_unchecked", since = "1.40.0")]
#[rustc_nounwind]
pub fn unchecked_shr<T: Copy>(x: T, y: T) -> T;
pub fn unchecked_shr<T: Copy, U: Copy>(x: T, y: U) -> T;

/// Returns the result of an unchecked addition, resulting in
/// undefined behavior when `x + y > T::MAX` or `x + y < T::MIN`.
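The doc comments above point to the safe wrappers on the integer primitives; those are stable and easy to check directly (a small usage example, not part of the diff):

```rust
fn main() {
    // `checked_shl`/`checked_shr` return `None` for out-of-range shift
    // amounts instead of invoking undefined behavior.
    assert_eq!(1u32.checked_shl(4), Some(16));
    assert_eq!(1u32.checked_shl(32), None);
    assert_eq!(0x80u32.checked_shr(7), Some(1));
    assert_eq!(0x80u32.checked_shr(40), None);
}
```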
2 changes: 2 additions & 0 deletions library/core/src/intrinsics/mir.rs
@@ -235,6 +235,7 @@
//! - Unary and binary operations use their normal Rust syntax - `a * b`, `!c`, etc.
//! - The binary operation `Offset` can be created via [`Offset`].
//! - Checked binary operations are represented by wrapping the associated binop in [`Checked`].
//! - Unchecked binary operations are represented by wrapping the associated binop in [`Unchecked`].
//! - Array repetition syntax (`[foo; 10]`) creates the associated rvalue.
//!
//! #### Terminators
@@ -359,6 +360,7 @@ define!("mir_storage_dead", fn StorageDead<T>(local: T));
define!("mir_assume", fn Assume(operand: bool));
define!("mir_deinit", fn Deinit<T>(place: T));
define!("mir_checked", fn Checked<T>(binop: T) -> (T, bool));
define!("mir_unchecked", fn Unchecked<T>(binop: T) -> T);
define!("mir_len", fn Len<T>(place: T) -> usize);
define!("mir_copy_for_deref", fn CopyForDeref<T>(place: T) -> T);
define!("mir_retag", fn Retag<T>(place: T));
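A rough sketch of how the new `Unchecked(..)` wrapper might be used from custom MIR (nightly-only and illustrative; the exact `mir!` invocation depends on the toolchain). Wrapping a binop in `Unchecked` builds the corresponding `*Unchecked` MIR operation, here `ShlUnchecked` instead of the masked `Shl`:

```rust
#![feature(custom_mir, core_intrinsics)]
#![allow(internal_features)]
use core::intrinsics::mir::*;

// Emits `RET = ShlUnchecked(a, b)` in MIR rather than a masked shift.
#[custom_mir(dialect = "built")]
unsafe fn shl_unchecked(a: i32, b: i32) -> i32 {
    mir!(
        {
            RET = Unchecked(a << b);
            Return()
        }
    )
}

fn main() {
    // SAFETY: 3 is a valid shift amount for i32.
    assert_eq!(unsafe { shl_unchecked(2, 3) }, 16);
}
```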
32 changes: 24 additions & 8 deletions library/core/src/num/int_macros.rs
@@ -1253,10 +1253,18 @@ macro_rules! int_impl {
#[inline(always)]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub const unsafe fn unchecked_shl(self, rhs: u32) -> Self {
// SAFETY: the caller must uphold the safety contract for
// `unchecked_shl`.
// Any legal shift amount is losslessly representable in the self type.
unsafe { intrinsics::unchecked_shl(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
#[cfg(bootstrap)]
{
// For bootstrapping, just use built-in primitive shift.
// panicking is a legal manifestation of UB
self << rhs
}
#[cfg(not(bootstrap))]
{
// SAFETY: the caller must uphold the safety contract for
// `unchecked_shl`.
unsafe { intrinsics::unchecked_shl(self, rhs) }
}
}

/// Checked shift right. Computes `self >> rhs`, returning `None` if `rhs` is
@@ -1336,10 +1344,18 @@ macro_rules! int_impl {
#[inline(always)]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub const unsafe fn unchecked_shr(self, rhs: u32) -> Self {
// SAFETY: the caller must uphold the safety contract for
// `unchecked_shr`.
// Any legal shift amount is losslessly representable in the self type.
unsafe { intrinsics::unchecked_shr(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
#[cfg(bootstrap)]
{
// For bootstrapping, just use built-in primitive shift.
// panicking is a legal manifestation of UB
self >> rhs
}
#[cfg(not(bootstrap))]
{
// SAFETY: the caller must uphold the safety contract for
// `unchecked_shr`.
unsafe { intrinsics::unchecked_shr(self, rhs) }
}
}

/// Checked absolute value. Computes `self.abs()`, returning `None` if
11 changes: 0 additions & 11 deletions library/core/src/num/mod.rs
@@ -286,17 +286,6 @@ macro_rules! widening_impl {
};
}

macro_rules! conv_rhs_for_unchecked_shift {
($SelfT:ty, $x:expr) => {{
// If the `as` cast will truncate, ensure we still tell the backend
// that the pre-truncation value was also small.
if <$SelfT>::BITS < 32 {
intrinsics::assume($x <= (<$SelfT>::MAX as u32));
}
$x as $SelfT
}};
}

impl i8 {
int_impl! {
Self = i8,
32 changes: 24 additions & 8 deletions library/core/src/num/uint_macros.rs
@@ -1313,10 +1313,18 @@ macro_rules! uint_impl {
#[inline(always)]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub const unsafe fn unchecked_shl(self, rhs: u32) -> Self {
// SAFETY: the caller must uphold the safety contract for
// `unchecked_shl`.
// Any legal shift amount is losslessly representable in the self type.
unsafe { intrinsics::unchecked_shl(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
#[cfg(bootstrap)]
{
// For bootstrapping, just use built-in primitive shift.
// panicking is a legal manifestation of UB
self << rhs
}
#[cfg(not(bootstrap))]
{
// SAFETY: the caller must uphold the safety contract for
// `unchecked_shl`.
unsafe { intrinsics::unchecked_shl(self, rhs) }
}
}

/// Checked shift right. Computes `self >> rhs`, returning `None`
@@ -1396,10 +1404,18 @@ macro_rules! uint_impl {
#[inline(always)]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub const unsafe fn unchecked_shr(self, rhs: u32) -> Self {
// SAFETY: the caller must uphold the safety contract for
// `unchecked_shr`.
// Any legal shift amount is losslessly representable in the self type.
unsafe { intrinsics::unchecked_shr(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
#[cfg(bootstrap)]
{
// For bootstrapping, just use built-in primitive shift.
// panicking is a legal manifestation of UB
self >> rhs
}
#[cfg(not(bootstrap))]
{
// SAFETY: the caller must uphold the safety contract for
// `unchecked_shr`.
unsafe { intrinsics::unchecked_shr(self, rhs) }
}
}

/// Checked exponentiation. Computes `self.pow(exp)`, returning `None` if
14 changes: 12 additions & 2 deletions library/core/src/ptr/mod.rs
@@ -1781,9 +1781,19 @@ pub(crate) const unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usiz
// FIXME(#75598): Direct use of these intrinsics improves codegen significantly at opt-level <=
// 1, where the method versions of these operations are not inlined.
use intrinsics::{
assume, cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_shl,
unchecked_shr, unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub,
assume, cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_sub,
wrapping_add, wrapping_mul, wrapping_sub,
};
#[cfg(bootstrap)]
const unsafe fn unchecked_shl(value: usize, shift: usize) -> usize {
value << shift
}
#[cfg(bootstrap)]
const unsafe fn unchecked_shr(value: usize, shift: usize) -> usize {
value >> shift
}
#[cfg(not(bootstrap))]
use intrinsics::{unchecked_shl, unchecked_shr};

/// Calculate multiplicative modular inverse of `x` modulo `m`.
///