test the parallel compiler #109776

Closed · wants to merge 11 commits
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/base.rs
@@ -686,7 +686,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
// This likely is a temporary measure. Once we don't have to support the
// non-parallel compiler anymore, we can compile CGUs end-to-end in
// parallel and get rid of the complicated scheduling logic.
let mut pre_compiled_cgus = if tcx.sess.threads() > 1 {
let mut pre_compiled_cgus = if rustc_data_structures::sync::is_dyn_thread_safe() {
tcx.sess.time("compile_first_CGU_batch", || {
// Try to find one CGU to compile per thread.
let cgus: Vec<_> = cgu_reuse
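The gate above switches from counting threads to querying a process-global mode. As a minimal sketch of what such a runtime flag could look like, assuming it is a global set once during startup (the real `rustc_data_structures::sync` implementation may differ, and `set_dyn_thread_safe_mode` is an assumed name):

```rust
use std::sync::atomic::{AtomicBool, Ordering};

// Process-global mode flag: written once while the compiler is still
// single-threaded, read from hot paths afterwards.
static DYN_THREAD_SAFE: AtomicBool = AtomicBool::new(false);

// Assumed setter, called during session setup.
pub fn set_dyn_thread_safe_mode(mode: bool) {
    DYN_THREAD_SAFE.store(mode, Ordering::Relaxed);
}

#[inline]
pub fn is_dyn_thread_safe() -> bool {
    DYN_THREAD_SAFE.load(Ordering::Relaxed)
}
```

Unlike `tcx.sess.threads() > 1`, this lets a build that statically supports parallelism still take the fully single-threaded code paths when only one thread is requested.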
25 changes: 25 additions & 0 deletions compiler/rustc_const_eval/src/interpret/memory.rs
@@ -121,6 +121,21 @@ pub struct AllocRef<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Box<[u8]>> {
tcx: TyCtxt<'tcx>,
alloc_id: AllocId,
}

// FIXME: More information in <https://github.com/rust-lang/rust/pull/106930>.
unsafe impl<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes> Send
for AllocRef<'a, 'tcx, Prov, Extra, Bytes>
where
Allocation<Prov, Extra, Bytes>: Sync,
{
}
unsafe impl<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes> Sync
for AllocRef<'a, 'tcx, Prov, Extra, Bytes>
where
Allocation<Prov, Extra, Bytes>: Sync,
{
}

/// A reference to some allocation that was already bounds-checked for the given region
/// and had the on-access machine hooks run.
pub struct AllocRefMut<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Box<[u8]>> {
@@ -130,6 +145,16 @@ pub struct AllocRefMut<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Box<[u8]>> {
alloc_id: AllocId,
}

// FIXME: More information in <https://github.com/rust-lang/rust/pull/106930>.
impl<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes> !Send
for AllocRefMut<'a, 'tcx, Prov, Extra, Bytes>
{
}
impl<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes> !Sync
for AllocRefMut<'a, 'tcx, Prov, Extra, Bytes>
{
}

impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
pub fn new() -> Self {
Memory {
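The manual `unsafe impl Send`/`Sync` for `AllocRef` and the `!Send`/`!Sync` opt-outs for `AllocRefMut` rely on the nightly `negative_impls` feature enabled in the next file. A toy sketch of the pattern, with illustrative names not taken from this PR:

```rust
#![feature(negative_impls)] // nightly-only

/// Stand-in for `AllocRefMut`: the field is itself Send + Sync, so the
/// auto traits would apply if we did not opt out explicitly.
struct MutHandle<'a> {
    slot: &'a mut u64,
}

// Opt out: moving or sharing this handle across threads would allow
// unsynchronized mutation of the underlying data.
impl<'a> !Send for MutHandle<'a> {}
impl<'a> !Sync for MutHandle<'a> {}
```

The read-only `AllocRef`, by contrast, can be shared as long as the underlying `Allocation` is `Sync`, which is what the `where Allocation<Prov, Extra, Bytes>: Sync` bounds express (a shared reference is `Send` exactly when its referent is `Sync`).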
1 change: 1 addition & 0 deletions compiler/rustc_const_eval/src/lib.rs
@@ -20,6 +20,7 @@ Rust MIR: a lowered representation of Rust.
#![feature(try_blocks)]
#![feature(yeet_expr)]
#![feature(if_let_guard)]
#![feature(negative_impls)]
#![recursion_limit = "256"]

#[macro_use]
2 changes: 1 addition & 1 deletion compiler/rustc_data_structures/Cargo.toml
@@ -51,4 +51,4 @@ features = [
memmap2 = "0.2.1"

[features]
rustc_use_parallel_compiler = ["indexmap/rustc-rayon", "rustc-rayon", "rustc-rayon-core"]
rustc_use_parallel_compiler = ["indexmap/rustc-rayon", "rustc-rayon", "rustc-rayon-core"]
268 changes: 219 additions & 49 deletions compiler/rustc_data_structures/src/sharded.rs
@@ -1,69 +1,236 @@
use crate::fx::{FxHashMap, FxHasher};
use crate::sync::{CacheAligned, Lock, LockGuard};
use crate::sync::{DynSync, LockLike};
use parking_lot::{Mutex, MutexGuard};
use std::borrow::Borrow;
use std::cell::{RefCell, RefMut};
use std::collections::hash_map::RawEntryMut;
use std::hash::{Hash, Hasher};
use std::mem;

#[cfg(parallel_compiler)]
// 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700,
// but this should be tested on higher core count CPUs. How the `Sharded` type gets used
// may also affect the ideal number of shards.
const SHARD_BITS: usize = 5;
pub trait Shard {
type Impl<T>: ShardImpl<T>;
}

#[cfg(not(parallel_compiler))]
const SHARD_BITS: usize = 0;
pub trait ShardImpl<T> {
type Lock: LockLike<T>;

pub const SHARDS: usize = 1 << SHARD_BITS;
fn new(value: impl FnMut() -> T) -> Self;

fn get_shard_by_value<K: Hash + ?Sized>(&self, _val: &K) -> &Self::Lock;

fn get_shard_by_hash(&self, _hash: u64) -> &Self::Lock;

fn lock_shards(&self) -> Vec<<Self::Lock as LockLike<T>>::LockGuard<'_>>;

fn try_lock_shards(&self) -> Option<Vec<<Self::Lock as LockLike<T>>::LockGuard<'_>>>;
}

#[derive(Default)]
pub struct SingleShard;

impl Shard for SingleShard {
type Impl<T> = SingleShardImpl<T>;
}

/// An array of cache-line aligned inner locked structures with convenience methods.
pub struct Sharded<T> {
shards: [CacheAligned<Lock<T>>; SHARDS],
pub struct SingleShardImpl<T> {
shard: RefCell<T>,
}

impl<T: Default> Default for Sharded<T> {
impl<T: Default> Default for SingleShardImpl<T> {
#[inline]
fn default() -> Self {
Self { shard: RefCell::new(T::default()) }
}
}

impl<T> ShardImpl<T> for SingleShardImpl<T> {
type Lock = RefCell<T>;

#[inline]
fn new(mut value: impl FnMut() -> T) -> Self {
SingleShardImpl { shard: RefCell::new(value()) }
}

#[inline]
fn get_shard_by_value<K: Hash + ?Sized>(&self, _val: &K) -> &RefCell<T> {
&self.shard
}

#[inline]
fn get_shard_by_hash(&self, _hash: u64) -> &RefCell<T> {
&self.shard
}

fn lock_shards(&self) -> Vec<RefMut<'_, T>> {
vec![self.shard.lock()]
}

fn try_lock_shards(&self) -> Option<Vec<RefMut<'_, T>>> {
Some(vec![self.shard.try_lock()?])
}
}

const SHARD_BITS: usize = 5;

pub const SHARDS: usize = 1 << SHARD_BITS;

#[derive(Default)]
pub struct Sharded;

impl Shard for Sharded {
type Impl<T> = ShardedImpl<T>;
}

#[derive(Default)]
#[repr(align(64))]
pub struct CacheAligned<T>(pub T);

pub struct ShardedImpl<T> {
shards: [CacheAligned<Mutex<T>>; SHARDS],
}

impl<T: Default> Default for ShardedImpl<T> {
#[inline]
fn default() -> Self {
Self::new(T::default)
}
}

impl<T> Sharded<T> {
impl<T> ShardImpl<T> for ShardedImpl<T> {
type Lock = Mutex<T>;

#[inline]
pub fn new(mut value: impl FnMut() -> T) -> Self {
Sharded { shards: [(); SHARDS].map(|()| CacheAligned(Lock::new(value()))) }
fn new(mut value: impl FnMut() -> T) -> Self {
ShardedImpl { shards: [(); SHARDS].map(|()| CacheAligned(Mutex::new(value()))) }
}

/// The shard is selected by hashing `val` with `FxHasher`.
#[inline]
pub fn get_shard_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Lock<T> {
if SHARDS == 1 { &self.shards[0].0 } else { self.get_shard_by_hash(make_hash(val)) }
fn get_shard_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Mutex<T> {
self.get_shard_by_hash(make_hash(val))
}

#[inline]
pub fn get_shard_by_hash(&self, hash: u64) -> &Lock<T> {
fn get_shard_by_hash(&self, hash: u64) -> &Mutex<T> {
&self.shards[get_shard_index_by_hash(hash)].0
}

fn lock_shards(&self) -> Vec<MutexGuard<'_, T>> {
(0..SHARDS).map(|i| self.shards[i].0.lock()).collect()
}

fn try_lock_shards(&self) -> Option<Vec<MutexGuard<'_, T>>> {
(0..SHARDS).map(|i| self.shards[i].0.try_lock()).collect()
}
}

pub struct DynSharded<T> {
single_thread: bool,
single_shard: RefCell<T>,
parallel_shard: ShardedImpl<T>,
}

#[cfg(parallel_compiler)]
unsafe impl<T> DynSync for DynSharded<T> {}

impl<T: Default> Default for DynSharded<T> {
#[inline]
pub fn get_shard_by_index(&self, i: usize) -> &Lock<T> {
&self.shards[i].0
fn default() -> Self {
let single_thread = !crate::sync::is_dyn_thread_safe();
DynSharded {
single_thread,
single_shard: RefCell::new(T::default()),
parallel_shard: ShardedImpl::default(),
}
}
}

pub fn lock_shards(&self) -> Vec<LockGuard<'_, T>> {
(0..SHARDS).map(|i| self.shards[i].0.lock()).collect()
impl<T: Default> DynSharded<T> {
pub fn new(mut value: impl FnMut() -> T) -> Self {
if !crate::sync::is_dyn_thread_safe() {
DynSharded {
single_thread: true,
single_shard: RefCell::new(value()),
parallel_shard: ShardedImpl::default(),
}
} else {
DynSharded {
single_thread: false,
single_shard: RefCell::new(T::default()),
parallel_shard: ShardedImpl::new(value),
}
}
}

pub fn try_lock_shards(&self) -> Option<Vec<LockGuard<'_, T>>> {
(0..SHARDS).map(|i| self.shards[i].0.try_lock()).collect()
/// The shard is selected by hashing `val` with `FxHasher`.
#[inline]
pub fn with_get_shard_by_value<K: Hash + ?Sized, F: FnOnce(&mut T) -> R, R>(
&self,
val: &K,
f: F,
) -> R {
if self.single_thread {
let mut lock = self.single_shard.borrow_mut();
f(&mut *lock)
} else {
let mut lock = self.parallel_shard.get_shard_by_value(val).lock();
f(&mut *lock)
}
}

#[inline]
pub fn with_get_shard_by_hash<F: FnOnce(&mut T) -> R, R>(&self, hash: u64, f: F) -> R {
if self.single_thread {
let mut lock = self.single_shard.borrow_mut();
f(&mut *lock)
} else {
let mut lock = self.parallel_shard.get_shard_by_hash(hash).lock();
f(&mut *lock)
}
}

#[inline]
pub fn with_lock_shards<F: FnMut(&mut T) -> R, R>(&self, mut f: F) -> Vec<R> {
if self.single_thread {
let mut lock = self.single_shard.borrow_mut();
vec![f(&mut *lock)]
} else {
(0..SHARDS).map(|i| f(&mut *self.parallel_shard.shards[i].0.lock())).collect()
}
}

#[inline]
pub fn with_try_lock_shards<F: FnMut(&mut T) -> R, R>(&self, mut f: F) -> Option<Vec<R>> {
if self.single_thread {
let mut lock = self.single_shard.try_borrow_mut().ok()?;
Some(vec![f(&mut *lock)])
} else {
(0..SHARDS)
.map(|i| {
let mut shard = self.parallel_shard.shards[i].0.try_lock()?;
Some(f(&mut *shard))
})
.collect()
}
}

#[inline]
pub fn get_lock_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Mutex<T> {
self.parallel_shard.get_shard_by_value(val)
}

#[inline]
pub fn get_borrow_by_value<K: Hash + ?Sized>(&self, _val: &K) -> &RefCell<T> {
&self.single_shard
}
}

pub type ShardedHashMap<K, V> = Sharded<FxHashMap<K, V>>;
pub type ShardedHashMap<K, V> = DynSharded<FxHashMap<K, V>>;

impl<K: Eq, V> ShardedHashMap<K, V> {
pub fn len(&self) -> usize {
self.lock_shards().iter().map(|shard| shard.len()).sum()
self.with_lock_shards(|shard| shard.len()).into_iter().sum()
}
}
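The change to `len` shows the crux of the new API: instead of returning lock guards, the map now takes a closure, so a single call site works for both the `RefCell`-backed single-thread path and the `Mutex`-backed parallel path. A usage sketch, assuming the signatures from the hunk above:

```rust
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sharded::DynSharded;
use std::hash::Hash;

// Mirrors the new `len` above: visit each shard through a closure and
// fold the per-shard results, never holding a guard across the call.
fn count_entries<K: Eq + Hash, V>(map: &DynSharded<FxHashMap<K, V>>) -> usize {
    map.with_lock_shards(|shard| shard.len()).into_iter().sum()
}
```

Keeping guards out of the public API also sidesteps the problem that `RefMut` and `MutexGuard` are different types, which guard-returning signatures would otherwise have to unify.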

@@ -75,17 +242,18 @@ impl<K: Eq + Hash + Copy> ShardedHashMap<K, ()> {
Q: Hash + Eq,
{
let hash = make_hash(value);
let mut shard = self.get_shard_by_hash(hash).lock();
let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, value);

match entry {
RawEntryMut::Occupied(e) => *e.key(),
RawEntryMut::Vacant(e) => {
let v = make();
e.insert_hashed_nocheck(hash, v, ());
v
self.with_get_shard_by_hash(hash, |shard| {
let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, value);

match entry {
RawEntryMut::Occupied(e) => *e.key(),
RawEntryMut::Vacant(e) => {
let v = make();
e.insert_hashed_nocheck(hash, v, ());
v
}
}
}
})
}

#[inline]
@@ -95,17 +263,18 @@ impl<K: Eq + Hash + Copy> ShardedHashMap<K, ()> {
Q: Hash + Eq,
{
let hash = make_hash(&value);
let mut shard = self.get_shard_by_hash(hash).lock();
let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, &value);

match entry {
RawEntryMut::Occupied(e) => *e.key(),
RawEntryMut::Vacant(e) => {
let v = make(value);
e.insert_hashed_nocheck(hash, v, ());
v
self.with_get_shard_by_hash(hash, |shard| {
let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, &value);

match entry {
RawEntryMut::Occupied(e) => *e.key(),
RawEntryMut::Vacant(e) => {
let v = make(value);
e.insert_hashed_nocheck(hash, v, ());
v
}
}
}
})
}
}

@@ -117,9 +286,10 @@ pub trait IntoPointer {
impl<K: Eq + Hash + Copy + IntoPointer> ShardedHashMap<K, ()> {
pub fn contains_pointer_to<T: Hash + IntoPointer>(&self, value: &T) -> bool {
let hash = make_hash(&value);
let shard = self.get_shard_by_hash(hash).lock();
let value = value.into_pointer();
shard.raw_entry().from_hash(hash, |entry| entry.into_pointer() == value).is_some()
self.with_get_shard_by_hash(hash, |shard| {
let value = value.into_pointer();
shard.raw_entry().from_hash(hash, |entry| entry.into_pointer() == value).is_some()
})
}
}
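Shard selection itself is untouched by this diff: `make_hash` hashes the key with `FxHasher` (as the doc comment on `get_shard_by_value` states) and `get_shard_index_by_hash` maps the hash to one of the `SHARDS` slots. A sketch of those helpers under the assumption that they match the pre-existing ones (the bit-twiddling details may differ from the real file):

```rust
use rustc_data_structures::fx::FxHasher;
use std::hash::{Hash, Hasher};

const SHARD_BITS: usize = 5; // mirrors the constant in the diff
const SHARDS: usize = 1 << SHARD_BITS;

fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 {
    let mut state = FxHasher::default();
    val.hash(&mut state);
    state.finish()
}

// Assumed mapping: take bits from the high end of the hash, skipping the
// topmost bits that hashbrown reserves for its own control bytes.
fn get_shard_index_by_hash(hash: u64) -> usize {
    let hash_len = std::mem::size_of::<usize>();
    let bits = (hash >> (hash_len * 8 - 7 - SHARD_BITS)) as usize;
    bits % SHARDS
}
```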
