From 3f20a5dff7926da1b7bfe511cc26887ddf70c0b5 Mon Sep 17 00:00:00 2001
From: Oliver Scherer
Date: Mon, 18 Feb 2019 10:54:16 +0100
Subject: [PATCH 1/8] Optimize copying large ranges of undefmask blocks

---
 src/librustc/mir/interpret/allocation.rs | 45 ++++++++++++++++++++----
 src/librustc_mir/interpret/memory.rs     | 22 ++++++++++--
 2 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs
index e96392edd64bf..06d5f27ccd744 100644
--- a/src/librustc/mir/interpret/allocation.rs
+++ b/src/librustc/mir/interpret/allocation.rs
@@ -613,7 +613,6 @@ impl DerefMut for Relocations {
 ////////////////////////////////////////////////////////////////////////////////
 
 type Block = u64;
-const BLOCK_SIZE: u64 = 64;
 
 #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
 pub struct UndefMask {
@@ -624,6 +623,8 @@ pub struct UndefMask {
 impl_stable_hash_for!(struct mir::interpret::UndefMask{blocks, len});
 
 impl UndefMask {
+    pub const BLOCK_SIZE: u64 = 64;
+
     pub fn new(size: Size) -> Self {
         let mut m = UndefMask {
             blocks: vec![],
@@ -643,6 +644,7 @@ impl UndefMask {
             return Err(self.len);
         }
 
+        // FIXME(oli-obk): optimize this for allocations larger than a block.
         let idx = (start.bytes()..end.bytes())
             .map(|i| Size::from_bytes(i))
             .find(|&i| !self.get(i));
@@ -662,8 +664,31 @@ impl UndefMask {
     }
 
     pub fn set_range_inbounds(&mut self, start: Size, end: Size, new_state: bool) {
-        for i in start.bytes()..end.bytes() {
-            self.set(Size::from_bytes(i), new_state);
+        let (blocka, bita) = bit_index(start);
+        let (blockb, bitb) = bit_index(end);
+        if blocka == blockb {
+            // within a single block
+            for i in bita .. bitb {
+                self.set_bit(blocka, i, new_state);
+            }
+            return;
+        }
+        // across block boundaries
+        for i in bita .. Self::BLOCK_SIZE as usize {
+            self.set_bit(blocka, i, new_state);
+        }
+        for i in 0 .. bitb {
+            self.set_bit(blockb, i, new_state);
+        }
+        // fill in all the other blocks (much faster than one bit at a time)
+        if new_state {
+            for block in (blocka + 1) .. blockb {
+                self.blocks[block] = 0xFFFF_FFFF_FFFF_FFFF;
+            }
+        } else {
+            for block in (blocka + 1) .. blockb {
+                self.blocks[block] = 0;
+            }
         }
     }
 
@@ -676,6 +701,11 @@ impl UndefMask {
     #[inline]
     pub fn set(&mut self, i: Size, new_state: bool) {
         let (block, bit) = bit_index(i);
+        self.set_bit(block, bit, new_state);
+    }
+
+    #[inline]
+    fn set_bit(&mut self, block: usize, bit: usize, new_state: bool) {
         if new_state {
             self.blocks[block] |= 1 << bit;
         } else {
@@ -684,11 +714,12 @@ impl UndefMask {
     }
 
     pub fn grow(&mut self, amount: Size, new_state: bool) {
-        let unused_trailing_bits = self.blocks.len() as u64 * BLOCK_SIZE - self.len.bytes();
+        let unused_trailing_bits = self.blocks.len() as u64 * Self::BLOCK_SIZE - self.len.bytes();
         if amount.bytes() > unused_trailing_bits {
-            let additional_blocks = amount.bytes() / BLOCK_SIZE + 1;
+            let additional_blocks = amount.bytes() / Self::BLOCK_SIZE + 1;
             assert_eq!(additional_blocks as usize as u64, additional_blocks);
             self.blocks.extend(
+                // FIXME(oli-obk): optimize this by repeating `new_state as Block`
                 iter::repeat(0).take(additional_blocks as usize),
             );
         }
@@ -701,8 +732,8 @@ impl UndefMask {
 #[inline]
 fn bit_index(bits: Size) -> (usize, usize) {
     let bits = bits.bytes();
-    let a = bits / BLOCK_SIZE;
-    let b = bits % BLOCK_SIZE;
+    let a = bits / UndefMask::BLOCK_SIZE;
+    let b = bits % UndefMask::BLOCK_SIZE;
     assert_eq!(a as usize as u64, a);
     assert_eq!(b as usize as u64, b);
     (a as usize, b as usize)
diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs
index 88b936afaa4c1..78668c5ad875e 100644
--- a/src/librustc_mir/interpret/memory.rs
+++ b/src/librustc_mir/interpret/memory.rs
@@ -20,7 +20,7 @@ use syntax::ast::Mutability;
 use super::{
     Pointer, AllocId, Allocation, GlobalId, AllocationExtra,
     EvalResult, Scalar, EvalErrorKind, AllocKind, PointerArithmetic,
-    Machine, AllocMap, MayLeak, ErrorHandled, InboundsCheck,
+    Machine, AllocMap, MayLeak, ErrorHandled, InboundsCheck, UndefMask,
 };
 
 #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
@@ -785,10 +785,28 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         assert_eq!(size.bytes() as usize as u64, size.bytes());
 
         let undef_mask = self.get(src.alloc_id)?.undef_mask.clone();
+        let get = |i| undef_mask.get(src.offset + Size::from_bytes(i));
         let dest_allocation = self.get_mut(dest.alloc_id)?;
 
+        // an optimization where we can just overwrite an entire range of definedness bits if
+        // they are going to be uniformly `1` or `0`.
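+        // (the copied range covers `size * repeat` mask bits, one per byte; only ranges
+        // wider than a single 64-bit block are worth the uniformity scan below, everything
+        // else falls through to the bit-by-bit default path at the end of this function)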
+        if size.bytes() * repeat > UndefMask::BLOCK_SIZE {
+            let first = undef_mask.get(src.offset);
+            // check that all bits are the same as the first bit
+            // FIXME(oli-obk): consider making this a function on `UndefMask` and optimize it, too
+            if (1..size.bytes()).all(|i| get(i) == first) {
+                dest_allocation.undef_mask.set_range(
+                    dest.offset,
+                    dest.offset + size * repeat,
+                    first,
+                );
+                return Ok(())
+            }
+        }
+
+        // the default path
         for i in 0..size.bytes() {
-            let defined = undef_mask.get(src.offset + Size::from_bytes(i));
+            let defined = get(i);
 
             for j in 0..repeat {
                 dest_allocation.undef_mask.set(

From 1e3d1b65c500a33993462c84ecb80136d184acb2 Mon Sep 17 00:00:00 2001
From: Oliver Scherer
Date: Mon, 18 Feb 2019 13:55:55 +0100
Subject: [PATCH 2/8] No magic numbers

---
 src/librustc/mir/interpret/allocation.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs
index 06d5f27ccd744..18880c551ed55 100644
--- a/src/librustc/mir/interpret/allocation.rs
+++ b/src/librustc/mir/interpret/allocation.rs
@@ -683,7 +683,7 @@ impl UndefMask {
         // fill in all the other blocks (much faster than one bit at a time)
         if new_state {
             for block in (blocka + 1) .. blockb {
-                self.blocks[block] = 0xFFFF_FFFF_FFFF_FFFF;
+                self.blocks[block] = u64::max_value();
             }
         } else {
             for block in (blocka + 1) .. blockb {

From aa8c48a2746e85b1a2e2ff53a34f30b81920fa5f Mon Sep 17 00:00:00 2001
From: Oliver Scherer
Date: Tue, 19 Feb 2019 09:35:06 +0100
Subject: [PATCH 3/8] Don't try to copy relocations if there are none

---
 src/librustc_mir/interpret/memory.rs | 39 ++++++++++++++++------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs
index 78668c5ad875e..8cbc404e28b31 100644
--- a/src/librustc_mir/interpret/memory.rs
+++ b/src/librustc_mir/interpret/memory.rs
@@ -700,24 +700,29 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         // relocations overlapping the edges; those would not be handled correctly).
         let relocations = {
             let relocations = self.get(src.alloc_id)?.relocations(self, src, size);
-            let mut new_relocations = Vec::with_capacity(relocations.len() * (length as usize));
-            for i in 0..length {
-                new_relocations.extend(
-                    relocations
-                    .iter()
-                    .map(|&(offset, reloc)| {
-                        // compute offset for current repetition
-                        let dest_offset = dest.offset + (i * size);
-                        (
-                            // shift offsets from source allocation to destination allocation
-                            offset + dest_offset - src.offset,
-                            reloc,
-                        )
-                    })
-                );
-            }
+            if relocations.is_empty() {
+                // nothing to copy, ignore even the `length` loop
+                Vec::new()
+            } else {
+                let mut new_relocations = Vec::with_capacity(relocations.len() * (length as usize));
+                for i in 0..length {
+                    new_relocations.extend(
+                        relocations
+                        .iter()
+                        .map(|&(offset, reloc)| {
+                            // compute offset for current repetition
+                            let dest_offset = dest.offset + (i * size);
+                            (
+                                // shift offsets from source allocation to destination allocation
+                                offset + dest_offset - src.offset,
+                                reloc,
+                            )
+                        })
+                    );
+                }
 
-            new_relocations
+                new_relocations
+            }
         };
 
         let tcx = self.tcx.tcx;

From 4ded592f60c3f1a16db56fadcd7924121ed92ce8 Mon Sep 17 00:00:00 2001
From: Oliver Scherer
Date: Tue, 19 Feb 2019 09:43:39 +0100
Subject: [PATCH 4/8] Test the `UndefMask` type

---
 src/test/run-pass-fulldeps/undef_mask.rs | 26 ++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 src/test/run-pass-fulldeps/undef_mask.rs

diff --git a/src/test/run-pass-fulldeps/undef_mask.rs b/src/test/run-pass-fulldeps/undef_mask.rs
new file mode 100644
index 0000000000000..37c44e2df6c5e
--- /dev/null
+++ b/src/test/run-pass-fulldeps/undef_mask.rs
@@ -0,0 +1,26 @@
+// ignore-cross-compile
+// ignore-stage1
+
+#![feature(rustc_private)]
+
+extern crate rustc;
+
+use rustc::mir::interpret::UndefMask;
+use rustc::ty::layout::Size;
+
+fn main() {
+    let mut mask = UndefMask::new(Size::from_bytes(500));
+    assert!(!mask.get(Size::from_bytes(499)));
+    mask.set(Size::from_bytes(499), true);
+    assert!(mask.get(Size::from_bytes(499)));
+    mask.set_range_inbounds(Size::from_bytes(100), Size::from_bytes(256), true);
+    for i in 0..100 {
+        assert!(!mask.get(Size::from_bytes(i)));
+    }
+    for i in 100..256 {
+        assert!(mask.get(Size::from_bytes(i)));
+    }
+    for i in 256..499 {
+        assert!(!mask.get(Size::from_bytes(i)));
+    }
+}

From d32b7e5b13ee833675c8fb3c905a4286690ceb15 Mon Sep 17 00:00:00 2001
From: Oliver Scherer
Date: Tue, 19 Feb 2019 12:40:56 +0100
Subject: [PATCH 5/8] Use a more general approach for setting large definedness ranges

---
 src/librustc_mir/interpret/memory.rs | 70 ++++++++++++++++++----------
 1 file changed, 45 insertions(+), 25 deletions(-)

diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs
index 8cbc404e28b31..6cbb611c1a3ec 100644
--- a/src/librustc_mir/interpret/memory.rs
+++ b/src/librustc_mir/interpret/memory.rs
@@ -20,7 +20,7 @@ use syntax::ast::Mutability;
 use super::{
     Pointer, AllocId, Allocation, GlobalId, AllocationExtra,
     EvalResult, Scalar, EvalErrorKind, AllocKind, PointerArithmetic,
-    Machine, AllocMap, MayLeak, ErrorHandled, InboundsCheck, UndefMask,
+    Machine, AllocMap, MayLeak, ErrorHandled, InboundsCheck,
 };
 
 #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
@@ -789,38 +789,58 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         // The bits have to be saved locally before writing to dest in case src and dest overlap.
         assert_eq!(size.bytes() as usize as u64, size.bytes());
 
-        let undef_mask = self.get(src.alloc_id)?.undef_mask.clone();
-        let get = |i| undef_mask.get(src.offset + Size::from_bytes(i));
-        let dest_allocation = self.get_mut(dest.alloc_id)?;
+        let undef_mask = &self.get(src.alloc_id)?.undef_mask;
+
+        // a precomputed cache for ranges of defined/undefined bits
+        // 0000010010001110 will become
+        // [5, 1, 2, 1, 3, 3, 1]
+        // where each element toggles the state
+        let mut ranges = smallvec::SmallVec::<[u64; 1]>::new();
+        let first = undef_mask.get(src.offset);
+        let mut cur_len = 1;
+        let mut cur = first;
+        for i in 1..size.bytes() {
+            // FIXME: optimize to bitshift the current undef block's bits and read the top bit
+            if undef_mask.get(src.offset + Size::from_bytes(i)) == cur {
+                cur_len += 1;
+            } else {
+                ranges.push(cur_len);
+                cur_len = 1;
+                cur = !cur;
+            }
+        }
+
+        // now fill in all the data
+        let dest_allocation = self.get_mut(dest.alloc_id)?;
 
         // an optimization where we can just overwrite an entire range of definedness bits if
         // they are going to be uniformly `1` or `0`.
-        if size.bytes() * repeat > UndefMask::BLOCK_SIZE {
-            let first = undef_mask.get(src.offset);
-            // check that all bits are the same as the first bit
-            // FIXME(oli-obk): consider making this a function on `UndefMask` and optimize it, too
-            if (1..size.bytes()).all(|i| get(i) == first) {
-                dest_allocation.undef_mask.set_range(
-                    dest.offset,
-                    dest.offset + size * repeat,
-                    first,
-                );
-                return Ok(())
-            }
+        if ranges.is_empty() {
+            dest_allocation.undef_mask.set_range(
+                dest.offset,
+                dest.offset + size * repeat,
+                first,
+            );
+            return Ok(())
         }
 
-        // the default path
-        for i in 0..size.bytes() {
-            let defined = get(i);
-
-            for j in 0..repeat {
-                dest_allocation.undef_mask.set(
-                    dest.offset + Size::from_bytes(i + (size.bytes() * j)),
-                    defined
+        // remember to fill in the trailing bits
+        ranges.push(cur_len);
+
+        for mut j in 0..repeat {
+            j *= size.bytes();
+            j += dest.offset.bytes();
+            let mut cur = first;
+            for range in &ranges {
+                let old_j = j;
+                j += range;
+                dest_allocation.undef_mask.set_range_inbounds(
+                    Size::from_bytes(old_j),
+                    Size::from_bytes(j),
+                    cur,
                 );
+                cur = !cur;
             }
         }
-
         Ok(())
     }
 }

From 60fde17a293ab94c56e415f5d5dd036527b4f201 Mon Sep 17 00:00:00 2001
From: Oliver Scherer
Date: Wed, 20 Feb 2019 15:07:25 +0100
Subject: [PATCH 6/8] Use bit operations for setting large ranges of bits in a u64

---
 src/librustc/mir/interpret/allocation.rs | 49 ++++++++++++++++--------
 src/librustc_mir/interpret/memory.rs     |  2 +-
 src/test/run-pass-fulldeps/undef_mask.rs |  2 +-
 3 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs
index 18880c551ed55..004804f7c211e 100644
--- a/src/librustc/mir/interpret/allocation.rs
+++ b/src/librustc/mir/interpret/allocation.rs
@@ -100,8 +100,7 @@ impl AllocationExtra<(), ()> for () {
 impl Allocation {
     /// Creates a read-only allocation initialized by the given bytes
     pub fn from_bytes(slice: &[u8], align: Align, extra: Extra) -> Self {
-        let mut undef_mask = UndefMask::new(Size::ZERO);
-        undef_mask.grow(Size::from_bytes(slice.len() as u64), true);
+        let undef_mask = UndefMask::new(Size::from_bytes(slice.len() as u64), true);
         Self {
             bytes: slice.to_owned(),
             relocations: Relocations::new(),
@@ -121,7 +120,7 @@ impl Allocation {
         Allocation {
             bytes: vec![0; size.bytes() as usize],
             relocations: Relocations::new(),
-            undef_mask: UndefMask::new(size),
+            undef_mask: UndefMask::new(size, false),
             align,
             mutability: Mutability::Mutable,
             extra,
@@ -625,12 +624,12 @@ impl_stable_hash_for!(struct mir::interpret::UndefMask{blocks, len});
 
 impl UndefMask {
     pub const BLOCK_SIZE: u64 = 64;
 
-    pub fn new(size: Size) -> Self {
+    pub fn new(size: Size, state: bool) -> Self {
         let mut m = UndefMask {
             blocks: vec![],
             len: Size::ZERO,
         };
-        m.grow(size, false);
+        m.grow(size, state);
         m
     }
 
@@ -667,25 +666,40 @@ impl UndefMask {
         let (blocka, bita) = bit_index(start);
         let (blockb, bitb) = bit_index(end);
         if blocka == blockb {
-            // within a single block
-            for i in bita .. bitb {
-                self.set_bit(blocka, i, new_state);
+            // first set all bits but the first `bita`
+            // then unset the last `64 - bitb` bits
+            let range = if bitb == 0 {
+                u64::max_value() << bita
+            } else {
+                (u64::max_value() << bita) & (u64::max_value() >> (64 - bitb))
+            };
+            if new_state {
+                self.blocks[blocka] |= range;
+            } else {
+                self.blocks[blocka] &= !range;
             }
             return;
         }
         // across block boundaries
-        for i in bita .. Self::BLOCK_SIZE as usize {
-            self.set_bit(blocka, i, new_state);
-        }
-        for i in 0 .. bitb {
-            self.set_bit(blockb, i, new_state);
-        }
-        // fill in all the other blocks (much faster than one bit at a time)
         if new_state {
+            // set bita..64 to 1
+            self.blocks[blocka] |= u64::max_value() << bita;
+            // set 0..bitb to 1
+            if bitb != 0 {
+                self.blocks[blockb] |= u64::max_value() >> (64 - bitb);
+            }
+            // fill in all the other blocks (much faster than one bit at a time)
             for block in (blocka + 1) .. blockb {
                 self.blocks[block] = u64::max_value();
             }
         } else {
+            // set bita..64 to 0
+            self.blocks[blocka] &= !(u64::max_value() << bita);
+            // set 0..bitb to 0
+            if bitb != 0 {
+                self.blocks[blockb] &= !(u64::max_value() >> (64 - bitb));
+            }
+            // fill in all the other blocks (much faster than one bit at a time)
             for block in (blocka + 1) .. blockb {
                 self.blocks[block] = 0;
             }
@@ -695,7 +709,7 @@ impl UndefMask {
     #[inline]
     pub fn get(&self, i: Size) -> bool {
         let (block, bit) = bit_index(i);
-        (self.blocks[block] & 1 << bit) != 0
+        (self.blocks[block] & (1 << bit)) != 0
     }
 
     #[inline]
@@ -714,6 +728,9 @@ impl UndefMask {
     }
 
     pub fn grow(&mut self, amount: Size, new_state: bool) {
+        if amount.bytes() == 0 {
+            return;
+        }
         let unused_trailing_bits = self.blocks.len() as u64 * Self::BLOCK_SIZE - self.len.bytes();
         if amount.bytes() > unused_trailing_bits {
             let additional_blocks = amount.bytes() / Self::BLOCK_SIZE + 1;
diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs
index 6cbb611c1a3ec..fba0a9af21392 100644
--- a/src/librustc_mir/interpret/memory.rs
+++ b/src/librustc_mir/interpret/memory.rs
@@ -815,7 +815,7 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         // an optimization where we can just overwrite an entire range of definedness bits if
         // they are going to be uniformly `1` or `0`.
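+        // (`ranges` being empty means the compression loop above never saw a bit that
+        // differed from `first`, i.e. the source range is uniformly defined or undefined)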
         if ranges.is_empty() {
-            dest_allocation.undef_mask.set_range(
+            dest_allocation.undef_mask.set_range_inbounds(
                 dest.offset,
                 dest.offset + size * repeat,
                 first,
diff --git a/src/test/run-pass-fulldeps/undef_mask.rs b/src/test/run-pass-fulldeps/undef_mask.rs
index 37c44e2df6c5e..cf6e6f7231638 100644
--- a/src/test/run-pass-fulldeps/undef_mask.rs
+++ b/src/test/run-pass-fulldeps/undef_mask.rs
@@ -9,7 +9,7 @@ use rustc::mir::interpret::UndefMask;
 use rustc::ty::layout::Size;
 
 fn main() {
-    let mut mask = UndefMask::new(Size::from_bytes(500));
+    let mut mask = UndefMask::new(Size::from_bytes(500), false);
     assert!(!mask.get(Size::from_bytes(499)));
     mask.set(Size::from_bytes(499), true);
     assert!(mask.get(Size::from_bytes(499)));

From 1ae131211be24a337877e6dbcd9b6c52a86b9511 Mon Sep 17 00:00:00 2001
From: Oliver Scherer
Date: Tue, 12 Mar 2019 14:43:49 +0100
Subject: [PATCH 7/8] Explain the bits of `UndefMask`

---
 src/librustc/mir/interpret/allocation.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs
index 004804f7c211e..2ce9a4a0f204d 100644
--- a/src/librustc/mir/interpret/allocation.rs
+++ b/src/librustc/mir/interpret/allocation.rs
@@ -613,6 +613,8 @@ impl DerefMut for Relocations {
 
 type Block = u64;
 
+/// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte
+/// is defined. If it is `false`, the byte is undefined.
 #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
 pub struct UndefMask {
     blocks: Vec<Block>,

From 2a1eb1cef1a36c6f0a9d2e347529561c1293044e Mon Sep 17 00:00:00 2001
From: Oliver Scherer
Date: Tue, 12 Mar 2019 15:00:12 +0100
Subject: [PATCH 8/8] Document the precomputation algorithm's purpose

---
 src/librustc_mir/interpret/memory.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs
index fba0a9af21392..6ea200d4e4fad 100644
--- a/src/librustc_mir/interpret/memory.rs
+++ b/src/librustc_mir/interpret/memory.rs
@@ -791,6 +791,13 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
 
         let undef_mask = &self.get(src.alloc_id)?.undef_mask;
 
+        // Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`),
+        // a naive undef mask copying algorithm would repeatedly have to read the undef mask from
+        // the source and write it to the destination. Even if we optimized the memory accesses,
+        // we'd be doing all of this `repeat` times.
+        // Therefore we precompute a compressed version of the undef mask of the source value and
+        // then write it back `repeat` times without computing any more information from the source.
+
         // a precomputed cache for ranges of defined/undefined bits
        // 0000010010001110 will become
        // [5, 1, 2, 1, 3, 3, 1]
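
For reference, a minimal standalone sketch of the range compression described in the comments
above. The `compress_runs` helper is hypothetical (it mirrors the loop added in [PATCH 5/8] and
is not an API from these patches), and the final assertion reproduces the single-block bit mask
expression added in [PATCH 6/8]:

// Run-length-encode a definedness mask: keep the first bit's value plus the
// length of each run of equal bits; together they describe the whole mask, so
// writing it `repeat` times needs only `set_range_inbounds`-style calls.
fn compress_runs(bits: &[bool]) -> (bool, Vec<u64>) {
    assert!(!bits.is_empty(), "sketch assumes a non-empty mask");
    let first = bits[0];
    let mut ranges = Vec::new();
    let mut cur = first;
    let mut cur_len = 1u64;
    for &bit in &bits[1..] {
        if bit == cur {
            cur_len += 1;
        } else {
            ranges.push(cur_len);
            cur_len = 1;
            cur = !cur;
        }
    }
    // remember to fill in the trailing bits, matching the `ranges.push(cur_len)`
    // after the fast path in the patch (here we push unconditionally)
    ranges.push(cur_len);
    (first, ranges)
}

fn main() {
    // the example from the comment: 0000010010001110 becomes [5, 1, 2, 1, 3, 3, 1]
    let bits: Vec<bool> = "0000010010001110".chars().map(|c| c == '1').collect();
    let (first, ranges) = compress_runs(&bits);
    assert!(!first);
    assert_eq!(ranges, vec![5, 1, 2, 1, 3, 3, 1]);

    // the single-block case from [PATCH 6/8]: a u64 with exactly bits bita..bitb set
    // (the patch guards `bitb == 0` separately, since `x >> 64` is an out-of-range shift)
    let (bita, bitb) = (3, 10);
    let range = (u64::max_value() << bita) & (u64::max_value() >> (64 - bitb));
    assert_eq!(range, 0b11_1111_1000);
}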