Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce the partition_dedup/by/by_key methods for slices #54058

Merged
merged 2 commits into from
Sep 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/liballoc/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
#![feature(exact_chunks)]
#![feature(rustc_const_unstable)]
#![feature(const_vec_new)]
#![feature(slice_partition_dedup)]
#![feature(maybe_uninit)]

// Allow testing this library
Expand Down
100 changes: 11 additions & 89 deletions src/liballoc/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -947,10 +947,9 @@ impl<T> Vec<T> {
/// Removes all but the first of consecutive elements in the vector satisfying a given equality
/// relation.
///
/// The `same_bucket` function is passed references to two elements from the vector, and
/// returns `true` if the elements compare equal, or `false` if they do not. The elements are
/// passed in opposite order from their order in the vector, so if `same_bucket(a, b)` returns
/// `true`, `a` is removed.
/// The `same_bucket` function is passed references to two elements from the vector and
/// must determine if the elements compare equal. The elements are passed in opposite order
/// from their order in the vector, so if `same_bucket(a, b)` returns `true`, `a` is removed.
///
/// If the vector is sorted, this removes all duplicates.
///
Expand All @@ -964,90 +963,12 @@ impl<T> Vec<T> {
/// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
/// ```
#[stable(feature = "dedup_by", since = "1.16.0")]
pub fn dedup_by<F>(&mut self, mut same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool {
unsafe {
// Although we have a mutable reference to `self`, we cannot make
// *arbitrary* changes. The `same_bucket` calls could panic, so we
// must ensure that the vector is in a valid state at all time.
//
// The way that we handle this is by using swaps; we iterate
// over all the elements, swapping as we go so that at the end
// the elements we wish to keep are in the front, and those we
// wish to reject are at the back. We can then truncate the
// vector. This operation is still O(n).
//
// Example: We start in this state, where `r` represents "next
// read" and `w` represents "next_write`.
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate, so
// we swap self[r] and self[w] (no effect as r==w) and then increment both
// r and w, leaving us with:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this value is a duplicate,
// so we increment `r` but leave everything else unchanged:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate,
// so swap self[r] and self[w] and advance r and w:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 1 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Not a duplicate, repeat:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 3 | 1 | 3 |
// +---+---+---+---+---+---+
// w
//
// Duplicate, advance r. End of vec. Truncate to w.

let ln = self.len();
if ln <= 1 {
return;
}

// Avoid bounds checks by using raw pointers.
let p = self.as_mut_ptr();
let mut r: usize = 1;
let mut w: usize = 1;

while r < ln {
let p_r = p.add(r);
let p_wm1 = p.add(w - 1);
if !same_bucket(&mut *p_r, &mut *p_wm1) {
if r != w {
let p_w = p_wm1.offset(1);
mem::swap(&mut *p_r, &mut *p_w);
}
w += 1;
}
r += 1;
}

self.truncate(w);
}
pub fn dedup_by<F>(&mut self, same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool {
    // Partition the backing slice in place: the retained elements end up at
    // the front and the rejected ones at the back. Only the length of the
    // front part leaves this statement, so the mutable borrow of `self` is
    // over before `truncate` is called.
    let retained = self.as_mut_slice().partition_dedup_by(same_bucket).0.len();
    // Drop the rejected tail.
    self.truncate(retained);
}

/// Appends an element to the back of a collection.
Expand Down Expand Up @@ -1533,7 +1454,8 @@ impl<'a> Drop for SetLenOnDrop<'a> {
}

impl<T: PartialEq> Vec<T> {
/// Removes consecutive repeated elements in the vector.
/// Removes consecutive repeated elements in the vector according to the
/// [`PartialEq`] trait implementation.
///
/// If the vector is sorted, this removes all duplicates.
///
Expand Down
172 changes: 172 additions & 0 deletions src/libcore/slice/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1402,6 +1402,178 @@ impl<T> [T] {
sort::quicksort(self, |a, b| f(a).lt(&f(b)));
}

/// Reorders the slice so that consecutive repeated elements, as judged by the
/// [`PartialEq`] trait implementation, are moved to the end of the slice.
///
/// Two slices are returned. The first holds the retained elements and contains no
/// consecutive repeated elements. The second holds every element that was set aside
/// as a duplicate, in no specified order.
///
/// If the slice is sorted, the first returned slice contains no duplicates.
///
/// # Examples
///
/// ```
/// #![feature(slice_partition_dedup)]
///
/// let mut slice = [1, 2, 2, 3, 3, 2, 1, 1];
///
/// let (dedup, duplicates) = slice.partition_dedup();
///
/// assert_eq!(dedup, [1, 2, 3, 2, 1]);
/// assert_eq!(duplicates, [2, 3, 1]);
/// ```
#[unstable(feature = "slice_partition_dedup", issue = "54279")]
#[inline]
pub fn partition_dedup(&mut self) -> (&mut [T], &mut [T])
    where T: PartialEq
{
    // Delegate to the closure-based variant, comparing the pointed-to values.
    self.partition_dedup_by(|candidate, kept| *candidate == *kept)
}

/// Moves all but the first of consecutive elements to the end of the slice satisfying
/// a given equality relation.
///
/// Returns two slices. The first contains no consecutive repeated elements.
/// The second contains all the duplicates in no specified order.
///
/// The `same_bucket` function is passed references to two elements from the slice and
/// must determine if the elements compare equal. The elements are passed in opposite order
/// from their order in the slice, so if `same_bucket(a, b)` returns `true`, `a` is moved
/// at the end of the slice.
///
/// If the slice is sorted, the first returned slice contains no duplicates.
///
/// # Examples
///
/// ```
/// #![feature(slice_partition_dedup)]
///
/// let mut slice = ["foo", "Foo", "BAZ", "Bar", "bar", "baz", "BAZ"];
///
/// let (dedup, duplicates) = slice.partition_dedup_by(|a, b| a.eq_ignore_ascii_case(b));
///
/// assert_eq!(dedup, ["foo", "BAZ", "Bar", "baz"]);
/// assert_eq!(duplicates, ["bar", "Foo", "BAZ"]);
/// ```
#[unstable(feature = "slice_partition_dedup", issue = "54279")]
#[inline]
pub fn partition_dedup_by<F>(&mut self, mut same_bucket: F) -> (&mut [T], &mut [T])
where F: FnMut(&mut T, &mut T) -> bool
{
// Although we have a mutable reference to `self`, we cannot make
// *arbitrary* changes. The `same_bucket` calls could panic, so we
// must ensure that the slice is in a valid state at all times.
//
// The way that we handle this is by using swaps; we iterate
// over all the elements, swapping as we go so that at the end
// the elements we wish to keep are in the front, and those we
// wish to reject are at the back. We can then split the slice.
// This operation is still O(n).
//
// Example: We start in this state, where `r` represents "next
// read" and `w` represents "next_write`.
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate, so
// we swap self[r] and self[w] (no effect as r==w) and then increment both
// r and w, leaving us with:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this value is a duplicate,
// so we increment `r` but leave everything else unchanged:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate,
// so swap self[r] and self[w] and advance r and w:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 1 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Not a duplicate, repeat:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 3 | 1 | 3 |
// +---+---+---+---+---+---+
// w
//
// Duplicate, advance r. End of slice. Split at w.

let len = self.len();
if len <= 1 {
return (self, &mut [])
}

let ptr = self.as_mut_ptr();
let mut next_read: usize = 1;
let mut next_write: usize = 1;
Kerollmops marked this conversation as resolved.
Show resolved Hide resolved

unsafe {
// Avoid bounds checks by using raw pointers.
while next_read < len {
let ptr_read = ptr.add(next_read);
let prev_ptr_write = ptr.add(next_write - 1);
if !same_bucket(&mut *ptr_read, &mut *prev_ptr_write) {
if next_read != next_write {
let ptr_write = prev_ptr_write.offset(1);
mem::swap(&mut *ptr_read, &mut *ptr_write);
}
next_write += 1;
}
next_read += 1;
}
}

self.split_at_mut(next_write)
}

/// Moves all but the first of consecutive elements that resolve to the same key
/// to the end of the slice.
///
/// Two slices are returned. The first contains no consecutive elements with equal
/// keys. The second contains all the duplicates in no specified order.
///
/// If the slice is sorted, the first returned slice contains no duplicates.
///
/// # Examples
///
/// ```
/// #![feature(slice_partition_dedup)]
///
/// let mut slice = [10, 20, 21, 30, 30, 20, 11, 13];
///
/// let (dedup, duplicates) = slice.partition_dedup_by_key(|i| *i / 10);
///
/// assert_eq!(dedup, [10, 20, 30, 20, 11]);
/// assert_eq!(duplicates, [21, 30, 13]);
/// ```
#[unstable(feature = "slice_partition_dedup", issue = "54279")]
#[inline]
pub fn partition_dedup_by_key<K, F>(&mut self, mut key: F) -> (&mut [T], &mut [T])
    where F: FnMut(&mut T) -> K,
          K: PartialEq,
{
    self.partition_dedup_by(|candidate, kept| {
        // Compute the key of the candidate first, then of the kept element.
        let candidate_key = key(candidate);
        let kept_key = key(kept);
        candidate_key == kept_key
    })
}

/// Rotates the slice in-place such that the first `mid` elements of the
/// slice move to the end while the last `self.len() - mid` elements move to
/// the front. After calling `rotate_left`, the element previously at index
Expand Down
1 change: 1 addition & 0 deletions src/libcore/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#![feature(inner_deref)]
#![feature(slice_internals)]
#![feature(option_replace)]
#![feature(slice_partition_dedup)]
#![feature(copy_within)]

extern crate core;
Expand Down
59 changes: 59 additions & 0 deletions src/libcore/tests/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1001,6 +1001,65 @@ fn test_align_to_empty_mid() {
}
}

// Custom equality relation: elements with the same absolute value count as
// duplicates when they are adjacent to the last kept element.
#[test]
fn test_slice_partition_dedup_by() {
    let mut slice: [i32; 9] = [1, -1, 2, 3, 1, -5, 5, -2, 2];

    let (dedup, duplicates) = slice.partition_dedup_by(|a, b| a.abs() == b.abs());

    assert_eq!(dedup, [1, 2, 3, 1, -5, -2]);
    // The order of the duplicates is whatever the swap-based algorithm leaves behind.
    assert_eq!(duplicates, [5, -1, 2]);
}

// An empty slice partitions into two empty halves.
#[test]
fn test_slice_partition_dedup_empty() {
    let mut empty: [i32; 0] = [];

    let (unique, repeated) = empty.partition_dedup();

    assert_eq!(unique, []);
    assert_eq!(repeated, []);
}

// A single element is always kept and produces no duplicates.
#[test]
fn test_slice_partition_dedup_one() {
    let mut single = [12];

    let (unique, repeated) = single.partition_dedup();

    assert_eq!(unique, [12]);
    assert_eq!(repeated, []);
}

// Two long runs of identical values collapse to one representative each;
// every other element lands in the duplicates half.
#[test]
fn test_slice_partition_dedup_multiple_ident() {
    let mut runs = [12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11];

    let (unique, repeated) = runs.partition_dedup();

    assert_eq!(unique, [12, 11]);
    assert_eq!(repeated, [12, 12, 12, 12, 11, 11, 11, 11, 11]);
}

// `partition_dedup` must go through the type's own `PartialEq` impl: here two
// values are equal as soon as their first field matches.
#[test]
fn test_slice_partition_dedup_partialeq() {
    #[derive(Debug)]
    struct Foo(i32, i32);

    impl PartialEq for Foo {
        // Only the first field takes part in the comparison.
        fn eq(&self, rhs: &Self) -> bool {
            self.0 == rhs.0
        }
    }

    let mut items = [Foo(0, 1), Foo(0, 5), Foo(1, 7), Foo(1, 9)];

    let (unique, repeated) = items.partition_dedup();

    // These assertions use the same impl, so they too compare the first field only.
    assert_eq!(unique, [Foo(0, 1), Foo(1, 7)]);
    assert_eq!(repeated, [Foo(0, 5), Foo(1, 9)]);
}

#[test]
fn test_copy_within() {
// Start to end, with a RangeTo.
Expand Down