From 80a22ac2706c6f9b1318fe724a4e6727b9532f05 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Wed, 16 Aug 2023 12:37:02 +0200 Subject: [PATCH 01/14] `LazyBuffer::size_hint` --- src/lazy_buffer.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/lazy_buffer.rs b/src/lazy_buffer.rs index 65cc8e2cd..88ee06c7c 100644 --- a/src/lazy_buffer.rs +++ b/src/lazy_buffer.rs @@ -2,6 +2,8 @@ use std::iter::Fuse; use std::ops::Index; use alloc::vec::Vec; +use crate::size_hint::{self, SizeHint}; + #[derive(Debug, Clone)] pub struct LazyBuffer { pub it: Fuse, @@ -23,6 +25,10 @@ where self.buffer.len() } + pub fn size_hint(&self) -> SizeHint { + size_hint::add_scalar(self.it.size_hint(), self.len()) + } + pub fn get_next(&mut self) -> bool { if let Some(x) = self.it.next() { self.buffer.push(x); From d9f20e9a672707b14bfbfa57d0d26853d0cfb196 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Wed, 16 Aug 2023 13:13:41 +0200 Subject: [PATCH 02/14] Checked binomial See the end of this: https://en.wikipedia.org/wiki/Binomial_coefficient#In_programming_languages --- src/combinations.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/combinations.rs b/src/combinations.rs index 18adfc70e..f4adde995 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -126,3 +126,17 @@ impl FusedIterator for Combinations where I: Iterator, I::Item: Clone {} + +pub(crate) fn checked_binomial(mut n: usize, mut k: usize) -> Option { + if n < k { + return Some(0); + } + // `factorial(n) / factorial(n - k) / factorial(k)` but trying to avoid it overflows: + k = (n - k).min(k); // symmetry + let mut c = 1; + for i in 1..=k { + c = (c / i).checked_mul(n)?.checked_add((c % i).checked_mul(n)? / i)?; + n -= 1; + } + Some(c) +} From bd0b08047a2b04bf6fa008d7eda194c1bdbefec6 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Wed, 16 Aug 2023 13:19:34 +0200 Subject: [PATCH 03/14] Remaining combinations for an unknown `n` Because we give combinations in the lexicographic order, we can use the indices to count the remaining combinations to come. We are gonna heavily test it thought. --- src/combinations.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/combinations.rs b/src/combinations.rs index f4adde995..8b23eca15 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -77,6 +77,21 @@ impl Combinations { self.pool.prefill(k); } } + + /// For a given size `n`, return the count of remaining elements or None if it would overflow. + fn remaining_for(&self, n: usize) -> Option { + let k = self.k(); + if self.first { + checked_binomial(n, k) + } else { + self.indices + .iter() + .enumerate() + .fold(Some(0), |sum, (k0, n0)| { + sum.and_then(|s| s.checked_add(checked_binomial(n - 1 - *n0, k - k0)?)) + }) + } + } } impl Iterator for Combinations From 76efb2555a01f66dd96f4608d236f93685b109f1 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Wed, 16 Aug 2023 13:25:26 +0200 Subject: [PATCH 04/14] `LazyBuffer::fill` The whole point of the lazy buffer is to not fill the pool all at once but in some cases we want to, and then it's better than do it with `while lazy_buffer.get_next() {}` that would be weird. --- src/lazy_buffer.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lazy_buffer.rs b/src/lazy_buffer.rs index 88ee06c7c..7fcb165fc 100644 --- a/src/lazy_buffer.rs +++ b/src/lazy_buffer.rs @@ -45,6 +45,10 @@ where self.buffer.extend(self.it.by_ref().take(delta)); } } + + pub fn fill(&mut self) { + self.buffer.extend(self.it.by_ref()); + } } impl Index for LazyBuffer From 85c7980b9c034614df34e517c624ffe6c51bc16a Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Wed, 16 Aug 2023 13:30:46 +0200 Subject: [PATCH 05/14] `Combinations::count` tested --- src/combinations.rs | 5 +++++ tests/test_std.rs | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/combinations.rs b/src/combinations.rs index 8b23eca15..c67af0fb1 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -135,6 +135,11 @@ impl Iterator for Combinations // Create result vector based on the indices Some(self.indices.iter().map(|i| self.pool[*i].clone()).collect()) } + + fn count(mut self) -> usize { + self.pool.fill(); + self.remaining_for(self.n()).expect("Iterator count greater than usize::MAX") + } } impl FusedIterator for Combinations diff --git a/tests/test_std.rs b/tests/test_std.rs index 77207d87e..b70c206a2 100644 --- a/tests/test_std.rs +++ b/tests/test_std.rs @@ -909,6 +909,20 @@ fn combinations_zero() { it::assert_equal((0..0).combinations(0), vec![vec![]]); } +#[test] +fn combinations_range_count() { + for n in 0..6 { + for k in 0..=n { + let len = (n - k + 1..=n).product::() / (1..=k).product::(); + let mut it = (0..n).combinations(k); + for count in (0..=len).rev() { + assert_eq!(it.clone().count(), count); + assert_eq!(it.next().is_none(), count == 0); + } + } + } +} + #[test] fn permutations_zero() { it::assert_equal((1..3).permutations(0), vec![vec![]]); From ed998ba41c25251ffcaf52dbc92be6383f57c787 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Wed, 16 Aug 2023 13:41:17 +0200 Subject: [PATCH 06/14] `Combinations::size_hint` --- src/combinations.rs | 7 +++++++ tests/test_std.rs | 1 + 2 files changed, 8 insertions(+) diff --git a/src/combinations.rs b/src/combinations.rs index c67af0fb1..ca30a57d0 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -136,6 +136,13 @@ impl Iterator for Combinations Some(self.indices.iter().map(|i| self.pool[*i].clone()).collect()) } + fn size_hint(&self) -> (usize, Option) { + let (mut low, mut upp) = self.pool.size_hint(); + low = self.remaining_for(low).unwrap_or(usize::MAX); + upp = upp.and_then(|upp| self.remaining_for(upp)); + (low, upp) + } + fn count(mut self) -> usize { self.pool.fill(); self.remaining_for(self.n()).expect("Iterator count greater than usize::MAX") diff --git a/tests/test_std.rs b/tests/test_std.rs index b70c206a2..701ef72db 100644 --- a/tests/test_std.rs +++ b/tests/test_std.rs @@ -916,6 +916,7 @@ fn combinations_range_count() { let len = (n - k + 1..=n).product::() / (1..=k).product::(); let mut it = (0..n).combinations(k); for count in (0..=len).rev() { + assert_eq!(it.size_hint(), (count, Some(count))); assert_eq!(it.clone().count(), count); assert_eq!(it.next().is_none(), count == 0); } From 2ab0ff199293b68f1e8ff38db373c439dac24222 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Thu, 17 Aug 2023 00:14:26 +0200 Subject: [PATCH 07/14] Remove `LazyBuffer::fill`, count instead `pool.it.count()` consumes `pool` before we call `remaining_for` so it can not be a method and therefore becomes a free function. --- src/combinations.rs | 41 +++++++++++++++++++++-------------------- src/lazy_buffer.rs | 4 ---- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/src/combinations.rs b/src/combinations.rs index ca30a57d0..5c7a60a20 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -77,21 +77,6 @@ impl Combinations { self.pool.prefill(k); } } - - /// For a given size `n`, return the count of remaining elements or None if it would overflow. - fn remaining_for(&self, n: usize) -> Option { - let k = self.k(); - if self.first { - checked_binomial(n, k) - } else { - self.indices - .iter() - .enumerate() - .fold(Some(0), |sum, (k0, n0)| { - sum.and_then(|s| s.checked_add(checked_binomial(n - 1 - *n0, k - k0)?)) - }) - } - } } impl Iterator for Combinations @@ -138,14 +123,15 @@ impl Iterator for Combinations fn size_hint(&self) -> (usize, Option) { let (mut low, mut upp) = self.pool.size_hint(); - low = self.remaining_for(low).unwrap_or(usize::MAX); - upp = upp.and_then(|upp| self.remaining_for(upp)); + low = remaining_for(low, self.first, &self.indices).unwrap_or(usize::MAX); + upp = upp.and_then(|upp| remaining_for(upp, self.first, &self.indices)); (low, upp) } - fn count(mut self) -> usize { - self.pool.fill(); - self.remaining_for(self.n()).expect("Iterator count greater than usize::MAX") + fn count(self) -> usize { + let Self { indices, pool, first } = self; + let n = pool.len() + pool.it.count(); + remaining_for(n, first, &indices).expect("Iterator count greater than usize::MAX") } } @@ -167,3 +153,18 @@ pub(crate) fn checked_binomial(mut n: usize, mut k: usize) -> Option { } Some(c) } + +/// For a given size `n`, return the count of remaining combinations or None if it would overflow. +fn remaining_for(n: usize, first: bool, indices: &[usize]) -> Option { + let k = indices.len(); + if first { + checked_binomial(n, k) + } else { + indices + .iter() + .enumerate() + .fold(Some(0), |sum, (k0, n0)| { + sum.and_then(|s| s.checked_add(checked_binomial(n - 1 - *n0, k - k0)?)) + }) + } +} diff --git a/src/lazy_buffer.rs b/src/lazy_buffer.rs index 7fcb165fc..88ee06c7c 100644 --- a/src/lazy_buffer.rs +++ b/src/lazy_buffer.rs @@ -45,10 +45,6 @@ where self.buffer.extend(self.it.by_ref().take(delta)); } } - - pub fn fill(&mut self) { - self.buffer.extend(self.it.by_ref()); - } } impl Index for LazyBuffer From 1f48da730fa47a565317e0cee746f894561d66a5 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Fri, 18 Aug 2023 12:14:27 +0200 Subject: [PATCH 08/14] `checked_binomial`: url algorithm --- src/combinations.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/combinations.rs b/src/combinations.rs index 5c7a60a20..f0215e341 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -140,6 +140,7 @@ impl FusedIterator for Combinations I::Item: Clone {} +// https://en.wikipedia.org/wiki/Binomial_coefficient#In_programming_languages pub(crate) fn checked_binomial(mut n: usize, mut k: usize) -> Option { if n < k { return Some(0); From 95c3e277d4dbf96e4153fd9cb5d3fe7530a3aad7 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Fri, 18 Aug 2023 12:20:41 +0200 Subject: [PATCH 09/14] remaining combinations: handle `n < k` --- src/combinations.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/combinations.rs b/src/combinations.rs index f0215e341..f99df521c 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -158,7 +158,9 @@ pub(crate) fn checked_binomial(mut n: usize, mut k: usize) -> Option { /// For a given size `n`, return the count of remaining combinations or None if it would overflow. fn remaining_for(n: usize, first: bool, indices: &[usize]) -> Option { let k = indices.len(); - if first { + if n < k { + Some(0) + } else if first { checked_binomial(n, k) } else { indices From da190e303be006efa6edc4e8f5d2ded5f6b78d43 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Fri, 18 Aug 2023 12:39:00 +0200 Subject: [PATCH 10/14] update `combinations_range_count` test --- tests/test_std.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/test_std.rs b/tests/test_std.rs index 701ef72db..a351f4b3e 100644 --- a/tests/test_std.rs +++ b/tests/test_std.rs @@ -911,15 +911,22 @@ fn combinations_zero() { #[test] fn combinations_range_count() { - for n in 0..6 { + for n in 0..=10 { for k in 0..=n { let len = (n - k + 1..=n).product::() / (1..=k).product::(); let mut it = (0..n).combinations(k); - for count in (0..=len).rev() { - assert_eq!(it.size_hint(), (count, Some(count))); - assert_eq!(it.clone().count(), count); - assert_eq!(it.next().is_none(), count == 0); + assert_eq!(len, it.clone().count()); + assert_eq!(len, it.size_hint().0); + assert_eq!(Some(len), it.size_hint().1); + for count in (0..len).rev() { + let elem = it.next(); + assert!(elem.is_some()); + assert_eq!(count, it.clone().count()); + assert_eq!(count, it.size_hint().0); + assert_eq!(Some(count), it.size_hint().1); } + let should_be_none = it.next(); + assert!(should_be_none.is_none()); } } } From baf6708aa1d44494609d469bb2fa2018ba498ae7 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Fri, 18 Aug 2023 12:12:47 +0200 Subject: [PATCH 11/14] `Combinations::count`: unwrap instead of expect And add todo --- src/combinations.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/combinations.rs b/src/combinations.rs index f99df521c..be73f29fe 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -130,8 +130,9 @@ impl Iterator for Combinations fn count(self) -> usize { let Self { indices, pool, first } = self; + // TODO: make `pool.it` private let n = pool.len() + pool.it.count(); - remaining_for(n, first, &indices).expect("Iterator count greater than usize::MAX") + remaining_for(n, first, &indices).unwrap() } } From d8d4d61e4e027bf84d1799688f61b064d2f0c0b4 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Fri, 18 Aug 2023 13:02:28 +0200 Subject: [PATCH 12/14] `remaining_for`: add an explanation comment --- src/combinations.rs | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/combinations.rs b/src/combinations.rs index be73f29fe..da5c41377 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -164,11 +164,29 @@ fn remaining_for(n: usize, first: bool, indices: &[usize]) -> Option { } else if first { checked_binomial(n, k) } else { + // https://en.wikipedia.org/wiki/Combinatorial_number_system + // http://www.site.uottawa.ca/~lucia/courses/5165-09/GenCombObj.pdf + + // The combinations generated after the current one can be counted by counting as follows: + // - The subsequent combinations that differ in indices[0]: + // If subsequent combinations differ in indices[0], then their value for indices[0] + // must be at least 1 greater than the current indices[0]. + // As indices is strictly monotonically sorted, this means we can effectively choose k values + // from (n - 1 - indices[0]), leading to binomial(n - 1 - indices[0], k) possibilities. + // - The subsequent combinations with same indices[0], but differing indices[1]: + // Here we can choose k - 1 values from (n - 1 - indices[1]) values, + // leading to binomial(n - 1 - indices[1], k - 1) possibilities. + // - (...) + // - The subsequent combinations with same indices[0..=i], but differing indices[i]: + // Here we can choose k - i values from (n - 1 - indices[i]) values: binomial(n - 1 - indices[i], k - i). + // Since subsequent combinations can in any index, we must sum up the aforementioned binomial coefficients. + + // Below, `n0` resembles indices[i]. indices .iter() .enumerate() - .fold(Some(0), |sum, (k0, n0)| { - sum.and_then(|s| s.checked_add(checked_binomial(n - 1 - *n0, k - k0)?)) + .fold(Some(0), |sum, (i, n0)| { + sum.and_then(|s| s.checked_add(checked_binomial(n - 1 - *n0, k - i)?)) }) } } From cfb2ec99fc24db4f98a057e9144dcf8ac4477a49 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Fri, 18 Aug 2023 13:11:22 +0200 Subject: [PATCH 13/14] TODO: simplify when MSRV hits 1.37 --- src/combinations.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/combinations.rs b/src/combinations.rs index da5c41377..df49c6ab2 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -185,6 +185,9 @@ fn remaining_for(n: usize, first: bool, indices: &[usize]) -> Option { indices .iter() .enumerate() + // TODO: Once the MSRV hits 1.37.0, we can sum options instead: + // .map(|(i, n0)| checked_binomial(n - 1 - *n0, k - i)) + // .sum() .fold(Some(0), |sum, (i, n0)| { sum.and_then(|s| s.checked_add(checked_binomial(n - 1 - *n0, k - i)?)) }) From 65e08d49659f41e46e6d4452a3c195daddcc330d Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Fri, 18 Aug 2023 13:35:36 +0200 Subject: [PATCH 14/14] Test `checked_binomial` Only test for `n <= 500` but only a few values of the last row do not already have overflow on a 64 bits computer. NOTE: Not faster with an array updated in-place. --- src/combinations.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/combinations.rs b/src/combinations.rs index df49c6ab2..c50e95d01 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -156,6 +156,25 @@ pub(crate) fn checked_binomial(mut n: usize, mut k: usize) -> Option { Some(c) } +#[test] +fn test_checked_binomial() { + // With the first row: [1, 0, 0, ...] and the first column full of 1s, we check + // row by row the recurrence relation of binomials (which is an equivalent definition). + // For n >= 1 and k >= 1 we have: + // binomial(n, k) == binomial(n - 1, k - 1) + binomial(n - 1, k) + const LIMIT: usize = 500; + let mut row = vec![Some(0); LIMIT + 1]; + row[0] = Some(1); + for n in 0..=LIMIT { + for k in 0..=LIMIT { + assert_eq!(row[k], checked_binomial(n, k)); + } + row = std::iter::once(Some(1)) + .chain((1..=LIMIT).map(|k| row[k - 1]?.checked_add(row[k]?))) + .collect(); + } +} + /// For a given size `n`, return the count of remaining combinations or None if it would overflow. fn remaining_for(n: usize, first: bool, indices: &[usize]) -> Option { let k = indices.len();