From 3e3ab8d2dcedf81300cbcd34d4bd16a28417efed Mon Sep 17 00:00:00 2001
From: Theo Butler <theodusbutler@gmail.com>
Date: Tue, 26 Mar 2024 13:30:09 -0400
Subject: [PATCH] remove weighted random selection

---
 Cargo.lock                      |  2 -
 README.md                       |  2 +-
 candidate-selection/Cargo.toml  |  4 --
 candidate-selection/src/lib.rs  | 91 +++++++++++++++------------------
 candidate-selection/src/num.rs  |  2 +-
 candidate-selection/src/test.rs | 31 ++++-------
 indexer-selection/Cargo.toml    |  2 -
 indexer-selection/src/lib.rs    | 39 ++++----------
 indexer-selection/src/test.rs   | 21 +-------
 simulator/Cargo.toml            |  2 +-
 simulator/src/main.rs           |  6 +--
 11 files changed, 70 insertions(+), 132 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 43c7eb7..85fb1ad 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -354,7 +354,6 @@ dependencies = [
  "ordered-float",
  "permutation",
  "proptest",
- "rand",
 ]
 
 [[package]]
@@ -934,7 +933,6 @@ dependencies = [
  "candidate-selection",
  "custom_debug",
  "proptest",
- "rand",
  "thegraph-core",
  "url",
 ]
diff --git a/README.md b/README.md
index 7057b72..1a0efec 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # candidate-selection
 
-Utilities for picking candidates out of a set, based on weighted random selection. The weights used for selection are combuted using a [weighted product model](https://en.wikipedia.org/wiki/Weighted_product_model) over criteria such as success rate, latency, etc.
+Utilities for picking candidates out of a set based on the [weighted product model](https://en.wikipedia.org/wiki/Weighted_product_model) over criteria such as success rate, latency, etc.
 
 This library is heavily influenced by, and intended to replace, the original indexer selection algorithm used by the Graph Gateway. The original algorithm was designed by Zachary Burns & Theodore Butler.
diff --git a/candidate-selection/Cargo.toml b/candidate-selection/Cargo.toml
index e5f91dc..08ef8fe 100644
--- a/candidate-selection/Cargo.toml
+++ b/candidate-selection/Cargo.toml
@@ -8,7 +8,3 @@ arrayvec = "0.7.4"
 ordered-float = { version = "4.2.0", default-features = false }
 permutation = "0.4.1"
 proptest = "1.4.0"
-rand = { version = "0.8.5", default-features = false, features = ["alloc"] }
-
-[dev-dependencies]
-rand = { version = "0.8.5", default-features = true, features = ["small_rng"] }
diff --git a/candidate-selection/src/lib.rs b/candidate-selection/src/lib.rs
index 50f636d..d5f15b9 100644
--- a/candidate-selection/src/lib.rs
+++ b/candidate-selection/src/lib.rs
@@ -5,12 +5,12 @@ mod test;
 
 pub use crate::num::Normalized;
 pub use arrayvec::ArrayVec;
-use rand::seq::SliceRandom as _;
-use std::collections::BTreeMap;
+use ordered_float::NotNan;
 
 pub trait Candidate {
     type Id: Eq + Ord;
     fn id(&self) -> Self::Id;
+    fn fee(&self) -> Normalized;
     fn score(&self) -> Normalized;
     fn score_many<const LIMIT: usize>(candidates: &[&Self]) -> Normalized;
 }
@@ -24,60 +24,69 @@ pub trait Candidate {
 ///
-/// If a candidate's score is below `min_score_cutoff` as a proportion of the max provider's
-/// individual score, then the provider will not be selected.
+/// Candidates are picked greedily, one per round. Each round ranks the unselected candidates by
+/// their marginal gain in combined score divided by their fee (zero-fee candidates are ranked by
+/// the undivided gain) and considers only the top-ranked one. It is rejected, ending selection,
+/// if it adds no score, or if the combined score is nonzero and its fee exceeds half of its
+/// marginal gain as a proportion of the current combined score.
+///
+/// # Panics
+///
+/// Panics if `LIMIT` is zero.
-pub fn select<'c, Rng, Candidate, const LIMIT: usize>(
-    rng: &mut Rng,
+pub fn select<'c, Candidate, const LIMIT: usize>(
     candidates: &'c [Candidate],
-    min_score_cutoff: Normalized,
 ) -> ArrayVec<&'c Candidate, LIMIT>
 where
-    Rng: rand::Rng,
     Candidate: crate::Candidate,
 {
     assert!(LIMIT > 0);
-    // Collect into a map to remove duplicate candidates.
-    let candidates: BTreeMap<Candidate::Id, (&Candidate, Normalized)> = candidates
-        .iter()
-        .map(|candidate| {
-            let score = Candidate::score(candidate);
-            (candidate.id(), (candidate, score))
-        })
-        .filter(|(_, (_, score))| score > &Normalized::ZERO)
-        .collect();
-    if candidates.is_empty() {
-        return ArrayVec::new();
-    }
-    let max_score = *candidates.values().map(|(_, score)| score).max().unwrap();
-    let cutoff_score = max_score * min_score_cutoff;
-    // Collect into a vec because `choose_weighted` requires a slice to pick from.
-    let mut candidates: Vec<(&Candidate, Normalized)> = candidates
-        .into_iter()
-        .filter(|(_, (_, score))| *score >= cutoff_score)
-        .map(|(_, (candidate, score))| (candidate, score))
-        .collect();
-    // At this point we have reduced the candidates to those with a nonzero score above the cutoff.
 
-    let (first_selection, combined_score) = *candidates
-        .choose_weighted(rng, |(_, score)| score.as_f64())
-        .unwrap();
-    let mut selections: ArrayVec<&Candidate, LIMIT> = Default::default();
-    selections.push(first_selection);
-    candidates.retain(|(candidate, _)| candidate.id() != first_selection.id());
+    // Score gained by adding `candidate` to `selected`, relative to `current_score`.
+    let marginal_score = |current_score: Normalized,
+                          selected: &ArrayVec<&'c Candidate, LIMIT>,
+                          candidate: &'c Candidate| {
+        let mut buf = selected.clone();
+        buf.push(candidate);
+        let potential_score = Candidate::score_many::<LIMIT>(&buf);
+        NotNan::new(potential_score.as_f64() - current_score.as_f64()).unwrap()
+    };
 
-    // Sample sets of candidates to find combinations that increase the combined score.
-    let sample_limit = candidates.len().min(LIMIT * 5);
-    for _ in 0..sample_limit {
-        if (selections.len() == LIMIT) || candidates.is_empty() {
-            break;
-        }
-        let (picked, _) = *candidates
-            .choose_weighted(rng, |(_, score)| score.as_f64())
-            .unwrap();
-        selections.push(picked);
-        if Candidate::score_many::<LIMIT>(&selections) > combined_score {
-            candidates.retain(|(candidate, _)| candidate.id() != picked.id());
-        } else {
-            selections.pop();
-        }
+    let mut selected: ArrayVec<&Candidate, LIMIT> = Default::default();
+    while selected.len() < LIMIT {
+        let current_score = match selected.len() {
+            0 => Normalized::ZERO,
+            1 => Candidate::score(selected[0]),
+            _ => Candidate::score_many::<LIMIT>(&selected),
+        };
+        let selection = candidates
+            .iter()
+            // Skip candidates whose id has already been selected.
+            .filter(|c| selected.iter().all(|s| s.id() != c.id()))
+            .map(|c| (c, marginal_score(current_score, &selected, c)))
+            .max_by_key(|(c, marginal_score)| {
+                // A zero fee cannot be used as a divisor, so rank by the undivided marginal score.
+                if c.fee() == Normalized::ZERO {
+                    return *marginal_score;
+                }
+                marginal_score / c.fee().as_f64()
+            })
+            .filter(|(c, marginal_score)| {
+                // A candidate that adds nothing to the combined score is never worth selecting;
+                // without this check any candidate passes while `current_score` is zero.
+                if marginal_score.into_inner() <= 0.0 {
+                    return false;
+                }
+                if current_score == Normalized::ZERO {
+                    return true;
+                }
+                // Accept only if the fee is at most half of the relative score gain.
+                let max_score = 0.5 * *(marginal_score / current_score.as_f64());
+                c.fee().as_f64() <= max_score
+            });
+        match selection {
+            Some((selection, _)) => {
+                selected.push(selection);
+            }
+            _ => break,
+        };
     }
-    selections
+    selected
 }
diff --git a/candidate-selection/src/num.rs b/candidate-selection/src/num.rs
index 43c852d..221ea98 100644
--- a/candidate-selection/src/num.rs
+++ b/candidate-selection/src/num.rs
@@ -29,7 +29,7 @@ impl Normalized {
         self == &Self::ZERO
     }
 
-    pub fn arbitrary() -> impl Strategy<Value = Normalized> {
+    pub fn arbitrary() -> impl Strategy<Value = Self> {
         (0.0..=1.0).prop_map(|n| Normalized::new(n).unwrap())
     }
 }
diff --git a/candidate-selection/src/test.rs b/candidate-selection/src/test.rs
index f51b117..6fcb84a 100644
--- a/candidate-selection/src/test.rs
+++ b/candidate-selection/src/test.rs
@@ -1,18 +1,21 @@
 use crate::{select, ArrayVec, Candidate, Normalized};
 use proptest::{prelude::prop, prop_assert_eq, prop_compose, proptest};
-use rand::{rngs::SmallRng, SeedableRng as _};
 
 #[derive(Debug)]
 struct TestCandidate {
-    id: usize,
+    id: u8,
+    fee: Normalized,
     score: Normalized,
 }
 
 impl Candidate for TestCandidate {
-    type Id = usize;
+    type Id = u8;
     fn id(&self) -> Self::Id {
         self.id
     }
+    fn fee(&self) -> Normalized {
+        self.fee
+    }
     fn score(&self) -> Normalized {
         self.score
     }
@@ -26,35 +29,23 @@ impl Candidate for TestCandidate {
 }
 
 prop_compose! {
-    fn candidates()(scores in prop::collection::vec(Normalized::arbitrary(), 1..32)) -> Vec<TestCandidate> {
-        scores.into_iter().enumerate().map(|(id, score)| TestCandidate { id, score }).collect()
+    fn candidate()(id: u8, fee in Normalized::arbitrary(), score in Normalized::arbitrary()) -> TestCandidate {
+        TestCandidate { id, fee, score }
     }
 }
 proptest! {
     #[test]
     fn acceptable_candidates_selected(
-        seed: u64,
-        candidates in candidates(),
-        min_score_cutoff in Normalized::arbitrary(),
+        candidates in prop::collection::vec(candidate(), 1..16),
     ) {
-        let mut rng = SmallRng::seed_from_u64(seed);
         let exists_acceptable_candidate = candidates.iter().any(|c| c.score > Normalized::ZERO);
-        let min_score = candidates
-            .iter()
-            .filter(|c| c.score > Normalized::ZERO)
-            .map(|c| c.score)
-            .max()
-            .map(|s| s * min_score_cutoff)
-            .unwrap_or(Normalized::ZERO);
 
-        let selections: ArrayVec<&TestCandidate, 1> = select(&mut rng, &candidates, min_score_cutoff);
+        let selections: ArrayVec<&TestCandidate, 1> = select(&candidates);
         prop_assert_eq!(exists_acceptable_candidate, !selections.is_empty());
         prop_assert_eq!(true, selections.iter().all(|s| s.score > Normalized::ZERO));
-        prop_assert_eq!(true, selections.iter().all(|s| s.score >= min_score));
 
-        let selections: ArrayVec<&TestCandidate, 3> = select(&mut rng, &candidates, min_score_cutoff);
+        let selections: ArrayVec<&TestCandidate, 3> = select(&candidates);
         prop_assert_eq!(true, selections.iter().all(|s| s.score > Normalized::ZERO));
         prop_assert_eq!(exists_acceptable_candidate, !selections.is_empty());
-        prop_assert_eq!(true, selections.iter().all(|s| s.score >= min_score));
     }
 }
diff --git a/indexer-selection/Cargo.toml b/indexer-selection/Cargo.toml
index 21e9688..1499186 100644
--- a/indexer-selection/Cargo.toml
+++ b/indexer-selection/Cargo.toml
@@ -6,10 +6,8 @@ edition = "2021"
 [dependencies]
 candidate-selection = { path = "../candidate-selection" }
 custom_debug = "0.6.1"
-rand = { version = "0.8.5", default-features = false }
 thegraph-core = "0.3.0"
 url = "2.5.0"
 
 [dev-dependencies]
 proptest = "1.4.0"
-rand = { version = "0.8.5", default-features = true, features = ["small_rng"] }
diff --git a/indexer-selection/src/lib.rs b/indexer-selection/src/lib.rs
index af55135..f8bf2c1 100644
--- a/indexer-selection/src/lib.rs
+++ b/indexer-selection/src/lib.rs
@@ -29,16 +29,8 @@ pub struct Candidate {
     pub zero_allocation: bool,
 }
 
-const MIN_SCORE_CUTOFF: f64 = 0.25;
-
-pub fn select<'c, Rng, const LIMIT: usize>(
-    rng: &mut Rng,
-    candidates: &'c [Candidate],
-) -> ArrayVec<&'c Candidate, LIMIT>
-where
-    Rng: rand::Rng,
-{
-    candidate_selection::select(rng, candidates, Normalized::new(MIN_SCORE_CUTOFF).unwrap())
+pub fn select<const LIMIT: usize>(candidates: &[Candidate]) -> ArrayVec<&Candidate, LIMIT> {
+    candidate_selection::select(candidates)
 }
 
 impl candidate_selection::Candidate for Candidate {
@@ -51,11 +43,14 @@ impl candidate_selection::Candidate for Candidate {
         hasher.finish()
     }
 
+    fn fee(&self) -> Normalized {
+        self.fee
+    }
+
     fn score(&self) -> Normalized {
         [
             score_success_rate(self.perf.success_rate),
             score_latency(self.perf.latency_ms()),
-            score_fee(self.fee),
             score_seconds_behind(self.seconds_behind),
             score_slashable_grt(self.slashable_grt),
             score_subgraph_versions_behind(self.subgraph_versions_behind),
@@ -67,10 +62,9 @@ impl candidate_selection::Candidate for Candidate {
 
     fn score_many<const LIMIT: usize>(candidates: &[&Self]) -> Normalized {
         let fee = candidates.iter().map(|c| c.fee.as_f64()).sum::<f64>();
-        let fee = match Normalized::new(fee) {
-            Some(fee) => fee,
-            None => return Normalized::ZERO,
-        };
+        if Normalized::new(fee).is_none() {
+            return Normalized::ZERO;
+        }
 
         let perf: ArrayVec<ExpectedPerformance, LIMIT> =
             candidates.iter().map(|c| c.perf).collect();
@@ -113,7 +107,6 @@ impl candidate_selection::Candidate for Candidate {
         [
             score_success_rate(success_rate),
             score_latency(latency),
-            score_fee(fee),
             score_seconds_behind(seconds_behind),
             score_slashable_grt(slashable_grt),
             score_subgraph_versions_behind(subgraph_versions_behind),
@@ -124,21 +117,9 @@ impl candidate_selection::Candidate for Candidate {
     }
 }
 
-/// Score the given `fee`, which is a fraction of some budget. The weight chosen for WPM should be
-/// set to target the "optimal" value shown as the vertical line in the following plot.
-/// https://www.desmos.com/calculator/wf0tsp1sxh
-pub fn score_fee(fee: Normalized) -> Normalized {
-    // (5_f64.sqrt() - 1.0) / 2.0
-    const S: f64 = 0.6180339887498949;
-    let score = (fee.as_f64() + S).recip() - S;
-    // Set minimum score, since a very small negative value can result from loss of precision when
-    // the fee approaches the budget.
-    Normalized::new(score.max(1e-18)).unwrap()
-}
-
 /// Avoid serving deployments at versions behind, unless newer versions have poor indexer support.
 fn score_subgraph_versions_behind(subgraph_versions_behind: u8) -> Normalized {
-    Normalized::new(MIN_SCORE_CUTOFF.powi(subgraph_versions_behind as i32)).unwrap()
+    Normalized::new(0.25_f64.powi(subgraph_versions_behind as i32)).unwrap()
 }
 
 /// https://www.desmos.com/calculator/wmgkasfvza
diff --git a/indexer-selection/src/test.rs b/indexer-selection/src/test.rs
index bd9b311..fb4e405 100644
--- a/indexer-selection/src/test.rs
+++ b/indexer-selection/src/test.rs
@@ -1,27 +1,10 @@
 use crate::*;
 use candidate_selection::num::assert_within;
 use proptest::{prop_assert, prop_compose, proptest};
-use rand::{rngs::SmallRng, SeedableRng};
 
 mod limits {
     use super::*;
 
-    #[test]
-    fn fee() {
-        assert_within(score_fee(Normalized::ZERO).as_f64(), 1.0, 1e-12);
-        assert_within(
-            score_fee(Normalized::new(1e-18).unwrap()).as_f64(),
-            1.0,
-            1e-12,
-        );
-        assert_within(score_fee(Normalized::ONE).as_f64(), 0.0, 1e-12);
-        assert_within(
-            score_fee(Normalized::new(1.0 - 1e-18).unwrap()).as_f64(),
-            0.0,
-            1e-12,
-        );
-    }
-
     #[test]
     fn success_rate() {
         assert_within(score_success_rate(Normalized::ZERO).as_f64(), 0.01, 0.001);
@@ -78,11 +61,9 @@ prop_compose! {
 proptest! {
     #[test]
     fn select(
-        seed: u64,
         candidates in candidates(),
     ) {
-        let mut rng = SmallRng::seed_from_u64(seed);
-        let selections: ArrayVec<&Candidate, 3> = crate::select(&mut rng, &candidates);
+        let selections: ArrayVec<&Candidate, 3> = crate::select(&candidates);
         println!("{:#?}", selections.iter().map(|c| c.indexer).collect::<Vec<_>>());
 
         let valid_candidate = |c: &Candidate| -> bool {
diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml
index f0986b4..9ab1666 100644
--- a/simulator/Cargo.toml
+++ b/simulator/Cargo.toml
@@ -6,5 +6,5 @@ edition = "2021"
 [dependencies]
 candidate-selection = { path = "../candidate-selection" }
 indexer-selection = { path = "../indexer-selection" }
-rand = { version = "0.8.5", default-features = true, features = ["small_rng"] }
+rand = "0.8.5"
 thegraph-core = "0.3.0"
diff --git a/simulator/src/main.rs b/simulator/src/main.rs
index 7b26833..8ff25fc 100644
--- a/simulator/src/main.rs
+++ b/simulator/src/main.rs
@@ -1,6 +1,6 @@
 use std::{collections::BTreeMap, io::stdin, time::Instant};
 
-use rand::{rngs::SmallRng, Rng, SeedableRng};
+use rand::{thread_rng, Rng as _};
 use thegraph_core::types::alloy_primitives::Address;
 
 use candidate_selection::{
@@ -45,7 +45,7 @@ fn main() {
         })
         .collect();
 
-    let mut rng = SmallRng::from_entropy();
+    let mut rng = thread_rng();
 
     let mut perf: BTreeMap<Address, Performance> = characteristics
         .iter()
@@ -97,7 +97,7 @@ fn main() {
             .collect();
 
         let t0 = Instant::now();
-        let selections: ArrayVec<&Candidate, 3> = select(&mut rng, &candidates);
+        let selections: ArrayVec<&Candidate, 3> = select(&candidates);
         total_selection_μs += Instant::now().duration_since(t0).as_micros();
         total_fees_usd += selections
             .iter()