paritytech · tdimitrov · Jun 26, 2024 · Jun 26, 2024 · Jun 26, 2024 · Jun 27, 2024
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/polkadot/node/network/collator-protocol/Cargo.toml b/polkadot/node/network/collator-protocol/Cargo.toml
@@ -37,6 +37,7 @@ rstest = { workspace = true }
 sp-core = { features = ["std"], workspace = true, default-features = true }
 sp-keyring = { workspace = true, default-features = true }
 sc-keystore = { workspace = true, default-features = true }
+sp-tracing = { workspace = true }
 sc-network = { workspace = true, default-features = true }
 codec = { features = ["std"], workspace = true, default-features = true }
 

diff --git a/polkadot/node/network/collator-protocol/src/validator_side/collation.rs b/polkadot/node/network/collator-protocol/src/validator_side/collation.rs
@@ -27,7 +27,12 @@
 //!    ┌──────────────────────────────────────────┐
 //!    └─▶Advertised ─▶ Pending ─▶ Fetched ─▶ Validated
 
-use std::{collections::VecDeque, future::Future, pin::Pin, task::Poll};
+use std::{
+	collections::{BTreeMap, VecDeque},
+	future::Future,
+	pin::Pin,
+	task::Poll,
+};
 
 use futures::{future::BoxFuture, FutureExt};
 use polkadot_node_network_protocol::{
@@ -216,27 +221,97 @@ impl CollationStatus {
 }
 
 /// Information about collations per relay parent.
-#[derive(Default)]
 pub struct Collations {
 	/// What is the current status in regards to a collation for this relay parent?
 	pub status: CollationStatus,
 	/// Collator we're fetching from, optionally which candidate was requested.
 	///
-	/// This is the currently last started fetch, which did not exceed `MAX_UNSHARED_DOWNLOAD_TIME`
-	/// yet.
+	/// This is the last fetch for the relay parent. The value is used in
+	/// `get_next_collation_to_fetch` (called from `dequeue_next_collation_and_fetch`) to determine
+	/// if the last fetched collation is the same as the one which just finished. If yes - another
+	/// collation should be fetched. If not - another fetch was already initiated and
+	/// `get_next_collation_to_fetch` will do nothing.
+	///
+	/// For the reasons above this value is not set to `None` when the fetch is done! Don't use it
+	/// to check if there is a pending fetch.
 	pub fetching_from: Option<(CollatorId, Option<CandidateHash>)>,
-	/// Collation that were advertised to us, but we did not yet fetch.
-	pub waiting_queue: VecDeque<(PendingCollation, CollatorId)>,
+	/// Collations that were advertised to us, but we did not yet fetch. Grouped by `ParaId`.
+	waiting_queue: BTreeMap<ParaId, VecDeque<(PendingCollation, CollatorId)>>,
 	/// How many collations have been seconded.
 	pub seconded_count: usize,
+	/// How many collations were fetched so far for this relay parent, per `ParaId`.
+	fetched_per_para: BTreeMap<ParaId, usize>,
+	/// Claims per `ParaId` for the assigned core at the relay parent. This information comes
+	/// from `GroupAssignments`, which contains either the claim queue for the core (if the
+	/// runtime supports it) or the `ParaId` of the parachain assigned to the core.
+	claims_per_para: BTreeMap<ParaId, usize>,
+	/// Represents the claim queue at the relay parent. The `bool` indicates whether a candidate
+	/// was fetched for the `ParaId` at that position, i.e. whether the claim is 'satisfied'.
+	/// If the claim queue is not available `claim_queue_state` will be `None`.
+	claim_queue_state: Option<Vec<(bool, ParaId)>>,
 }
 
 impl Collations {
+	/// `Collations` should work with and without claim queue support. If the claim queue runtime
+	/// api is available `GroupAssignments` contains the claim queue. If not - group assignments
+	/// will contain just one item (what's scheduled on the core).
+	///
+	/// Some of the logic in `Collations` relies on the claim queue and falls back to different
+	/// logic if it is not available. For this reason `Collations` needs to know if the claim
+	/// queue is available or not.
+	///
+	/// Once claim queue runtime api is released everywhere this logic won't be needed anymore and
+	/// can be cleaned up.
+	pub(super) fn new(group_assignments: &Vec<ParaId>, has_claim_queue: bool) -> Self {
+		let mut claims_per_para = BTreeMap::new();
+		let mut claim_queue_state = Vec::with_capacity(group_assignments.len());
+
+		for para_id in group_assignments {
+			*claims_per_para.entry(*para_id).or_default() += 1;
+			claim_queue_state.push((false, *para_id));
+		}
+
+		// Not optimal but if the claim queue is not available `group_assignments` will have just
+		// one element. Can be fixed once claim queue api is released everywhere and the fallback
+		// code is cleaned up.
+		let claim_queue_state = if has_claim_queue { Some(claim_queue_state) } else { None };
+
+		Self {
+			status: Default::default(),
+			fetching_from: None,
+			waiting_queue: Default::default(),
+			seconded_count: 0,
+			fetched_per_para: Default::default(),
+			claims_per_para,
+			claim_queue_state,
+		}
+	}
+
 	/// Note a seconded collation for a given para.
 	pub(super) fn note_seconded(&mut self) {
 		self.seconded_count += 1
 	}
 
+	/// Note a collation which has been successfully fetched for the given para.
+	pub(super) fn note_fetched(&mut self, para_id: ParaId) {
+		// update the number of fetched collations for the para_id
+		*self.fetched_per_para.entry(para_id).or_default() += 1;
+
+		// and the claim queue state
+		if let Some(claim_queue_state) = self.claim_queue_state.as_mut() {
+			for (satisfied, assignment) in claim_queue_state {
+				if *satisfied {
+					continue
+				}
+
+				if assignment == &para_id {
+					*satisfied = true;
+					break
+				}
+			}
+		}
+	}
+
 	/// Returns the next collation to fetch from the `waiting_queue`.
 	///
 	/// This will reset the status back to `Waiting` using [`CollationStatus::back_to_waiting`].
@@ -247,6 +322,8 @@ impl Collations {
 		&mut self,
 		finished_one: &(CollatorId, Option<CandidateHash>),
 		relay_parent_mode: ProspectiveParachainsMode,
+		group_assignments: &Vec<ParaId>,
+		pending_fetches: &BTreeMap<ParaId, usize>,
 	) -> Option<(PendingCollation, CollatorId)> {
 		// If finished one does not match waiting_collation, then we already dequeued another fetch
 		// to replace it.
@@ -267,33 +344,167 @@ impl Collations {
 		self.status.back_to_waiting(relay_parent_mode);
 
 		match self.status {
-			// We don't need to fetch any other collation when we already have seconded one.
+			// If async backing is enabled `back_to_waiting` will change `Seconded` state to
+			// `Waiting` so that we can fetch more collations. If async backing is disabled we can't
+			// fetch more than one collation per relay parent so `None` is returned.
 			CollationStatus::Seconded => None,
 			CollationStatus::Waiting =>
-				if self.is_seconded_limit_reached(relay_parent_mode) {
-					None
-				} else {
-					self.waiting_queue.pop_front()
-				},
+				self.pick_a_collation_to_fetch(&group_assignments, pending_fetches),
 			CollationStatus::WaitingOnValidation | CollationStatus::Fetching =>
 				unreachable!("We have reset the status above!"),
 		}
 	}
 
-	/// Checks the limit of seconded candidates.
-	pub(super) fn is_seconded_limit_reached(
+	/// Checks if another collation can be accepted. The number of collations that can be fetched
+	/// per parachain is limited by the entries in claim queue for the `ParaId` in question.
+	///
+	/// If prospective parachains mode is not enabled then we fall back to synchronous backing. In
+	/// this case there is a limit of 1 collation per relay parent.
+	///
+	/// If prospective parachains mode is enabled but claim queue is not supported then up to
+	/// `max_candidate_depth + 1` seconded collations are accepted. In theory in this case if two
+	/// parachains are sharing a core no fairness is guaranteed between them and the faster one can
+	/// starve the slower one by exhausting the limit with its own advertisements. In practice this
+	/// should not happen because core sharing implies core time support which implies the claim
+	/// queue being available.
+	pub(super) fn is_collations_limit_reached(
 		&self,
 		relay_parent_mode: ProspectiveParachainsMode,
+		para_id: ParaId,
+		num_pending_fetches: usize,
 	) -> bool {
-		let seconded_limit =
-			if let ProspectiveParachainsMode::Enabled { max_candidate_depth, .. } =
-				relay_parent_mode
+		match relay_parent_mode {
+			ProspectiveParachainsMode::Disabled => {
+				gum::trace!(
+					target: LOG_TARGET,
+					?para_id,
+					seconded_count=self.seconded_count,
+					"is_collations_limit_reached - ProspectiveParachainsMode::Disabled"
+				);
+
+				self.seconded_count >= 1
+			},
+			ProspectiveParachainsMode::Enabled { max_candidate_depth, allowed_ancestry_len: _ }
+				if !self.claim_queue_state.is_some() =>
 			{
-				max_candidate_depth + 1
-			} else {
-				1
-			};
-		self.seconded_count >= seconded_limit
+				gum::trace!(
+					target: LOG_TARGET,
+					?para_id,
+					seconded_count=self.seconded_count,
+					max_candidate_depth,
+					"is_collations_limit_reached - ProspectiveParachainsMode::Enabled without claim queue support"
+				);
+
+				self.seconded_count > max_candidate_depth
+			},
+			ProspectiveParachainsMode::Enabled {
+				max_candidate_depth: _,
+				allowed_ancestry_len: _,
+			} => {
+				// Successful fetches + pending fetches < claim queue entries for `para_id`
+				let respected_per_para_limit =
+					self.claims_per_para.get(&para_id).copied().unwrap_or_default() >
+						self.fetched_per_para.get(&para_id).copied().unwrap_or_default() +
+							num_pending_fetches;
+
+				gum::trace!(
+					target: LOG_TARGET,
+					?para_id,
+					claims_per_para=?self.claims_per_para,
+					fetched_per_para=?self.fetched_per_para,
+					?num_pending_fetches,
+					?respected_per_para_limit,
+					"is_collations_limit_reached - ProspectiveParachainsMode::Enabled with claim queue support"
+				);
+
+				!respected_per_para_limit
+			},
+		}
+	}
+
+	/// Adds a new collation to the waiting queue for the relay parent. This function doesn't
+	/// perform any limit checks. The caller (`enqueue_collation`) should ensure that the collation
+	/// limit is respected.
+	pub(super) fn add_to_waiting_queue(&mut self, collation: (PendingCollation, CollatorId)) {
+		self.waiting_queue.entry(collation.0.para_id).or_default().push_back(collation);
+	}
+
+	/// Picks a collation to fetch from the waiting queue.
+	/// When fetching collations we need to ensure that each parachain has got a fair core time
+	/// share depending on its assignments in the claim queue. This means that the number of
+	/// collations fetched per parachain should ideally be equal to the number of claims for the
+	/// particular parachain in the claim queue.
+	///
+	/// To achieve this the claim queue is walked in order. Entries which are already fulfilled
+	/// (a collation was fetched for them) are skipped. An entry is also skipped if there is a
+	/// pending fetch for its `ParaId` which can be attributed to it. For the first remaining
+	/// entry whose `ParaId` has got an advertisement in the `waiting_queue` the oldest
+	/// advertisement is returned. This means that if several parachains have got waiting
+	/// advertisements, the one with the earliest unfulfilled claim in the claim queue is picked.
+	///
+	/// If claim queue is not supported then `group_assignments` should contain just one element.
+	/// In this case the claim queue is not consulted and collations will be fetched in the order
+	/// they were received.
+	///
+	/// Note: `group_assignments` is needed just for the fall back logic. It should be removed once
+	/// claim queue runtime api is released everywhere since it will be redundant - claim queue will
+	/// already be available in `self.claim_queue_state`.
+	fn pick_a_collation_to_fetch(
+		&mut self,
+		group_assignments: &Vec<ParaId>,
+		pending_fetches: &BTreeMap<ParaId, usize>,
+	) -> Option<(PendingCollation, CollatorId)> {
+		gum::trace!(
+			target: LOG_TARGET,
+			waiting_queue=?self.waiting_queue,
+			fetched_per_para=?self.fetched_per_para,
+			claims_per_para=?self.claims_per_para,
+			?group_assignments,
+			"Pick a collation to fetch."
+		);
+
+		let claim_queue_state = match self.claim_queue_state.as_mut() {
+			Some(cqs) => cqs,
+			// Fallback if claim queue is not available. There is only one assignment in
+			// `group_assignments` so fetch the first advertisement for it and return.
+			None =>
+				if let Some(assigned_para_id) = group_assignments.first() {
+					return self
+						.waiting_queue
+						.get_mut(assigned_para_id)
+						.and_then(|collations| collations.pop_front())
+				} else {
+					unreachable!("Group assignments should contain at least one element.")
+				},
+		};
+
+		let mut pending_fetches = pending_fetches.clone();
+
+		for (fulfilled, assignment) in claim_queue_state {
+			// if this assignment has been already fulfilled - move on
+			if *fulfilled {
+				continue
+			}
+
+			// if there is a pending fetch for this assignment, we should consider it satisfied and
+			// proceed with the next
+			if let Some(pending_fetch) = pending_fetches.get_mut(assignment) {
+				if *pending_fetch > 0 {
+					*pending_fetch -= 1;
+					continue
+				}
+			}
+
+			// we have found an unfulfilled assignment - try to fulfill it
+			if let Some(collations) = self.waiting_queue.get_mut(assignment) {
+				if let Some(collation) = collations.pop_front() {
+					// we don't mark the entry as fulfilled because it is considered pending
+					return Some(collation)
+				}
+			}
+		}
+
+		None
 	}
 }