From 01acdfe0350ab8228b45c0b464c4aa5f90cac07a Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Thu, 11 Jul 2024 15:58:10 +1200 Subject: [PATCH 01/40] feat: Add outline for dedicated indexer control loops --- coordinator/src/lifecycle.rs | 79 ++++++++++++++++++++++++++++++++++++ coordinator/src/main.rs | 1 + 2 files changed, 80 insertions(+) create mode 100644 coordinator/src/lifecycle.rs diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs new file mode 100644 index 00000000..978ef77a --- /dev/null +++ b/coordinator/src/lifecycle.rs @@ -0,0 +1,79 @@ +use crate::indexer_config::IndexerConfig; + +#[derive(Default)] +enum LifeCycle { + #[default] + Provisioning, + Running, + Stopping, + Stopped, + Deprovisioning, + Erroring, + Deleted, +} + +struct LifecycleManager { + indexer_config: IndexerConfig, +} + +impl LifecycleManager { + fn new(indexer_config: IndexerConfig) -> LifecycleManager { + LifecycleManager { indexer_config } + } + + fn start(&self) { + println!("Starting lifecycle manager"); + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn something() { + let config = IndexerConfig::default(); + let _manager = LifecycleManager::new(config); + let mut lifecycle = LifeCycle::default(); + let enabled = true; + loop { + // fetch state and create if doesn't exist + // fetch config - it should exist + + match lifecycle { + LifeCycle::Provisioning => { + // do something + lifecycle = LifeCycle::Running; + } + LifeCycle::Running => { + // ensure block stream/executor are running + // get config + // do something + // change state + if !enabled { + lifecycle = LifeCycle::Stopping; + } + } + LifeCycle::Stopping => { + // do something + // change state + } + LifeCycle::Stopped => { + // do something + // change state + } + LifeCycle::Deprovisioning => { + // do something + // change state + } + LifeCycle::Erroring => { + // clean up + } + LifeCycle::Deleted => { + // clean up + break; + } + } + } + } +} diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs index 6d55aa78..e024691c 100644 --- a/coordinator/src/main.rs +++ b/coordinator/src/main.rs @@ -15,6 +15,7 @@ use crate::synchroniser::Synchroniser; mod handlers; mod indexer_config; mod indexer_state; +mod lifecycle; mod redis; mod registry; mod server; From 515d05144ccbfbf857018ffdeddfade833344179 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Fri, 12 Jul 2024 15:05:59 +1200 Subject: [PATCH 02/40] feat: Implement rough state handling --- coordinator/src/handlers/block_streams.rs | 9 + coordinator/src/handlers/executors.rs | 10 + coordinator/src/indexer_state.rs | 4 + coordinator/src/lifecycle.rs | 252 ++++++++++++++---- coordinator/src/registry.rs | 4 +- .../src/server/indexer_manager_service.rs | 4 +- 6 files changed, 226 insertions(+), 57 deletions(-) diff --git a/coordinator/src/handlers/block_streams.rs b/coordinator/src/handlers/block_streams.rs index f88a412d..1ecd2ca8 100644 --- a/coordinator/src/handlers/block_streams.rs +++ b/coordinator/src/handlers/block_streams.rs @@ -79,6 +79,15 @@ impl BlockStreamsHandlerImpl { .into() } + pub async fn get(&self, indexer_config: &IndexerConfig) -> anyhow::Result> { + Ok(Some(StreamInfo { + stream_id: "".to_string(), + account_id: indexer_config.account_id.to_string(), + function_name: indexer_config.function_name.clone(), + version: indexer_config.get_registry_version(), + })) + } + pub async fn start( &self, start_block_height: u64, diff --git a/coordinator/src/handlers/executors.rs b/coordinator/src/handlers/executors.rs index 
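Patch 01 sketches the control loop as a free-running match over a state enum. A later note in this series asks whether the transitions can be mapped in the type itself; one way is to make each state's successor a pure method on the enum, driven by the few inputs the loop observes, so transitions can be unit-tested with no handlers attached. A minimal self-contained sketch of that idea, where the `enabled` flag stands in for the fetched state and the variant set is trimmed for brevity:

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LifeCycle {
    Provisioning,
    Running,
    Stopping,
    Stopped,
}

impl LifeCycle {
    // Pure transition function: the next state is a function of the current
    // state plus the inputs observed at the top of the loop.
    fn next(self, enabled: bool) -> LifeCycle {
        match self {
            LifeCycle::Provisioning => LifeCycle::Running,
            LifeCycle::Running if !enabled => LifeCycle::Stopping,
            LifeCycle::Running => LifeCycle::Running,
            LifeCycle::Stopping => LifeCycle::Stopped,
            LifeCycle::Stopped if enabled => LifeCycle::Running,
            LifeCycle::Stopped => LifeCycle::Stopped,
        }
    }
}

fn main() {
    let mut state = LifeCycle::Provisioning;
    // Drive the machine with enabled = false until it settles.
    for _ in 0..3 {
        state = state.next(false);
        println!("{state:?}");
    }
    assert_eq!(state, LifeCycle::Stopped);
}

With the transition kept pure, the loop body is left doing only the side effects for the current state before asking the enum where to go next.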
68616404..9956d5c9 100644 --- a/coordinator/src/handlers/executors.rs +++ b/coordinator/src/handlers/executors.rs @@ -50,6 +50,16 @@ impl ExecutorsHandlerImpl { .await } + pub async fn get(&self, config: &IndexerConfig) -> anyhow::Result> { + Ok(Some(ExecutorInfo { + executor_id: "".into(), + account_id: config.account_id.to_string(), + function_name: config.function_name.clone(), + version: 0, + status: "".to_string(), + })) + } + pub async fn start(&self, indexer_config: &IndexerConfig) -> anyhow::Result<()> { let request = StartExecutorRequest { code: indexer_config.code.clone(), diff --git a/coordinator/src/indexer_state.rs b/coordinator/src/indexer_state.rs index 2539e06a..3cb03ea2 100644 --- a/coordinator/src/indexer_state.rs +++ b/coordinator/src/indexer_state.rs @@ -4,6 +4,7 @@ use anyhow::Context; use near_primitives::types::AccountId; use crate::indexer_config::IndexerConfig; +use crate::lifecycle::Lifecycle; use crate::redis::{KeyProvider, RedisClient}; #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] @@ -17,11 +18,13 @@ pub enum ProvisionedState { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] pub struct IndexerState { + // store previous config to make comparison easier? pub account_id: AccountId, pub function_name: String, pub block_stream_synced_at: Option, pub enabled: bool, pub provisioned_state: ProvisionedState, + pub lifecycle: Lifecycle, } impl KeyProvider for IndexerState { @@ -56,6 +59,7 @@ impl IndexerStateManagerImpl { block_stream_synced_at: None, enabled: true, provisioned_state: ProvisionedState::Unprovisioned, + lifecycle: Lifecycle::default(), } } diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index 978ef77a..667b2521 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -1,79 +1,225 @@ +use near_primitives::types::AccountId; + +use crate::handlers::block_streams::{BlockStreamsHandler, StreamInfo}; +use crate::handlers::data_layer::{DataLayerHandler, TaskStatus}; +use crate::handlers::executors::{ExecutorInfo, ExecutorsHandler}; use crate::indexer_config::IndexerConfig; +use crate::indexer_state::{IndexerState, IndexerStateManager, ProvisionedState}; +use crate::redis::RedisClient; +use crate::registry::Registry; -#[derive(Default)] -enum LifeCycle { +// is there a way to map the transitions in this type? +#[derive(Default, Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] +pub enum Lifecycle { + // are these too specific? e.g. should deprovisioning happen within deleting? #[default] Provisioning, Running, Stopping, Stopped, + // this is kinda the same as deleting, do we need it? 
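Adding `lifecycle` to `IndexerState` has a persistence implication: the struct is serialised to JSON and stored via the Redis-backed state manager, so states written before this field existed will fail to deserialise unless the field is defaulted. A small round-trip sketch, with `String` standing in for `AccountId` and only a few variants, shows the stored shape and the `#[serde(default)]` escape hatch:

// Cargo deps assumed: serde = { version = "1", features = ["derive"] }, serde_json = "1"
use serde::{Deserialize, Serialize};

#[derive(Default, Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
enum Lifecycle {
    #[default]
    Provisioning,
    Running,
    Stopped,
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
struct IndexerState {
    account_id: String, // AccountId in the real coordinator
    function_name: String,
    enabled: bool,
    #[serde(default)] // tolerate states persisted before this field existed
    lifecycle: Lifecycle,
}

fn main() -> serde_json::Result<()> {
    let state = IndexerState {
        account_id: "example.near".to_string(),
        function_name: "example_indexer".to_string(),
        enabled: true,
        lifecycle: Lifecycle::default(),
    };

    // Round-trip through the JSON representation that would live in Redis.
    let json = serde_json::to_string(&state)?;
    assert_eq!(serde_json::from_str::<IndexerState>(&json)?, state);

    // An old record without `lifecycle` still loads, defaulting to Provisioning.
    let old = r#"{"account_id":"example.near","function_name":"example_indexer","enabled":true}"#;
    assert_eq!(serde_json::from_str::<IndexerState>(old)?.lifecycle, Lifecycle::Provisioning);
    Ok(())
}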
Deprovisioning, Erroring, + Deleting, Deleted, } -struct LifecycleManager { - indexer_config: IndexerConfig, +pub struct LifecycleManager<'a> { + account_id: AccountId, + function_name: String, + block_streams_handler: &'a BlockStreamsHandler, + executors_handler: &'a ExecutorsHandler, + data_layer_handler: &'a DataLayerHandler, + registry: &'a Registry, + state_manager: &'a IndexerStateManager, + redis_client: &'a RedisClient, } -impl LifecycleManager { - fn new(indexer_config: IndexerConfig) -> LifecycleManager { - LifecycleManager { indexer_config } +impl<'a> LifecycleManager<'a> { + #[allow(clippy::too_many_arguments)] + fn new( + account_id: AccountId, + function_name: String, + block_streams_handler: &'a BlockStreamsHandler, + executors_handler: &'a ExecutorsHandler, + data_layer_handler: &'a DataLayerHandler, + registry: &'a Registry, + state_manager: &'a IndexerStateManager, + redis_client: &'a RedisClient, + ) -> Self { + Self { + account_id, + function_name, + block_streams_handler, + executors_handler, + data_layer_handler, + registry, + state_manager, + redis_client, + } } - fn start(&self) { - println!("Starting lifecycle manager"); + async fn handle_provisioning( + &self, + config: &IndexerConfig, + _state: &IndexerState, + ) -> Lifecycle { + // ensure_provisioned() + let task_id = self + .data_layer_handler + .start_provisioning_task(config) + .await + .unwrap(); + + loop { + let status = self + .data_layer_handler + .get_task_status(task_id.clone()) + .await + .unwrap(); + + if status == TaskStatus::Complete { + break; + } + } + + Lifecycle::Running + } + + async fn handle_running(&self, config: &IndexerConfig, state: &IndexerState) -> Lifecycle { + if !state.enabled { + return Lifecycle::Stopping; + } + + // check if we need to reprovision + + // ensure_running() + let block_stream = self.block_streams_handler.get(config).await.unwrap(); + if let Some(block_stream) = block_stream { + if block_stream.version != config.get_registry_version() { + self.block_streams_handler + .stop(block_stream.stream_id) + .await + .unwrap(); + self.block_streams_handler.start(0, config).await.unwrap(); + } + } else { + self.block_streams_handler.start(0, config).await.unwrap(); + } + + // ensure_running() + let executor = self.executors_handler.get(config).await.unwrap(); + if let Some(executor) = executor { + if executor.version != config.get_registry_version() { + self.executors_handler + .stop(executor.executor_id) + .await + .unwrap(); + self.executors_handler.start(config).await.unwrap(); + } + } else { + self.executors_handler.start(config).await.unwrap(); + } + + Lifecycle::Running + } + + async fn handle_stopping(&self, config: &IndexerConfig) -> Lifecycle { + if let Some(block_stream) = self.block_streams_handler.get(config).await.unwrap() { + self.block_streams_handler + .stop(block_stream.stream_id) + .await + .unwrap(); + } + + if let Some(executor) = self.executors_handler.get(config).await.unwrap() { + self.executors_handler + .stop(executor.executor_id) + .await + .unwrap(); + } + + Lifecycle::Stopped } -} -#[cfg(test)] -mod test { - use super::*; + async fn handle_stopped(&self, state: &IndexerState) -> Lifecycle { + // check if config update? 
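As written, `handle_provisioning` polls `get_task_status` in a tight loop: no sleep between attempts, no bound on how long it waits, and `unwrap()` on every call. Later patches in this series add the sleep and the periodic warnings; the general shape, with a bounded attempt count, looks roughly like the following. The `TaskStatus` enum and `poll` closure here are stand-ins, not the real handler API:

// Cargo deps assumed: tokio = { version = "1", features = ["full"] }, anyhow = "1"
use std::time::Duration;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TaskStatus {
    Pending,
    Complete,
    Failed,
}

// Poll until the task reports Complete, sleeping between attempts and giving
// up after `max_attempts` so a stuck task cannot wedge the control loop.
async fn wait_until_complete<F, Fut>(mut poll: F, max_attempts: u32) -> anyhow::Result<()>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = anyhow::Result<TaskStatus>>,
{
    for attempt in 1..=max_attempts {
        match poll().await? {
            TaskStatus::Complete => return Ok(()),
            TaskStatus::Failed => anyhow::bail!("task failed"),
            TaskStatus::Pending => {
                if attempt % 60 == 0 {
                    eprintln!("still waiting after {attempt} attempts");
                }
                tokio::time::sleep(Duration::from_secs(1)).await;
            }
        }
    }
    anyhow::bail!("task did not complete within {max_attempts} attempts")
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let mut calls = 0;
    wait_until_complete(
        || {
            calls += 1;
            let status = if calls < 3 { TaskStatus::Pending } else { TaskStatus::Complete };
            async move { Ok(status) }
        },
        10,
    )
    .await
}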
+ + if state.enabled { + return Lifecycle::Running; + } + + Lifecycle::Stopped + } + + async fn handle_deprovisioning(&self) -> Lifecycle { + Lifecycle::Deprovisioning + } + + async fn handle_erroring(&self, config: &IndexerConfig, state: &IndexerState) -> Lifecycle { + if config.get_registry_version() != state.block_stream_synced_at.unwrap() { + return Lifecycle::Running; + } + + Lifecycle::Erroring + } + + async fn handle_deleting(&self, state: &IndexerState) -> Lifecycle { + // ensure_deprovisioned + let task_id = self + .data_layer_handler + .start_deprovisioning_task(state.account_id.clone(), state.function_name.clone()) + .await + .unwrap(); - #[test] - fn something() { - let config = IndexerConfig::default(); - let _manager = LifecycleManager::new(config); - let mut lifecycle = LifeCycle::default(); - let enabled = true; loop { - // fetch state and create if doesn't exist - // fetch config - it should exist + let status = self + .data_layer_handler + .get_task_status(task_id.clone()) + .await + .unwrap(); - match lifecycle { - LifeCycle::Provisioning => { - // do something - lifecycle = LifeCycle::Running; - } - LifeCycle::Running => { - // ensure block stream/executor are running - // get config - // do something - // change state - if !enabled { - lifecycle = LifeCycle::Stopping; - } - } - LifeCycle::Stopping => { - // do something - // change state - } - LifeCycle::Stopped => { - // do something - // change state - } - LifeCycle::Deprovisioning => { - // do something - // change state - } - LifeCycle::Erroring => { - // clean up - } - LifeCycle::Deleted => { - // clean up - break; + if status == TaskStatus::Complete { + break; + } + } + + Lifecycle::Deleted + } + + // should not return a result here, all errors should be handled internally + pub async fn run(&self) -> anyhow::Result<()> { + // should throttle this + loop { + // this would be optional, and would decide the deleting state + let config = Some( + self.registry + .fetch_indexer(&self.account_id, &self.function_name) + .await?, + ); + let mut state = self + .state_manager + .get_state(&config.clone().unwrap()) + .await?; + + state.lifecycle = if let Some(config) = config { + match state.lifecycle { + Lifecycle::Provisioning => self.handle_provisioning(&config, &state).await, + Lifecycle::Running => self.handle_running(&config, &state).await, + Lifecycle::Stopping => self.handle_stopping(&config).await, + Lifecycle::Stopped => self.handle_stopped(&state).await, + Lifecycle::Deprovisioning => self.handle_deprovisioning().await, + Lifecycle::Erroring => self.handle_erroring(&config, &state).await, + Lifecycle::Deleting => unreachable!("handled below"), + Lifecycle::Deleted => break, } + } else { + self.handle_deleting(&state).await } + + // flush state } + + Ok(()) } } diff --git a/coordinator/src/registry.rs b/coordinator/src/registry.rs index d3cb8739..81dbdb31 100644 --- a/coordinator/src/registry.rs +++ b/coordinator/src/registry.rs @@ -169,8 +169,8 @@ impl RegistryImpl { pub async fn fetch_indexer( &self, - account_id: AccountId, - function_name: String, + account_id: &AccountId, + function_name: &str, ) -> anyhow::Result { let response = self .json_rpc_client diff --git a/coordinator/src/server/indexer_manager_service.rs b/coordinator/src/server/indexer_manager_service.rs index 36a91ca3..2a8d7e35 100644 --- a/coordinator/src/server/indexer_manager_service.rs +++ b/coordinator/src/server/indexer_manager_service.rs @@ -42,7 +42,7 @@ impl indexer_manager::indexer_manager_server::IndexerManager for IndexerManagerS let 
indexer_config = self .registry - .fetch_indexer(account_id, request.function_name) + .fetch_indexer(&account_id, &request.function_name) .await .map_err(|_| Status::not_found("Indexer not found"))?; @@ -78,7 +78,7 @@ impl indexer_manager::indexer_manager_server::IndexerManager for IndexerManagerS let indexer_config = self .registry - .fetch_indexer(account_id, request.function_name) + .fetch_indexer(&account_id, &request.function_name) .await .map_err(|_| Status::not_found("Indexer not found"))?; From 5fdb5d2b3aabe87510e6a7d59c85ed2caa23d06b Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Fri, 12 Jul 2024 15:28:48 +1200 Subject: [PATCH 03/40] refactor: Abstract data layer provisioning logic --- coordinator/src/handlers/data_layer.rs | 29 ++++++++++++++++++++++++-- coordinator/src/lifecycle.rs | 20 +++++------------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/coordinator/src/handlers/data_layer.rs b/coordinator/src/handlers/data_layer.rs index 68537b3c..1bdf865a 100644 --- a/coordinator/src/handlers/data_layer.rs +++ b/coordinator/src/handlers/data_layer.rs @@ -8,7 +8,7 @@ use anyhow::Context; use runner::data_layer::data_layer_client::DataLayerClient; use runner::data_layer::{DeprovisionRequest, GetTaskStatusRequest, ProvisionRequest}; use tonic::transport::channel::Channel; -use tonic::Request; +use tonic::{Request, Status}; use crate::indexer_config::IndexerConfig; @@ -37,7 +37,7 @@ impl DataLayerHandlerImpl { pub async fn start_provisioning_task( &self, indexer_config: &IndexerConfig, - ) -> anyhow::Result { + ) -> Result { let request = ProvisionRequest { account_id: indexer_config.account_id.to_string(), function_name: indexer_config.function_name.clone(), @@ -98,4 +98,29 @@ impl DataLayerHandlerImpl { Ok(status) } + + pub async fn ensure_provisioned(&self, indexer_config: &IndexerConfig) -> anyhow::Result<()> { + let start_task_result = self.start_provisioning_task(indexer_config).await; + + if let Err(error) = start_task_result { + // Already provisioned + if error.code() == tonic::Code::FailedPrecondition { + return Ok(()); + } + + return Err(error.into()); + } + + let task_id = start_task_result.unwrap(); + + loop { + if self.get_task_status(task_id.clone()).await? 
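The `run` loop introduced in patch 02 carries a `// should throttle this` note: with no delay, every pass immediately re-fetches the registry and the Redis state. A fixed-period `tokio::time::interval` is one way to cap the pass rate regardless of how long each pass takes; a sketch only, with `reconcile_once` standing in for the body of `run`:

// Cargo dep assumed: tokio = { version = "1", features = ["full"] }
use std::time::Duration;

// Placeholder for one pass of the lifecycle state machine.
async fn reconcile_once(iteration: u32) -> bool {
    println!("reconcile pass {iteration}");
    iteration < 3 // pretend the indexer is deleted after a few passes
}

#[tokio::main]
async fn main() {
    // Tick at most once per second, no matter how fast each pass completes.
    let mut interval = tokio::time::interval(Duration::from_secs(1));
    interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);

    let mut iteration = 0;
    loop {
        interval.tick().await;
        iteration += 1;
        if !reconcile_once(iteration).await {
            break; // Deleted: the control loop for this indexer ends here.
        }
    }
}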
== TaskStatus::Complete { + break; + } + + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + } + + Ok(()) + } } diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index 667b2521..32930265 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -64,23 +64,13 @@ impl<'a> LifecycleManager<'a> { config: &IndexerConfig, _state: &IndexerState, ) -> Lifecycle { - // ensure_provisioned() - let task_id = self + if self .data_layer_handler - .start_provisioning_task(config) + .ensure_provisioned(config) .await - .unwrap(); - - loop { - let status = self - .data_layer_handler - .get_task_status(task_id.clone()) - .await - .unwrap(); - - if status == TaskStatus::Complete { - break; - } + .is_err() + { + return Lifecycle::Erroring; } Lifecycle::Running From 2c959257f62ab9a82e050deff3b05e310f17fe5c Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Fri, 12 Jul 2024 15:40:51 +1200 Subject: [PATCH 04/40] refactor: Abstract block stream/executor synchronisation --- coordinator/src/handlers/block_streams.rs | 15 +++++++++ coordinator/src/handlers/executors.rs | 14 +++++++++ coordinator/src/lifecycle.rs | 38 +++++++++-------------- 3 files changed, 43 insertions(+), 24 deletions(-) diff --git a/coordinator/src/handlers/block_streams.rs b/coordinator/src/handlers/block_streams.rs index 1ecd2ca8..e7a30bcb 100644 --- a/coordinator/src/handlers/block_streams.rs +++ b/coordinator/src/handlers/block_streams.rs @@ -148,4 +148,19 @@ impl BlockStreamsHandlerImpl { Ok(()) } + + // TODO handle reconfiguration + pub async fn synchronise_block_stream(&self, config: &IndexerConfig) -> anyhow::Result<()> { + let block_stream = self.get(config).await.unwrap(); + if let Some(block_stream) = block_stream { + if block_stream.version != config.get_registry_version() { + self.stop(block_stream.stream_id).await.unwrap(); + self.start(0, config).await.unwrap(); + } + } else { + self.start(0, config).await.unwrap(); + } + + Ok(()) + } } diff --git a/coordinator/src/handlers/executors.rs b/coordinator/src/handlers/executors.rs index 9956d5c9..abbce90c 100644 --- a/coordinator/src/handlers/executors.rs +++ b/coordinator/src/handlers/executors.rs @@ -107,4 +107,18 @@ impl ExecutorsHandlerImpl { Ok(()) } + + pub async fn synchronise_executor(&self, config: &IndexerConfig) -> anyhow::Result<()> { + let executor = self.get(config).await.unwrap(); + if let Some(executor) = executor { + if executor.version != config.get_registry_version() { + self.stop(executor.executor_id).await.unwrap(); + self.start(config).await.unwrap(); + } + } else { + self.start(config).await.unwrap(); + } + + Ok(()) + } } diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index 32930265..0c357ab2 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -83,32 +83,22 @@ impl<'a> LifecycleManager<'a> { // check if we need to reprovision - // ensure_running() - let block_stream = self.block_streams_handler.get(config).await.unwrap(); - if let Some(block_stream) = block_stream { - if block_stream.version != config.get_registry_version() { - self.block_streams_handler - .stop(block_stream.stream_id) - .await - .unwrap(); - self.block_streams_handler.start(0, config).await.unwrap(); - } - } else { - self.block_streams_handler.start(0, config).await.unwrap(); + if self + .block_streams_handler + .synchronise_block_stream(config) + .await + .is_err() + { + return Lifecycle::Erroring; } - // ensure_running() - let executor = 
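`ensure_provisioned` in patch 03 leans on a gRPC convention to stay idempotent: a `FailedPrecondition` status from `start_provisioning_task` is read as "already provisioned" and treated as success, while any other status propagates as an error. That mapping can be isolated and exercised without a running server by constructing `tonic::Status` values directly (a sketch; Cargo deps assumed: tonic = "0.11", anyhow = "1"):

use tonic::{Code, Status};

// Treat FailedPrecondition as "already provisioned"; everything else is fatal.
fn interpret_provision_error(status: Status) -> anyhow::Result<()> {
    match status.code() {
        Code::FailedPrecondition => Ok(()),
        _ => Err(anyhow::anyhow!("provisioning failed: {status}")),
    }
}

fn main() {
    assert!(interpret_provision_error(Status::failed_precondition("exists")).is_ok());
    assert!(interpret_provision_error(Status::internal("boom")).is_err());
    println!("FailedPrecondition is treated as idempotent success");
}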
self.executors_handler.get(config).await.unwrap(); - if let Some(executor) = executor { - if executor.version != config.get_registry_version() { - self.executors_handler - .stop(executor.executor_id) - .await - .unwrap(); - self.executors_handler.start(config).await.unwrap(); - } - } else { - self.executors_handler.start(config).await.unwrap(); + if self + .executors_handler + .synchronise_executor(config) + .await + .is_err() + { + return Lifecycle::Erroring; } Lifecycle::Running From 08c96c16a04f2ab72abe4165d50e23d2a4fc590e Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Fri, 12 Jul 2024 15:42:44 +1200 Subject: [PATCH 05/40] refactor: Abstract deprovisioning handling --- coordinator/src/handlers/data_layer.rs | 20 ++++++++++++++++++++ coordinator/src/lifecycle.rs | 20 +++++--------------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/coordinator/src/handlers/data_layer.rs b/coordinator/src/handlers/data_layer.rs index 1bdf865a..ae1124da 100644 --- a/coordinator/src/handlers/data_layer.rs +++ b/coordinator/src/handlers/data_layer.rs @@ -123,4 +123,24 @@ impl DataLayerHandlerImpl { Ok(()) } + + pub async fn ensure_deprovisioned( + &self, + account_id: AccountId, + function_name: String, + ) -> anyhow::Result<()> { + let task_id = self + .start_deprovisioning_task(account_id, function_name) + .await?; + + loop { + if self.get_task_status(task_id.clone()).await? == TaskStatus::Complete { + break; + } + + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + } + + Ok(()) + } } diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index 0c357ab2..b92d36b5 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -145,23 +145,13 @@ impl<'a> LifecycleManager<'a> { } async fn handle_deleting(&self, state: &IndexerState) -> Lifecycle { - // ensure_deprovisioned - let task_id = self + if self .data_layer_handler - .start_deprovisioning_task(state.account_id.clone(), state.function_name.clone()) + .ensure_deprovisioned(state.account_id.clone(), state.function_name.clone()) .await - .unwrap(); - - loop { - let status = self - .data_layer_handler - .get_task_status(task_id.clone()) - .await - .unwrap(); - - if status == TaskStatus::Complete { - break; - } + .is_err() + { + return Lifecycle::Erroring; } Lifecycle::Deleted From 3a5559eac4e2c5974f15fb4751669931d1477fef Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Fri, 12 Jul 2024 15:45:04 +1200 Subject: [PATCH 06/40] feat: Flush state on control loop finish --- coordinator/src/indexer_state.rs | 2 +- coordinator/src/lifecycle.rs | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/coordinator/src/indexer_state.rs b/coordinator/src/indexer_state.rs index 3cb03ea2..7688eec0 100644 --- a/coordinator/src/indexer_state.rs +++ b/coordinator/src/indexer_state.rs @@ -83,7 +83,7 @@ impl IndexerStateManagerImpl { self.redis_client.delete_indexer_state(indexer_state).await } - async fn set_state( + pub async fn set_state( &self, indexer_config: &IndexerConfig, state: IndexerState, diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index b92d36b5..d58947ca 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -172,7 +172,7 @@ impl<'a> LifecycleManager<'a> { .get_state(&config.clone().unwrap()) .await?; - state.lifecycle = if let Some(config) = config { + state.lifecycle = if let Some(config) = config.clone() { match state.lifecycle { Lifecycle::Provisioning => self.handle_provisioning(&config, &state).await, 
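Patch 04's `synchronise_block_stream` and `synchronise_executor` share one reconcile shape: look up the live resource, compare its version with the registry, then no-op, restart, or start fresh. The decision itself needs no I/O, so it can be factored into a pure function and tested directly; the names here are illustrative, not the coordinator's API:

#[derive(Debug, PartialEq, Eq)]
enum SyncAction {
    Noop,            // running and up to date
    Restart(String), // stop this stale resource id, then start the new version
    Start,           // nothing running yet
}

fn decide(live: Option<(String, u64)>, registry_version: u64) -> SyncAction {
    match live {
        Some((_, version)) if version == registry_version => SyncAction::Noop,
        Some((id, _)) => SyncAction::Restart(id),
        None => SyncAction::Start,
    }
}

fn main() {
    assert_eq!(decide(Some(("exec-1".into(), 5)), 5), SyncAction::Noop);
    assert_eq!(decide(Some(("exec-1".into(), 4)), 5), SyncAction::Restart("exec-1".into()));
    assert_eq!(decide(None, 5), SyncAction::Start);
    println!("reconcile decisions are a pure function of (live, registry) versions");
}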
Lifecycle::Running => self.handle_running(&config, &state).await, Lifecycle::Stopping => self.handle_stopping(&config).await, Lifecycle::Stopped => self.handle_stopped(&state).await, Lifecycle::Deprovisioning => self.handle_deprovisioning().await, Lifecycle::Erroring => self.handle_erroring(&config, &state).await, Lifecycle::Deleting => unreachable!("handled below"), Lifecycle::Deleted => break, } } else { self.handle_deleting(&state).await - } + }; - // flush state + self.state_manager + .set_state(&config.unwrap(), state) + .await?; } Ok(()) } } From 1d8e8882fc16933bac98d25c857c7d22fc3b3e24 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Fri, 12 Jul 2024 15:47:11 +1200 Subject: [PATCH 07/40] chore: Notes --- coordinator/src/main.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs index e024691c..03a3f81f 100644 --- a/coordinator/src/main.rs +++ b/coordinator/src/main.rs @@ -1,3 +1,12 @@ +// TODO +// - [ ] Ensure all states are appropriate, i.e. do we need both Deprovisioning and Deleting? +// - [ ] Fully implement state handling, i.e. block stream/executor synchronisation +// - [ ] Spawn lifecycle tasks from `main()` +// - [ ] Extract client from handlers to make mocking/testing easier? is handler the best place for +// those higher level methods +// - [ ] Introduce typestate? +// - [ ] tests + use std::sync::Arc; use std::time::Duration; From 2e947da6ef5d26018808a4dfd76ca71498d50162 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 10:46:12 +1200 Subject: [PATCH 08/40] feat: Correctly handle non-existent indexer in registry --- coordinator/src/registry.rs | 12 +++++++----- coordinator/src/server/indexer_manager_service.rs | 6 ++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/coordinator/src/registry.rs b/coordinator/src/registry.rs index 81dbdb31..de891ffa 100644 --- a/coordinator/src/registry.rs +++ b/coordinator/src/registry.rs @@ -171,7 +171,7 @@ impl RegistryImpl { &self, account_id: &AccountId, function_name: &str, - ) -> anyhow::Result { + ) -> anyhow::Result> { let response = self .json_rpc_client .call(RpcQueryRequest { @@ -194,10 +194,10 @@ impl RegistryImpl { .context("Failed to fetch indexer")?; if let QueryResponseKind::CallResult(call_result) = response.kind { - let indexer: registry_types::IndexerConfig = - serde_json::from_slice(&call_result.result)?; - - return Ok(IndexerConfig { + let indexer = serde_json::from_slice::>( + &call_result.result, + )? + .map(|indexer| IndexerConfig { account_id: account_id.clone(), function_name: function_name.to_string(), code: indexer.code, @@ -207,6 +207,8 @@ updated_at_block_height: indexer.updated_at_block_height, created_at_block_height: indexer.created_at_block_height, }); + + return Ok(indexer); } anyhow::bail!("Invalid registry response") diff --git a/coordinator/src/server/indexer_manager_service.rs b/coordinator/src/server/indexer_manager_service.rs index 2a8d7e35..809fd897 100644 --- a/coordinator/src/server/indexer_manager_service.rs +++ b/coordinator/src/server/indexer_manager_service.rs @@ -44,7 +44,8 @@ impl indexer_manager::indexer_manager_server::IndexerManager for IndexerManagerS .registry .fetch_indexer(&account_id, &request.function_name) .await - .map_err(|_| Status::not_found("Indexer not found"))?; + .map_err(|_| Status::internal("Failed to fetch indexer"))? + .ok_or(Status::not_found("Indexer not found"))?; self.indexer_state_manager .set_enabled(&indexer_config, true) @@ -80,7 +81,8 @@ impl indexer_manager::indexer_manager_server::IndexerManager for IndexerManagerS let indexer_config = self .registry .fetch_indexer(&account_id, &request.function_name) .await - .map_err(|_| Status::not_found("Indexer not found"))?; + .map_err(|_| Status::internal("Failed to fetch indexer"))? 
+ .ok_or(Status::not_found("Indexer not found"))?; self.indexer_state_manager .set_enabled(&indexer_config, false) From d57a42bc16f9128ab8fd15157a79528495ee3a83 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 11:12:25 +1200 Subject: [PATCH 09/40] chore: Log delays during de/provisioning data layer --- coordinator/src/handlers/data_layer.rs | 32 ++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/coordinator/src/handlers/data_layer.rs b/coordinator/src/handlers/data_layer.rs index ae1124da..234f6beb 100644 --- a/coordinator/src/handlers/data_layer.rs +++ b/coordinator/src/handlers/data_layer.rs @@ -113,12 +113,26 @@ impl DataLayerHandlerImpl { let task_id = start_task_result.unwrap(); + let mut iterations = 0; + let delay_seconds = 1; + loop { if self.get_task_status(task_id.clone()).await? == TaskStatus::Complete { break; } tokio::time::sleep(std::time::Duration::from_secs(1)).await; + + iterations += 1; + + if iterations * delay_seconds % 60 == 0 { + tracing::warn!( + ?indexer_config.account_id, + ?indexer_config.function_name, + "Still waiting for provisioning to complete after {} seconds", + iterations * delay_seconds + ); + } } Ok(()) @@ -130,15 +144,29 @@ impl DataLayerHandlerImpl { function_name: String, ) -> anyhow::Result<()> { let task_id = self - .start_deprovisioning_task(account_id, function_name) + .start_deprovisioning_task(account_id.clone(), function_name.clone()) .await?; + let mut iterations = 0; + let delay_seconds = 1; + loop { if self.get_task_status(task_id.clone()).await? == TaskStatus::Complete { break; } - tokio::time::sleep(std::time::Duration::from_secs(1)).await; + tokio::time::sleep(std::time::Duration::from_secs(delay_seconds)).await; + + iterations += 1; + + if iterations * delay_seconds % 60 == 0 { + tracing::warn!( + ?account_id, + ?function_name, + "Still waiting for deprovisioning to complete after {} seconds", + iterations * delay_seconds + ); + } } Ok(()) From 5655c6617e375b2f2a48f538fe9f89dc7efb5c01 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 11:22:30 +1200 Subject: [PATCH 10/40] feat: Complete sync executor implementation --- coordinator/src/handlers/executors.rs | 28 +++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/coordinator/src/handlers/executors.rs b/coordinator/src/handlers/executors.rs index abbce90c..eae12dcd 100644 --- a/coordinator/src/handlers/executors.rs +++ b/coordinator/src/handlers/executors.rs @@ -109,16 +109,32 @@ impl ExecutorsHandlerImpl { } pub async fn synchronise_executor(&self, config: &IndexerConfig) -> anyhow::Result<()> { - let executor = self.get(config).await.unwrap(); + let executor = self.get(config).await?; + if let Some(executor) = executor { - if executor.version != config.get_registry_version() { - self.stop(executor.executor_id).await.unwrap(); - self.start(config).await.unwrap(); + if executor.version == config.get_registry_version() { + return Ok(()); } - } else { - self.start(config).await.unwrap(); + + tracing::info!( + account_id = config.account_id.as_str(), + function_name = config.function_name, + version = executor.version, + "Stopping executor" + ); + + self.stop(executor.executor_id).await?; } + tracing::info!( + account_id = config.account_id.as_str(), + function_name = config.function_name, + version = config.get_registry_version(), + "Starting executor" + ); + + self.start(config).await?; + Ok(()) } } From 3ff0ca47fea00564651f3f4c3f48153f598163d5 Mon Sep 17 00:00:00 2001 From: 
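Patches 09 and 10 move the handlers onto `tracing` with structured fields rather than strings interpolated into the message, which is what makes the coordinator's logs filterable per indexer. A standalone sketch of that logging style, with placeholder values:

// Cargo deps assumed: tracing = "0.1", tracing-subscriber = "0.3"
fn main() {
    tracing_subscriber::fmt().init();

    let account_id = "example.near";
    let function_name = "example_indexer";
    let version: u64 = 200;

    // Fields are recorded as key/values alongside the message, not embedded
    // in it, so downstream tooling can filter on account_id/function_name.
    tracing::info!(account_id, function_name, version, "Starting executor");
    tracing::warn!(account_id, function_name, "Still waiting for provisioning to complete after 60 seconds");
}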
Morgan Mccauley Date: Mon, 15 Jul 2024 14:17:31 +1200 Subject: [PATCH 11/40] refactor: Move block sync logic to handler struct --- coordinator/src/handlers/block_streams.rs | 118 ++++++++++++++++++++-- coordinator/src/lifecycle.rs | 19 ++-- coordinator/src/main.rs | 3 +- 3 files changed, 120 insertions(+), 20 deletions(-) diff --git a/coordinator/src/handlers/block_streams.rs b/coordinator/src/handlers/block_streams.rs index e7a30bcb..8fdfc7b3 100644 --- a/coordinator/src/handlers/block_streams.rs +++ b/coordinator/src/handlers/block_streams.rs @@ -8,11 +8,12 @@ use block_streamer::{ start_stream_request::Rule, ActionAnyRule, ActionFunctionCallRule, ListStreamsRequest, StartStreamRequest, Status, StopStreamRequest, }; +use registry_types::StartBlock; use tonic::transport::channel::Channel; use tonic::Request; use crate::indexer_config::IndexerConfig; -use crate::redis::KeyProvider; +use crate::redis::{KeyProvider, RedisClient}; use crate::utils::exponential_retry; #[cfg(not(test))] @@ -22,17 +23,21 @@ pub use MockBlockStreamsHandlerImpl as BlockStreamsHandler; pub struct BlockStreamsHandlerImpl { client: BlockStreamerClient, + redis_client: RedisClient, } #[cfg_attr(test, mockall::automock)] impl BlockStreamsHandlerImpl { - pub fn connect(block_streamer_url: &str) -> anyhow::Result { + pub fn connect(block_streamer_url: &str, redis_client: RedisClient) -> anyhow::Result { let channel = Channel::from_shared(block_streamer_url.to_string()) .context("Block Streamer URL is invalid")? .connect_lazy(); let client = BlockStreamerClient::new(channel); - Ok(Self { client }) + Ok(Self { + client, + redis_client, + }) } pub async fn list(&self) -> anyhow::Result> { @@ -149,18 +154,109 @@ impl BlockStreamsHandlerImpl { Ok(()) } - // TODO handle reconfiguration - pub async fn synchronise_block_stream(&self, config: &IndexerConfig) -> anyhow::Result<()> { - let block_stream = self.get(config).await.unwrap(); + async fn reconfigure_block_stream(&self, config: &IndexerConfig) -> anyhow::Result<()> { + if matches!( + config.start_block, + StartBlock::Latest | StartBlock::Height(..) + ) { + self.redis_client.clear_block_stream(config).await?; + } + + let height = match config.start_block { + StartBlock::Latest => config.get_registry_version(), + StartBlock::Height(height) => height, + StartBlock::Continue => self.get_continuation_block_height(config).await?, + }; + + tracing::info!(height, "Starting block stream"); + + self.start(height, config).await?; + + Ok(()) + } + + async fn start_new_block_stream(&self, config: &IndexerConfig) -> anyhow::Result<()> { + let height = match config.start_block { + StartBlock::Height(height) => height, + StartBlock::Latest => config.get_registry_version(), + StartBlock::Continue => { + tracing::warn!( + "Attempted to start new Block Stream with CONTINUE, using LATEST instead" + ); + config.get_registry_version() + } + }; + + tracing::info!(height, "Starting block stream"); + + self.start(height, config).await + } + + async fn get_continuation_block_height(&self, config: &IndexerConfig) -> anyhow::Result { + let height = self + .redis_client + .get_last_published_block(config) + .await? 
+ .map(|height| height + 1) + .unwrap_or_else(|| { + tracing::warn!( + "Failed to get continuation block height, using registry version instead" + ); + + config.get_registry_version() + }); + + Ok(height) + } + + async fn resume_block_stream(&self, config: &IndexerConfig) -> anyhow::Result<()> { + let height = self.get_continuation_block_height(config).await?; + + tracing::info!(height, "Resuming block stream"); + + self.start(height, config).await?; + + Ok(()) + } + + pub async fn synchronise_block_stream( + &self, + config: &IndexerConfig, + previous_sync_version: Option, + ) -> anyhow::Result<()> { + let block_stream = self.get(config).await?; + if let Some(block_stream) = block_stream { - if block_stream.version != config.get_registry_version() { - self.stop(block_stream.stream_id).await.unwrap(); - self.start(0, config).await.unwrap(); + if block_stream.version == config.get_registry_version() { + return Ok(()); } - } else { - self.start(0, config).await.unwrap(); + + tracing::info!( + previous_version = block_stream.version, + "Stopping outdated block stream" + ); + + self.stop(block_stream.stream_id.clone()).await?; + + self.reconfigure_block_stream(config).await?; + + return Ok(()); + } + + if previous_sync_version.is_none() { + self.start_new_block_stream(config).await?; + + return Ok(()); } + if previous_sync_version.unwrap() != config.get_registry_version() { + self.reconfigure_block_stream(config).await?; + + return Ok(()); + } + + self.resume_block_stream(config).await?; + Ok(()) } } diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index d58947ca..3cf4dce0 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -10,6 +10,7 @@ use crate::registry::Registry; // is there a way to map the transitions in this type? #[derive(Default, Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] +// LifecycleStates pub enum Lifecycle { // are these too specific? e.g. should deprovisioning happen within deleting? 
#[default] @@ -85,7 +86,7 @@ impl<'a> LifecycleManager<'a> { if self .block_streams_handler - .synchronise_block_stream(config) + .synchronise_block_stream(config, state.block_stream_synced_at) .await .is_err() { @@ -137,6 +138,7 @@ impl<'a> LifecycleManager<'a> { } async fn handle_erroring(&self, config: &IndexerConfig, state: &IndexerState) -> Lifecycle { + // check for update if config.get_registry_version() != state.block_stream_synced_at.unwrap() { return Lifecycle::Running; } @@ -154,19 +156,19 @@ impl<'a> LifecycleManager<'a> { return Lifecycle::Erroring; } + // remove redis state + Lifecycle::Deleted } - // should not return a result here, all errors should be handled internally + // should _not_ return a result here, all errors should be handled internally pub async fn run(&self) -> anyhow::Result<()> { // should throttle this loop { - // this would be optional, and would decide the deleting state - let config = Some( - self.registry - .fetch_indexer(&self.account_id, &self.function_name) - .await?, - ); + let config = self + .registry + .fetch_indexer(&self.account_id, &self.function_name) + .await?; let mut state = self .state_manager .get_state(&config.clone().unwrap()) @@ -187,6 +189,7 @@ impl<'a> LifecycleManager<'a> { self.handle_deleting(&state).await }; + // only set if not deleting self.state_manager .set_state(&config.unwrap(), state) .await?; diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs index 03a3f81f..61c29da1 100644 --- a/coordinator/src/main.rs +++ b/coordinator/src/main.rs @@ -68,7 +68,8 @@ async fn main() -> anyhow::Result<()> { let registry = Arc::new(Registry::connect(registry_contract_id.clone(), &rpc_url)); let redis_client = RedisClient::connect(&redis_url).await?; - let block_streams_handler = BlockStreamsHandler::connect(&block_streamer_url)?; + let block_streams_handler = + BlockStreamsHandler::connect(&block_streamer_url, redis_client.clone())?; let executors_handler = ExecutorsHandler::connect(&runner_url)?; let data_layer_handler = DataLayerHandler::connect(&runner_url)?; let indexer_state_manager = Arc::new(IndexerStateManager::new(redis_client.clone())); From 47e554fdd3bd7c8250bf6c441de2ac084cc39eed Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 14:18:12 +1200 Subject: [PATCH 12/40] refactor: Rename enum `Lifecycle` > `LifecycleStates` --- coordinator/src/lifecycle.rs | 71 ++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index 3cf4dce0..22d2767f 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -10,8 +10,7 @@ use crate::registry::Registry; // is there a way to map the transitions in this type? #[derive(Default, Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] -// LifecycleStates -pub enum Lifecycle { +pub enum LifecycleStates { // are these too specific? e.g. should deprovisioning happen within deleting? 
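Patch 11 concentrates the start-height decision into three cases: an explicit `Height` is used as-is, `Latest` pins the stream to the registry version recorded for the update, and `Continue` resumes one block past the last published height, falling back to the registry version when that height is unknown. The rule is easy to state as a pure function; the `StartBlock` here is a stand-in for the `registry_types` enum:

// Illustrative stand-in for registry_types::StartBlock.
#[derive(Clone, Copy)]
enum StartBlock {
    Latest,
    Height(u64),
    Continue,
}

// Mirrors the height-selection rules from reconfigure/resume/start_new:
// Height is explicit, Latest pins to the registry version, and Continue
// resumes one past the last published block (falling back to Latest).
fn start_height(
    start_block: StartBlock,
    registry_version: u64,
    last_published: Option<u64>,
) -> u64 {
    match start_block {
        StartBlock::Height(height) => height,
        StartBlock::Latest => registry_version,
        StartBlock::Continue => last_published.map(|h| h + 1).unwrap_or(registry_version),
    }
}

fn main() {
    assert_eq!(start_height(StartBlock::Height(42), 100, None), 42);
    assert_eq!(start_height(StartBlock::Latest, 100, Some(7)), 100);
    assert_eq!(start_height(StartBlock::Continue, 100, Some(7)), 8);
    assert_eq!(start_height(StartBlock::Continue, 100, None), 100);
    println!("start-block selection matches the reconfigure/resume rules");
}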
#[default] Provisioning, @@ -64,22 +63,26 @@ impl<'a> LifecycleManager<'a> { &self, config: &IndexerConfig, _state: &IndexerState, - ) -> Lifecycle { + ) -> LifecycleStates { if self .data_layer_handler .ensure_provisioned(config) .await .is_err() { - return Lifecycle::Erroring; + return LifecycleStates::Erroring; } - Lifecycle::Running + LifecycleStates::Running } - async fn handle_running(&self, config: &IndexerConfig, state: &IndexerState) -> Lifecycle { + async fn handle_running( + &self, + config: &IndexerConfig, + state: &IndexerState, + ) -> LifecycleStates { if !state.enabled { - return Lifecycle::Stopping; + return LifecycleStates::Stopping; } // check if we need to reprovision @@ -90,7 +93,7 @@ impl<'a> LifecycleManager<'a> { .await .is_err() { - return Lifecycle::Erroring; + return LifecycleStates::Erroring; } if self @@ -99,13 +102,13 @@ impl<'a> LifecycleManager<'a> { .await .is_err() { - return Lifecycle::Erroring; + return LifecycleStates::Erroring; } - Lifecycle::Running + LifecycleStates::Running } - async fn handle_stopping(&self, config: &IndexerConfig) -> Lifecycle { + async fn handle_stopping(&self, config: &IndexerConfig) -> LifecycleStates { if let Some(block_stream) = self.block_streams_handler.get(config).await.unwrap() { self.block_streams_handler .stop(block_stream.stream_id) @@ -120,45 +123,49 @@ impl<'a> LifecycleManager<'a> { .unwrap(); } - Lifecycle::Stopped + LifecycleStates::Stopped } - async fn handle_stopped(&self, state: &IndexerState) -> Lifecycle { + async fn handle_stopped(&self, state: &IndexerState) -> LifecycleStates { // check if config update? if state.enabled { - return Lifecycle::Running; + return LifecycleStates::Running; } - Lifecycle::Stopped + LifecycleStates::Stopped } - async fn handle_deprovisioning(&self) -> Lifecycle { - Lifecycle::Deprovisioning + async fn handle_deprovisioning(&self) -> LifecycleStates { + LifecycleStates::Deprovisioning } - async fn handle_erroring(&self, config: &IndexerConfig, state: &IndexerState) -> Lifecycle { + async fn handle_erroring( + &self, + config: &IndexerConfig, + state: &IndexerState, + ) -> LifecycleStates { // check for update if config.get_registry_version() != state.block_stream_synced_at.unwrap() { - return Lifecycle::Running; + return LifecycleStates::Running; } - Lifecycle::Erroring + LifecycleStates::Erroring } - async fn handle_deleting(&self, state: &IndexerState) -> Lifecycle { + async fn handle_deleting(&self, state: &IndexerState) -> LifecycleStates { if self .data_layer_handler .ensure_deprovisioned(state.account_id.clone(), state.function_name.clone()) .await .is_err() { - return Lifecycle::Erroring; + return LifecycleStates::Erroring; } // remove redis state - Lifecycle::Deleted + LifecycleStates::Deleted } // should _not_ return a result here, all errors should be handled internally @@ -176,14 +183,16 @@ impl<'a> LifecycleManager<'a> { state.lifecycle = if let Some(config) = config.clone() { match state.lifecycle { - Lifecycle::Provisioning => self.handle_provisioning(&config, &state).await, - Lifecycle::Running => self.handle_running(&config, &state).await, - Lifecycle::Stopping => self.handle_stopping(&config).await, - Lifecycle::Stopped => self.handle_stopped(&state).await, - Lifecycle::Deprovisioning => self.handle_deprovisioning().await, - Lifecycle::Erroring => self.handle_erroring(&config, &state).await, - Lifecycle::Deleting => unreachable!("handled below"), - Lifecycle::Deleted => break, + LifecycleStates::Provisioning => { + self.handle_provisioning(&config, &state).await + 
} + LifecycleStates::Running => self.handle_running(&config, &state).await, + LifecycleStates::Stopping => self.handle_stopping(&config).await, + LifecycleStates::Stopped => self.handle_stopped(&state).await, + LifecycleStates::Deprovisioning => self.handle_deprovisioning().await, + LifecycleStates::Erroring => self.handle_erroring(&config, &state).await, + LifecycleStates::Deleting => unreachable!("handled below"), + LifecycleStates::Deleted => break, } } else { self.handle_deleting(&state).await From d548079d45f6e93fd746355c0af6537ee65511d7 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 14:18:39 +1200 Subject: [PATCH 13/40] chore: Remove uncompilable synchorniser tests --- coordinator/src/synchroniser.rs | 953 -------------------------------- 1 file changed, 953 deletions(-) diff --git a/coordinator/src/synchroniser.rs b/coordinator/src/synchroniser.rs index bf362bc5..64a5ce07 100644 --- a/coordinator/src/synchroniser.rs +++ b/coordinator/src/synchroniser.rs @@ -427,956 +427,3 @@ impl<'a> Synchroniser<'a> { Ok(()) } } - -#[cfg(test)] -mod test { - use super::*; - - use mockall::predicate::*; - use std::collections::HashMap; - - use crate::registry::IndexerRegistry; - - #[tokio::test] - async fn generates_sync_states() { - let existing_account_ids = vec![ - "account1.near".to_string(), - "account2.near".to_string(), - "account3.near".to_string(), - "account4.near".to_string(), - ]; - let new_account_ids = vec![ - "new_account1.near".to_string(), - "new_account2.near".to_string(), - ]; - let deleted_account_ids = vec![ - "deleted_account1.near".to_string(), - "deleted_account2.near".to_string(), - ]; - - let mut existing_indexer_configs: Vec = Vec::new(); - for (i, account_id) in existing_account_ids.iter().enumerate() { - for j in 1..=5 { - existing_indexer_configs.push(IndexerConfig { - account_id: account_id.parse().unwrap(), - function_name: format!("existing_indexer{}_{}", i + 1, j), - ..Default::default() - }); - } - } - - let mut new_indexer_configs: Vec = Vec::new(); - for (i, account_id) in new_account_ids.iter().enumerate() { - for j in 1..=3 { - new_indexer_configs.push(IndexerConfig { - account_id: account_id.parse().unwrap(), - function_name: format!("new_indexer{}_{}", i + 1, j), - ..Default::default() - }); - } - } - - let mut deleted_indexer_configs: Vec = Vec::new(); - for (i, account_id) in deleted_account_ids.iter().enumerate() { - for j in 1..=2 { - deleted_indexer_configs.push(IndexerConfig { - account_id: account_id.parse().unwrap(), - function_name: format!("deleted_indexer{}_{}", i + 1, j), - ..Default::default() - }); - } - } - - let mut indexer_registry = IndexerRegistry::new(); - for indexer in existing_indexer_configs - .iter() - .chain(new_indexer_configs.iter()) - { - indexer_registry - .entry(indexer.account_id.clone()) - .or_default() - .insert(indexer.function_name.clone(), indexer.clone()); - } - - let mut block_streams_handler = BlockStreamsHandler::default(); - let block_streams: Vec = existing_indexer_configs - .iter() - // generate some "randomness" - .rev() - .enumerate() - .map(|(i, indexer)| StreamInfo { - stream_id: format!("stream_id{}", i + 1), - account_id: indexer.account_id.to_string(), - function_name: indexer.function_name.clone(), - version: indexer.get_registry_version(), - }) - .collect(); - block_streams_handler - .expect_list() - .returning(move || Ok(block_streams.clone())); - - let mut executors_handler = ExecutorsHandler::default(); - let executors: Vec = existing_indexer_configs - .iter() - // generate some 
"randomness" - .rev() - .enumerate() - .map(|(i, indexer)| ExecutorInfo { - executor_id: format!("executor_id{}", i + 1), - account_id: indexer.account_id.to_string(), - function_name: indexer.function_name.clone(), - version: indexer.get_registry_version(), - status: "running".to_string(), - }) - .collect(); - - executors_handler - .expect_list() - .returning(move || Ok(executors.clone())); - - let mut registry = Registry::default(); - registry - .expect_fetch() - .returning(move || Ok(indexer_registry.clone())); - - let mut state_manager = IndexerStateManager::default(); - let states: Vec = existing_indexer_configs - .iter() - .map(|indexer| IndexerState { - account_id: indexer.account_id.clone(), - function_name: indexer.function_name.clone(), - block_stream_synced_at: Some(indexer.get_registry_version()), - enabled: true, - provisioned_state: ProvisionedState::Provisioned, - }) - .chain(deleted_indexer_configs.iter().map(|indexer| IndexerState { - account_id: indexer.account_id.clone(), - function_name: indexer.function_name.clone(), - block_stream_synced_at: Some(indexer.get_registry_version()), - enabled: true, - provisioned_state: ProvisionedState::Provisioned, - })) - .collect(); - state_manager - .expect_list() - .returning(move || Ok(states.clone())); - - let redis_client = RedisClient::default(); - let data_layer_handler = DataLayerHandler::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - let synchronisation_states = synchroniser - .generate_synchronisation_states() - .await - .unwrap(); - - let mut new_count = 0; - let mut existing_count = 0; - let mut deleted_count = 0; - - for state in &synchronisation_states { - match state { - SynchronisationState::New(_) => new_count += 1, - SynchronisationState::Existing(_, _, executor, block_stream) => { - assert!(executor.is_some(), "Executor should exist for the indexer"); - assert!( - block_stream.is_some(), - "Block stream should exist for the indexer" - ); - existing_count += 1; - } - SynchronisationState::Deleted(_, _, _) => { - deleted_count += 1; - } - } - } - - assert_eq!(new_count, 6); - assert_eq!(existing_count, 20); - assert_eq!(deleted_count, 4); - } - - mod new { - use super::*; - - #[tokio::test] - async fn triggers_data_layer_provisioning() { - let config = IndexerConfig::default(); - - let indexer_registry = IndexerRegistry::from(&[( - config.account_id.clone(), - HashMap::from([(config.function_name.clone(), config.clone())]), - )]); - - let mut block_streams_handler = BlockStreamsHandler::default(); - block_streams_handler.expect_list().returning(|| Ok(vec![])); - - let mut executors_handler = ExecutorsHandler::default(); - executors_handler.expect_list().returning(|| Ok(vec![])); - - let mut registry = Registry::default(); - registry - .expect_fetch() - .returning(move || Ok(indexer_registry.clone())); - - let mut state_manager = IndexerStateManager::default(); - state_manager.expect_list().returning(|| Ok(vec![])); - state_manager - .expect_set_provisioning() - .with(eq(config.clone()), eq("task_id".to_string())) - .returning(|_, _| Ok(())) - .once(); - - let mut data_layer_handler = DataLayerHandler::default(); - data_layer_handler - .expect_start_provisioning_task() - .with(eq(config)) - .returning(|_| Ok("task_id".to_string())) - .once(); - - let redis_client = RedisClient::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - 
&data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser.sync().await.unwrap(); - } - } - - mod existing { - use super::*; - - #[tokio::test] - async fn waits_for_provisioning_to_complete() { - let config = IndexerConfig::default(); - - let indexer_registry = IndexerRegistry::from(&[( - config.account_id.clone(), - HashMap::from([(config.function_name.clone(), config.clone())]), - )]); - - let task_id = "task_id".to_string(); - - let state = IndexerState { - account_id: config.account_id.clone(), - function_name: config.function_name.clone(), - block_stream_synced_at: Some(config.get_registry_version()), - enabled: true, - provisioned_state: ProvisionedState::Provisioning { - task_id: task_id.clone().to_string(), - }, - }; - - let mut registry = Registry::default(); - registry - .expect_fetch() - .returning(move || Ok(indexer_registry.clone())); - - let mut state_manager = IndexerStateManager::default(); - state_manager - .expect_set_provisioned() - .with(eq(config.clone())) - .returning(|_| Ok(())) - .once(); - - let mut data_layer_handler = DataLayerHandler::default(); - data_layer_handler - .expect_get_task_status() - .with(eq(task_id)) - .returning(|_| Ok(TaskStatus::Complete)); - - let mut block_streams_handler = BlockStreamsHandler::default(); - block_streams_handler.expect_start().never(); - - let mut executors_handler = ExecutorsHandler::default(); - executors_handler.expect_start().never(); - - let redis_client = RedisClient::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser - .sync_existing_indexer(&config, &state, None, None) - .await - .unwrap(); - } - - #[tokio::test] - async fn ignores_failed_provisioning() { - let config = IndexerConfig::default(); - - let indexer_registry = IndexerRegistry::from(&[( - config.account_id.clone(), - HashMap::from([(config.function_name.clone(), config.clone())]), - )]); - - let state = IndexerState { - account_id: config.account_id.clone(), - function_name: config.function_name.clone(), - block_stream_synced_at: Some(config.get_registry_version()), - enabled: true, - provisioned_state: ProvisionedState::Provisioning { - task_id: "task_id".to_string(), - }, - }; - - let mut registry = Registry::default(); - registry - .expect_fetch() - .returning(move || Ok(indexer_registry.clone())); - - let mut state_manager = IndexerStateManager::default(); - state_manager - .expect_set_provisioning_failure() - .with(eq(config.clone())) - .returning(|_| Ok(())) - .once(); - - let mut data_layer_handler = DataLayerHandler::default(); - data_layer_handler - .expect_get_task_status() - .with(eq("task_id".to_string())) - .returning(|_| Ok(TaskStatus::Failed)); - - let mut block_streams_handler = BlockStreamsHandler::default(); - block_streams_handler.expect_start().never(); - - let mut executors_handler = ExecutorsHandler::default(); - executors_handler.expect_start().never(); - - let redis_client = RedisClient::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser - .sync_existing_indexer(&config, &state, None, None) - .await - .unwrap(); - } - - #[tokio::test] - async fn ignores_synced() { - let config = IndexerConfig::default(); - - let indexer_registry = IndexerRegistry::from(&[( - config.account_id.clone(), - HashMap::from([(config.function_name.clone(), 
config.clone())]), - )]); - - let mut block_streams_handler = BlockStreamsHandler::default(); - let config_clone = config.clone(); - block_streams_handler.expect_list().returning(move || { - Ok(vec![StreamInfo { - stream_id: config_clone.get_redis_stream_key(), - account_id: config_clone.account_id.to_string(), - function_name: config_clone.function_name.clone(), - version: config_clone.get_registry_version(), - }]) - }); - block_streams_handler.expect_stop().never(); - block_streams_handler.expect_start().never(); - - let mut executors_handler = ExecutorsHandler::default(); - let config_clone = config.clone(); - executors_handler.expect_list().returning(move || { - Ok(vec![ExecutorInfo { - executor_id: "executor_id".to_string(), - account_id: config_clone.account_id.to_string(), - function_name: config_clone.function_name.clone(), - version: config_clone.get_registry_version(), - status: "running".to_string(), - }]) - }); - executors_handler.expect_stop().never(); - executors_handler.expect_start().never(); - - let mut registry = Registry::default(); - registry - .expect_fetch() - .returning(move || Ok(indexer_registry.clone())); - - let mut state_manager = IndexerStateManager::default(); - state_manager - .expect_set_synced() - .with(eq(config.clone())) - .returning(|_| Ok(())) - .once(); - state_manager.expect_list().returning(move || { - Ok(vec![IndexerState { - account_id: config.account_id.clone(), - function_name: config.function_name.clone(), - block_stream_synced_at: Some(config.get_registry_version()), - enabled: true, - provisioned_state: ProvisionedState::Provisioned, - }]) - }); - - let redis_client = RedisClient::default(); - let data_layer_handler = DataLayerHandler::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser.sync().await.unwrap(); - } - - #[tokio::test] - async fn restarts_outdated() { - let config = IndexerConfig::default(); - - let indexer_registry = IndexerRegistry::from(&[( - config.account_id.clone(), - HashMap::from([(config.function_name.clone(), config.clone())]), - )]); - - let mut block_streams_handler = BlockStreamsHandler::default(); - let config_clone = config.clone(); - block_streams_handler.expect_list().returning(move || { - Ok(vec![StreamInfo { - stream_id: "stream_id".to_string(), - account_id: config_clone.account_id.to_string(), - function_name: config_clone.function_name.clone(), - version: config_clone.get_registry_version() + 1, - }]) - }); - block_streams_handler - .expect_stop() - .with(eq("stream_id".to_string())) - .returning(|_| Ok(())) - .once(); - block_streams_handler - .expect_start() - .with(eq(100), eq(config.clone())) - .returning(|_, _| Ok(())) - .once(); - - let mut executors_handler = ExecutorsHandler::default(); - let config_clone = config.clone(); - executors_handler.expect_list().returning(move || { - Ok(vec![ExecutorInfo { - executor_id: "executor_id".to_string(), - account_id: config_clone.account_id.to_string(), - function_name: config_clone.function_name.clone(), - version: config_clone.get_registry_version() + 1, - status: "running".to_string(), - }]) - }); - executors_handler - .expect_stop() - .with(eq("executor_id".to_string())) - .returning(|_| Ok(())) - .once(); - executors_handler - .expect_start() - .with(eq(config.clone())) - .returning(|_| Ok(())) - .once(); - - let mut registry = Registry::default(); - registry - .expect_fetch() - .returning(move || 
Ok(indexer_registry.clone())); - - let mut redis_client = RedisClient::default(); - redis_client - .expect_clear_block_stream() - .with(eq(config.clone())) - .returning(|_| Ok(())) - .once(); - - let mut state_manager = IndexerStateManager::default(); - state_manager - .expect_set_synced() - .with(eq(config.clone())) - .returning(|_| Ok(())) - .once(); - state_manager.expect_list().returning(move || { - Ok(vec![IndexerState { - account_id: config.account_id.clone(), - function_name: config.function_name.clone(), - block_stream_synced_at: Some(config.get_registry_version()), - enabled: true, - provisioned_state: ProvisionedState::Provisioned, - }]) - }); - - let data_layer_handler = DataLayerHandler::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser.sync().await.unwrap(); - } - - #[tokio::test] - async fn treats_unsynced_blocks_streams_as_new() { - let config = IndexerConfig::default(); - let state = IndexerState { - account_id: config.account_id.clone(), - function_name: config.function_name.clone(), - block_stream_synced_at: None, - enabled: true, - provisioned_state: ProvisionedState::Provisioned, - }; - - let mut block_streams_handler = BlockStreamsHandler::default(); - block_streams_handler - .expect_start() - .with(eq(100), eq(config.clone())) - .returning(|_, _| Ok(())) - .once(); - - let redis_client = RedisClient::default(); - let state_manager = IndexerStateManager::default(); - let executors_handler = ExecutorsHandler::default(); - let registry = Registry::default(); - let data_layer_handler = DataLayerHandler::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser - .sync_existing_block_stream(&config, &state, None) - .await - .unwrap(); - } - - #[tokio::test] - async fn restarts_stopped_and_outdated_block_stream() { - let config = IndexerConfig::default(); - let state = IndexerState { - account_id: config.account_id.clone(), - function_name: config.function_name.clone(), - block_stream_synced_at: Some(config.get_registry_version() - 1), - enabled: true, - provisioned_state: ProvisionedState::Provisioned, - }; - - let mut block_streams_handler = BlockStreamsHandler::default(); - block_streams_handler - .expect_start() - .with(eq(100), eq(config.clone())) - .returning(|_, _| Ok(())) - .once(); - - let mut redis_client = RedisClient::default(); - redis_client - .expect_clear_block_stream() - .with(eq(config.clone())) - .returning(|_| Ok(())) - .once(); - - let state_manager = IndexerStateManager::default(); - let executors_handler = ExecutorsHandler::default(); - let registry = Registry::default(); - let data_layer_handler = DataLayerHandler::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser - .sync_existing_block_stream(&config, &state, None) - .await - .unwrap(); - } - - #[tokio::test] - async fn resumes_stopped_and_synced_block_stream() { - let config = IndexerConfig::default(); - let state = IndexerState { - account_id: config.account_id.clone(), - function_name: config.function_name.clone(), - block_stream_synced_at: Some(config.get_registry_version()), - enabled: true, - provisioned_state: ProvisionedState::Provisioned, - }; - - let last_published_block = 1; - - 
let mut redis_client = RedisClient::default(); - redis_client - .expect_clear_block_stream::() - .never(); - redis_client - .expect_get_last_published_block() - .with(eq(config.clone())) - .returning(move |_| Ok(Some(last_published_block))); - - let mut block_streams_handler = BlockStreamsHandler::default(); - block_streams_handler - .expect_start() - .with(eq(last_published_block + 1), eq(config.clone())) - .returning(|_, _| Ok(())) - .once(); - - let state_manager = IndexerStateManager::default(); - let executors_handler = ExecutorsHandler::default(); - let registry = Registry::default(); - let data_layer_handler = DataLayerHandler::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser - .sync_existing_block_stream(&config, &state, None) - .await - .unwrap(); - } - - #[tokio::test] - async fn reconfigures_block_stream() { - let config_with_latest = IndexerConfig { - start_block: StartBlock::Latest, - ..IndexerConfig::default() - }; - let height = 5; - let config_with_height = IndexerConfig { - start_block: StartBlock::Height(height), - ..IndexerConfig::default() - }; - let last_published_block = 1; - let config_with_continue = IndexerConfig { - start_block: StartBlock::Continue, - ..IndexerConfig::default() - }; - - let mut block_streams_handler = BlockStreamsHandler::default(); - block_streams_handler - .expect_start() - .with( - eq(last_published_block + 1), - eq(config_with_continue.clone()), - ) - .returning(|_, _| Ok(())) - .once(); - block_streams_handler - .expect_start() - .with( - eq(config_with_latest.get_registry_version()), - eq(config_with_latest.clone()), - ) - .returning(|_, _| Ok(())) - .once(); - block_streams_handler - .expect_start() - .with(eq(height), eq(config_with_height.clone())) - .returning(|_, _| Ok(())) - .once(); - - let mut redis_client = RedisClient::default(); - redis_client - .expect_clear_block_stream() - .with(eq(config_with_latest.clone())) - .returning(|_| Ok(())) - .once(); - redis_client - .expect_clear_block_stream() - .with(eq(config_with_height.clone())) - .returning(|_| Ok(())) - .once(); - redis_client - .expect_get_last_published_block() - .with(eq(config_with_continue.clone())) - .returning(move |_| Ok(Some(last_published_block))); - - let state_manager = IndexerStateManager::default(); - let executors_handler = ExecutorsHandler::default(); - let registry = Registry::default(); - let data_layer_handler = DataLayerHandler::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser - .reconfigure_block_stream(&config_with_latest) - .await - .unwrap(); - synchroniser - .reconfigure_block_stream(&config_with_height) - .await - .unwrap(); - synchroniser - .reconfigure_block_stream(&config_with_continue) - .await - .unwrap(); - } - - #[tokio::test] - async fn stops_disabled_indexers() { - let config = IndexerConfig::default(); - let state = IndexerState { - account_id: config.account_id.clone(), - function_name: config.function_name.clone(), - block_stream_synced_at: Some(config.get_registry_version()), - enabled: false, - provisioned_state: ProvisionedState::Provisioned, - }; - let executor = ExecutorInfo { - executor_id: "executor_id".to_string(), - account_id: config.account_id.to_string(), - function_name: config.function_name.clone(), - version: config.get_registry_version(), - 
status: "running".to_string(), - }; - let block_stream = StreamInfo { - stream_id: "stream_id".to_string(), - account_id: config.account_id.to_string(), - function_name: config.function_name.clone(), - version: config.get_registry_version(), - }; - - let mut block_streams_handler = BlockStreamsHandler::default(); - block_streams_handler - .expect_stop() - .with(eq("stream_id".to_string())) - .returning(|_| Ok(())) - .once(); - - let mut executors_handler = ExecutorsHandler::default(); - executors_handler - .expect_stop() - .with(eq("executor_id".to_string())) - .returning(|_| Ok(())) - .once(); - - let mut state_manager = IndexerStateManager::default(); - state_manager - .expect_set_synced() - .with(eq(config.clone())) - .returning(|_| Ok(())) - .never(); - - let registry = Registry::default(); - let redis_client = RedisClient::default(); - let data_layer_handler = DataLayerHandler::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser - .sync_existing_indexer(&config, &state, Some(&executor), Some(&block_stream)) - .await - .unwrap(); - // Simulate second run, start/stop etc should not be called - synchroniser - .sync_existing_indexer(&config, &state, None, None) - .await - .unwrap(); - } - } - - mod deleted { - use super::*; - - #[tokio::test] - async fn stops_block_stream_and_executor() { - let config = IndexerConfig::default(); - let state = IndexerState { - account_id: config.account_id.clone(), - function_name: config.function_name.clone(), - block_stream_synced_at: Some(config.get_registry_version()), - enabled: false, - provisioned_state: ProvisionedState::Deprovisioning { - task_id: "task_id".to_string(), - }, - }; - let executor = ExecutorInfo { - executor_id: "executor_id".to_string(), - account_id: config.account_id.to_string(), - function_name: config.function_name.clone(), - version: config.get_registry_version(), - status: "running".to_string(), - }; - let block_stream = StreamInfo { - stream_id: "stream_id".to_string(), - account_id: config.account_id.to_string(), - function_name: config.function_name.clone(), - version: config.get_registry_version(), - }; - - let mut block_streams_handler = BlockStreamsHandler::default(); - block_streams_handler - .expect_stop() - .with(eq("stream_id".to_string())) - .returning(|_| Ok(())) - .once(); - - let mut executors_handler = ExecutorsHandler::default(); - executors_handler - .expect_stop() - .with(eq("executor_id".to_string())) - .returning(|_| Ok(())) - .once(); - - let mut state_manager = IndexerStateManager::default(); - state_manager.expect_delete_state().never(); - - let mut data_layer_handler = DataLayerHandler::default(); - data_layer_handler - .expect_get_task_status() - .with(eq("task_id".to_string())) - .returning(|_| Ok(TaskStatus::Pending)); - - let registry = Registry::default(); - let redis_client = RedisClient::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser - .sync_deleted_indexer(&state, Some(&executor), Some(&block_stream)) - .await - .unwrap(); - } - - #[tokio::test] - async fn cleans_indexer_resources() { - let config = IndexerConfig::default(); - let provisioned_state = IndexerState { - account_id: config.account_id.clone(), - function_name: config.function_name.clone(), - block_stream_synced_at: Some(config.get_registry_version()), - 
enabled: false, - provisioned_state: ProvisionedState::Provisioned, - }; - let deprovisioning_state = IndexerState { - account_id: config.account_id.clone(), - function_name: config.function_name.clone(), - block_stream_synced_at: Some(config.get_registry_version()), - enabled: false, - provisioned_state: ProvisionedState::Deprovisioning { - task_id: "task_id".to_string(), - }, - }; - - let mut state_manager = IndexerStateManager::default(); - state_manager - .expect_set_deprovisioning() - .with(eq(provisioned_state.clone()), eq("task_id".to_string())) - .returning(|_, _| Ok(())); - state_manager - .expect_delete_state() - .with(eq(deprovisioning_state.clone())) - .returning(|_| Ok(())) - .once(); - - let mut data_layer_handler = DataLayerHandler::default(); - data_layer_handler - .expect_start_deprovisioning_task() - .with( - eq(config.clone().account_id), - eq(config.clone().function_name), - ) - .returning(|_, _| Ok("task_id".to_string())); - data_layer_handler - .expect_get_task_status() - .with(eq("task_id".to_string())) - .returning(|_| Ok(TaskStatus::Complete)); - - let mut redis_client = RedisClient::default(); - redis_client - .expect_del::() - .with(eq(config.get_redis_stream_key())) - .returning(|_| Ok(())) - .once(); - - let registry = Registry::default(); - let block_streams_handler = BlockStreamsHandler::default(); - let executors_handler = ExecutorsHandler::default(); - - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &state_manager, - &redis_client, - ); - - synchroniser - .sync_deleted_indexer(&provisioned_state, None, None) - .await - .unwrap(); - synchroniser - .sync_deleted_indexer(&deprovisioning_state, None, None) - .await - .unwrap(); - } - } -} From ef4962aca1598a51e0c80186115b8daffe083581 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 14:23:24 +1200 Subject: [PATCH 14/40] feat: Throttle control loop --- coordinator/src/lifecycle.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index 22d2767f..4a242021 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -8,6 +8,8 @@ use crate::indexer_state::{IndexerState, IndexerStateManager, ProvisionedState}; use crate::redis::RedisClient; use crate::registry::Registry; +const LOOP_THROTTLE_MS: u64 = 500; + // is there a way to map the transitions in this type? 
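// One possible answer, sketched here rather than implemented (the edge set
// below is illustrative only, inferred from the handlers in this file): keep
// the transitions in a single pure method, so `match` exhaustiveness forces
// every newly added state to declare its edges.
//
//     impl LifecycleStates {
//         fn can_transition_to(&self, next: &LifecycleStates) -> bool {
//             use LifecycleStates::*;
//
//             matches!(
//                 (self, next),
//                 (Provisioning, Running | Erroring)
//                     | (Running, Stopping | Erroring)
//                     | (Stopping, Stopped)
//                     | (Stopped, Running)
//                     | (Erroring, Running)
//                     | (Deleting, Deleted)
//             )
//         }
//     }
//
// A caller could then guard every state change with
// `debug_assert!(current.can_transition_to(&next))`, turning accidental
// edges into test failures.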
#[derive(Default, Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] pub enum LifecycleStates { @@ -85,8 +87,6 @@ impl<'a> LifecycleManager<'a> { return LifecycleStates::Stopping; } - // check if we need to reprovision - if self .block_streams_handler .synchronise_block_stream(config, state.block_stream_synced_at) @@ -170,7 +170,6 @@ impl<'a> LifecycleManager<'a> { // should _not_ return a result here, all errors should be handled internally pub async fn run(&self) -> anyhow::Result<()> { - // should throttle this loop { let config = self .registry @@ -202,6 +201,8 @@ impl<'a> LifecycleManager<'a> { self.state_manager .set_state(&config.unwrap(), state) .await?; + + tokio::time::sleep(std::time::Duration::from_millis(LOOP_THROTTLE_MS)).await; } Ok(()) From 39a6bc171f7907e5bb6636643f1f32670f9fd479 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 14:43:51 +1200 Subject: [PATCH 15/40] refactor: Remove unnecessary `Deprovisioning` state --- coordinator/src/lifecycle.rs | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index 4a242021..884075e0 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -1,10 +1,10 @@ use near_primitives::types::AccountId; -use crate::handlers::block_streams::{BlockStreamsHandler, StreamInfo}; -use crate::handlers::data_layer::{DataLayerHandler, TaskStatus}; -use crate::handlers::executors::{ExecutorInfo, ExecutorsHandler}; +use crate::handlers::block_streams::BlockStreamsHandler; +use crate::handlers::data_layer::DataLayerHandler; +use crate::handlers::executors::ExecutorsHandler; use crate::indexer_config::IndexerConfig; -use crate::indexer_state::{IndexerState, IndexerStateManager, ProvisionedState}; +use crate::indexer_state::{IndexerState, IndexerStateManager}; use crate::redis::RedisClient; use crate::registry::Registry; @@ -13,14 +13,12 @@ const LOOP_THROTTLE_MS: u64 = 500; // is there a way to map the transitions in this type? #[derive(Default, Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] pub enum LifecycleStates { - // are these too specific? e.g. should deprovisioning happen within deleting? #[default] Provisioning, Running, Stopping, Stopped, - // this is kinda the same as deleting, do we need it? - Deprovisioning, + // Repairing? Erroring, Deleting, Deleted, @@ -127,7 +125,7 @@ impl<'a> LifecycleManager<'a> { } async fn handle_stopped(&self, state: &IndexerState) -> LifecycleStates { - // check if config update? 
+        // TODO Transition to `Running` on config update
 
         if state.enabled {
             return LifecycleStates::Running;
@@ -136,10 +134,6 @@ impl<'a> LifecycleManager<'a> {
         LifecycleStates::Stopped
     }
 
-    async fn handle_deprovisioning(&self) -> LifecycleStates {
-        LifecycleStates::Deprovisioning
-    }
-
     async fn handle_erroring(
         &self,
         config: &IndexerConfig,
@@ -180,7 +174,7 @@ impl<'a> LifecycleManager<'a> {
             .get_state(&config.clone().unwrap())
             .await?;
 
-        state.lifecycle = if let Some(config) = config.clone() {
+        let next_lifecycle_state = if let Some(config) = config.clone() {
             match state.lifecycle {
                 LifecycleStates::Provisioning => {
                     self.handle_provisioning(&config, &state).await
@@ -188,7 +182,6 @@ impl<'a> LifecycleManager<'a> {
                 LifecycleStates::Running => self.handle_running(&config, &state).await,
                 LifecycleStates::Stopping => self.handle_stopping(&config).await,
                 LifecycleStates::Stopped => self.handle_stopped(&state).await,
-                LifecycleStates::Deprovisioning => self.handle_deprovisioning().await,
                 LifecycleStates::Erroring => self.handle_erroring(&config, &state).await,
                 LifecycleStates::Deleting => unreachable!("handled below"),
                 LifecycleStates::Deleted => break,
@@ -197,6 +190,8 @@ impl<'a> LifecycleManager<'a> {
             self.handle_deleting(&state).await
         };
 
+        state.lifecycle = next_lifecycle_state;
+
         // only set if not deleting
         self.state_manager
             .set_state(&config.unwrap(), state)

From 5eaaccad11ba84656e4813c19bd1b15834ad22bd Mon Sep 17 00:00:00 2001
From: Morgan Mccauley
Date: Mon, 15 Jul 2024 14:55:14 +1200
Subject: [PATCH 16/40] refactor: Rename for clarity

---
 coordinator/src/lifecycle.rs | 78 +++++++++++++++----------------
 1 file changed, 33 insertions(+), 45 deletions(-)

diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs
index 884075e0..7647fb8b 100644
--- a/coordinator/src/lifecycle.rs
+++ b/coordinator/src/lifecycle.rs
@@ -10,16 +10,14 @@ use crate::registry::Registry;
 
 const LOOP_THROTTLE_MS: u64 = 500;
 
-// is there a way to map the transitions in this type?
 #[derive(Default, Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
-pub enum LifecycleStates {
+pub enum LifecycleState {
     #[default]
-    Provisioning,
+    Initializing,
     Running,
     Stopping,
     Stopped,
-    // Repairing?
-    Erroring,
+    Repairing,
     Deleting,
     Deleted,
 }
@@ -59,30 +57,26 @@ impl<'a> LifecycleManager<'a> {
         }
     }
 
-    async fn handle_provisioning(
+    async fn handle_initializing(
         &self,
         config: &IndexerConfig,
         _state: &IndexerState,
-    ) -> LifecycleStates {
+    ) -> LifecycleState {
         if self
             .data_layer_handler
             .ensure_provisioned(config)
             .await
             .is_err()
         {
-            return LifecycleStates::Erroring;
+            return LifecycleState::Repairing;
         }
 
-        LifecycleStates::Running
+        LifecycleState::Running
     }
 
-    async fn handle_running(
-        &self,
-        config: &IndexerConfig,
-        state: &IndexerState,
-    ) -> LifecycleStates {
+    async fn handle_running(&self, config: &IndexerConfig, state: &IndexerState) -> LifecycleState {
         if !state.enabled {
-            return LifecycleStates::Stopping;
+            return LifecycleState::Stopping;
         }
 
         if self
@@ -91,7 +85,7 @@ impl<'a> LifecycleManager<'a> {
             .await
             .is_err()
         {
-            return LifecycleStates::Erroring;
+            return LifecycleState::Repairing;
         }
 
         if self
@@ -100,13 +94,13 @@ impl<'a> LifecycleManager<'a> {
             .await
             .is_err()
         {
-            return LifecycleStates::Erroring;
+            return LifecycleState::Repairing;
         }
 
-        LifecycleStates::Running
+        LifecycleState::Running
     }
 
-    async fn handle_stopping(&self, config: &IndexerConfig) -> LifecycleStates {
+    async fn handle_stopping(&self, config: &IndexerConfig) -> LifecycleState {
         if let Some(block_stream) = self.block_streams_handler.get(config).await.unwrap() {
             self.block_streams_handler
                 .stop(block_stream.stream_id)
                 .await
@@ -121,45 +115,41 @@ impl<'a> LifecycleManager<'a> {
                 .unwrap();
         }
 
-        LifecycleStates::Stopped
+        LifecycleState::Stopped
     }
 
-    async fn handle_stopped(&self, state: &IndexerState) -> LifecycleStates {
+    async fn handle_stopped(&self, state: &IndexerState) -> LifecycleState {
         // TODO Transition to `Running` on config update
 
         if state.enabled {
-            return LifecycleStates::Running;
+            return LifecycleState::Running;
         }
 
-        LifecycleStates::Stopped
+        LifecycleState::Stopped
     }
 
-    async fn handle_erroring(
+    async fn handle_repairing(
         &self,
-        config: &IndexerConfig,
-        state: &IndexerState,
-    ) -> LifecycleStates {
-        // check for update
-        if config.get_registry_version() != state.block_stream_synced_at.unwrap() {
-            return LifecycleStates::Running;
-        }
-
-        LifecycleStates::Erroring
+        _config: &IndexerConfig,
+        _state: &IndexerState,
+    ) -> LifecycleState {
+        // TODO Add more robust error handling
+        LifecycleState::Repairing
    }
 
-    async fn handle_deleting(&self, state: &IndexerState) -> LifecycleStates {
+    async fn handle_deleting(&self, state: &IndexerState) -> LifecycleState {
         if self
             .data_layer_handler
             .ensure_deprovisioned(state.account_id.clone(), state.function_name.clone())
             .await
             .is_err()
         {
-            return LifecycleStates::Erroring;
+            return LifecycleState::Repairing;
         }
 
         // remove redis state
 
-        LifecycleStates::Deleted
+        LifecycleState::Deleted
     }
 
     // should _not_ return a result here, all errors should be handled internally
@@ -176,15 +166,13 @@ impl<'a> LifecycleManager<'a> {
 
         let next_lifecycle_state = if let Some(config) = config.clone() {
             match state.lifecycle {
-                LifecycleStates::Provisioning => {
-                    self.handle_provisioning(&config, &state).await
-                }
-                LifecycleStates::Running => self.handle_running(&config, &state).await,
-                LifecycleStates::Stopping => self.handle_stopping(&config).await,
-                LifecycleStates::Stopped => self.handle_stopped(&state).await,
-                LifecycleStates::Erroring => self.handle_erroring(&config, &state).await,
-                LifecycleStates::Deleting => unreachable!("handled below"),
-                LifecycleStates::Deleted => break,
+                LifecycleState::Initializing => self.handle_initializing(&config,
&state).await, + LifecycleState::Running => self.handle_running(&config, &state).await, + LifecycleState::Stopping => self.handle_stopping(&config).await, + LifecycleState::Stopped => self.handle_stopped(&state).await, + LifecycleState::Repairing => self.handle_repairing(&config, &state).await, + LifecycleState::Deleting => unreachable!("handled below"), + LifecycleState::Deleted => break, } } else { self.handle_deleting(&state).await From b43949afa6e7f5f35974351339afefe417ece61b Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 15:00:25 +1200 Subject: [PATCH 17/40] feat: Move throttle to start of loop to ensure it always runs --- coordinator/src/lifecycle.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index 7647fb8b..7ffad510 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -155,6 +155,8 @@ impl<'a> LifecycleManager<'a> { // should _not_ return a result here, all errors should be handled internally pub async fn run(&self) -> anyhow::Result<()> { loop { + tokio::time::sleep(std::time::Duration::from_millis(LOOP_THROTTLE_MS)).await; + let config = self .registry .fetch_indexer(&self.account_id, &self.function_name) @@ -184,8 +186,6 @@ impl<'a> LifecycleManager<'a> { self.state_manager .set_state(&config.unwrap(), state) .await?; - - tokio::time::sleep(std::time::Duration::from_millis(LOOP_THROTTLE_MS)).await; } Ok(()) From 40021f63f43170070a40cd85ffb46634f2e02a0d Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 15:00:44 +1200 Subject: [PATCH 18/40] feat: Stop lifecycle on error --- coordinator/src/lifecycle.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index 7ffad510..c2cf6075 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -133,8 +133,8 @@ impl<'a> LifecycleManager<'a> { _config: &IndexerConfig, _state: &IndexerState, ) -> LifecycleState { - // TODO Add more robust error handling - LifecycleState::Repairing + // TODO Add more robust error handling, for now just stop + LifecycleState::Stopping } async fn handle_deleting(&self, state: &IndexerState) -> LifecycleState { From 8538874c73d3e3fe133d964406ec7b9d77b74b7f Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 15:52:56 +1200 Subject: [PATCH 19/40] refactor: Abstract stopping of block_streams/executors --- coordinator/src/handlers/block_streams.rs | 8 +++ coordinator/src/handlers/executors.rs | 8 +++ coordinator/src/indexer_state.rs | 6 +-- coordinator/src/lifecycle.rs | 63 ++++++++++++++--------- 4 files changed, 58 insertions(+), 27 deletions(-) diff --git a/coordinator/src/handlers/block_streams.rs b/coordinator/src/handlers/block_streams.rs index 8fdfc7b3..5f64a3f3 100644 --- a/coordinator/src/handlers/block_streams.rs +++ b/coordinator/src/handlers/block_streams.rs @@ -259,4 +259,12 @@ impl BlockStreamsHandlerImpl { Ok(()) } + + pub async fn stop_if_needed(&self, config: &IndexerConfig) -> anyhow::Result<()> { + if let Some(block_stream) = self.get(config).await? 
{ + self.stop(block_stream.stream_id).await?; + } + + Ok(()) + } } diff --git a/coordinator/src/handlers/executors.rs b/coordinator/src/handlers/executors.rs index eae12dcd..dc00a0bd 100644 --- a/coordinator/src/handlers/executors.rs +++ b/coordinator/src/handlers/executors.rs @@ -137,4 +137,12 @@ impl ExecutorsHandlerImpl { Ok(()) } + + pub async fn stop_if_needed(&self, config: &IndexerConfig) -> anyhow::Result<()> { + if let Some(executor) = self.get(config).await? { + self.stop(executor.executor_id).await?; + } + + Ok(()) + } } diff --git a/coordinator/src/indexer_state.rs b/coordinator/src/indexer_state.rs index 7688eec0..cf745606 100644 --- a/coordinator/src/indexer_state.rs +++ b/coordinator/src/indexer_state.rs @@ -4,7 +4,7 @@ use anyhow::Context; use near_primitives::types::AccountId; use crate::indexer_config::IndexerConfig; -use crate::lifecycle::Lifecycle; +use crate::lifecycle::LifecycleState; use crate::redis::{KeyProvider, RedisClient}; #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] @@ -24,7 +24,7 @@ pub struct IndexerState { pub block_stream_synced_at: Option, pub enabled: bool, pub provisioned_state: ProvisionedState, - pub lifecycle: Lifecycle, + pub lifecycle: LifecycleState, } impl KeyProvider for IndexerState { @@ -59,7 +59,7 @@ impl IndexerStateManagerImpl { block_stream_synced_at: None, enabled: true, provisioned_state: ProvisionedState::Unprovisioned, - lifecycle: Lifecycle::default(), + lifecycle: LifecycleState::default(), } } diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index c2cf6075..0bf6cb51 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -17,7 +17,7 @@ pub enum LifecycleState { Running, Stopping, Stopped, - Repairing, + Repairing, // TODO Add `error` to enable reparation Deleting, Deleted, } @@ -101,18 +101,19 @@ impl<'a> LifecycleManager<'a> { } async fn handle_stopping(&self, config: &IndexerConfig) -> LifecycleState { - if let Some(block_stream) = self.block_streams_handler.get(config).await.unwrap() { - self.block_streams_handler - .stop(block_stream.stream_id) - .await - .unwrap(); + if self + .block_streams_handler + .stop_if_needed(config) + .await + .is_err() + { + // Retry + return LifecycleState::Stopping; } - if let Some(executor) = self.executors_handler.get(config).await.unwrap() { - self.executors_handler - .stop(executor.executor_id) - .await - .unwrap(); + if self.executors_handler.stop_if_needed(config).await.is_err() { + // Retry + return LifecycleState::Stopping; } LifecycleState::Stopped @@ -153,18 +154,23 @@ impl<'a> LifecycleManager<'a> { } // should _not_ return a result here, all errors should be handled internally - pub async fn run(&self) -> anyhow::Result<()> { + pub async fn run(&self) { loop { tokio::time::sleep(std::time::Duration::from_millis(LOOP_THROTTLE_MS)).await; - let config = self + let config = match self .registry .fetch_indexer(&self.account_id, &self.function_name) - .await?; - let mut state = self - .state_manager - .get_state(&config.clone().unwrap()) - .await?; + .await + { + Ok(config) => config, + Err(_) => continue, + }; + + let mut state = match self.state_manager.get_state(&config.clone().unwrap()).await { + Ok(state) => state, + Err(_) => continue, + }; let next_lifecycle_state = if let Some(config) = config.clone() { match state.lifecycle { @@ -182,12 +188,21 @@ impl<'a> LifecycleManager<'a> { state.lifecycle = next_lifecycle_state; - // only set if not deleting - self.state_manager - .set_state(&config.unwrap(), state) 
- .await?; + loop { + match self + .state_manager + // FIX: `config` could be `None` + .set_state(&config.clone().unwrap(), state.clone()) + .await + { + Ok(_) => break, + Err(e) => { + tracing::error!("Failed to set state: {:?}. Retrying...", e); + + tokio::time::sleep(std::time::Duration::from_millis(1000)).await; + } + } + } } - - Ok(()) } } From 1c6f5c82148ade59c2ce9243f1855f013de51bfb Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 15:57:20 +1200 Subject: [PATCH 20/40] refactor: Store `initial_config` to remove need to handle `Option` --- coordinator/src/lifecycle.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index 0bf6cb51..ad105db1 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -1,5 +1,3 @@ -use near_primitives::types::AccountId; - use crate::handlers::block_streams::BlockStreamsHandler; use crate::handlers::data_layer::DataLayerHandler; use crate::handlers::executors::ExecutorsHandler; @@ -23,8 +21,7 @@ pub enum LifecycleState { } pub struct LifecycleManager<'a> { - account_id: AccountId, - function_name: String, + initial_config: IndexerConfig, block_streams_handler: &'a BlockStreamsHandler, executors_handler: &'a ExecutorsHandler, data_layer_handler: &'a DataLayerHandler, @@ -36,8 +33,7 @@ pub struct LifecycleManager<'a> { impl<'a> LifecycleManager<'a> { #[allow(clippy::too_many_arguments)] fn new( - account_id: AccountId, - function_name: String, + initial_config: IndexerConfig, block_streams_handler: &'a BlockStreamsHandler, executors_handler: &'a ExecutorsHandler, data_layer_handler: &'a DataLayerHandler, @@ -46,8 +42,7 @@ impl<'a> LifecycleManager<'a> { redis_client: &'a RedisClient, ) -> Self { Self { - account_id, - function_name, + initial_config, block_streams_handler, executors_handler, data_layer_handler, @@ -153,21 +148,23 @@ impl<'a> LifecycleManager<'a> { LifecycleState::Deleted } - // should _not_ return a result here, all errors should be handled internally pub async fn run(&self) { loop { tokio::time::sleep(std::time::Duration::from_millis(LOOP_THROTTLE_MS)).await; let config = match self .registry - .fetch_indexer(&self.account_id, &self.function_name) + .fetch_indexer( + &self.initial_config.account_id, + &self.initial_config.function_name, + ) .await { Ok(config) => config, Err(_) => continue, }; - let mut state = match self.state_manager.get_state(&config.clone().unwrap()).await { + let mut state = match self.state_manager.get_state(&self.initial_config).await { Ok(state) => state, Err(_) => continue, }; @@ -191,8 +188,7 @@ impl<'a> LifecycleManager<'a> { loop { match self .state_manager - // FIX: `config` could be `None` - .set_state(&config.clone().unwrap(), state.clone()) + .set_state(&self.initial_config, state.clone()) .await { Ok(_) => break, From bbd9f7a900ae54c1a572c77fbc771590377c8d58 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 16:01:30 +1200 Subject: [PATCH 21/40] feat: Clean up redis state --- coordinator/src/lifecycle.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index ad105db1..8f694002 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -3,7 +3,7 @@ use crate::handlers::data_layer::DataLayerHandler; use crate::handlers::executors::ExecutorsHandler; use crate::indexer_config::IndexerConfig; use crate::indexer_state::{IndexerState, 
IndexerStateManager}; -use crate::redis::RedisClient; +use crate::redis::{KeyProvider, RedisClient}; use crate::registry::Registry; const LOOP_THROTTLE_MS: u64 = 500; @@ -134,6 +134,21 @@ impl<'a> LifecycleManager<'a> { } async fn handle_deleting(&self, state: &IndexerState) -> LifecycleState { + if self.state_manager.delete_state(state).await.is_err() { + // Retry + return LifecycleState::Deleting; + } + + if self + .redis_client + .del(state.get_redis_stream_key()) + .await + .is_err() + { + // Retry + return LifecycleState::Deleting; + } + if self .data_layer_handler .ensure_deprovisioned(state.account_id.clone(), state.function_name.clone()) @@ -143,8 +158,6 @@ impl<'a> LifecycleManager<'a> { return LifecycleState::Repairing; } - // remove redis state - LifecycleState::Deleted } From 4165cfcdb5393a110f2d12ca3ca276ad662aa4cd Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 16:42:29 +1200 Subject: [PATCH 22/40] feat: Spawn lifecycle manager per indexer --- coordinator/src/handlers/block_streams.rs | 4 +- coordinator/src/handlers/data_layer.rs | 3 +- coordinator/src/handlers/executors.rs | 3 +- coordinator/src/lifecycle.rs | 3 +- coordinator/src/main.rs | 52 ++++++++++++++++++----- 5 files changed, 50 insertions(+), 15 deletions(-) diff --git a/coordinator/src/handlers/block_streams.rs b/coordinator/src/handlers/block_streams.rs index 5f64a3f3..e61ed2d4 100644 --- a/coordinator/src/handlers/block_streams.rs +++ b/coordinator/src/handlers/block_streams.rs @@ -19,8 +19,10 @@ use crate::utils::exponential_retry; #[cfg(not(test))] pub use BlockStreamsHandlerImpl as BlockStreamsHandler; #[cfg(test)] -pub use MockBlockStreamsHandlerImpl as BlockStreamsHandler; +pub use BlockStreamsHandlerImpl as BlockStreamsHandler; +//pub use MockBlockStreamsHandlerImpl as BlockStreamsHandler; +#[derive(Clone)] pub struct BlockStreamsHandlerImpl { client: BlockStreamerClient, redis_client: RedisClient, diff --git a/coordinator/src/handlers/data_layer.rs b/coordinator/src/handlers/data_layer.rs index 234f6beb..2a86df50 100644 --- a/coordinator/src/handlers/data_layer.rs +++ b/coordinator/src/handlers/data_layer.rs @@ -15,10 +15,11 @@ use crate::indexer_config::IndexerConfig; #[cfg(not(test))] pub use DataLayerHandlerImpl as DataLayerHandler; #[cfg(test)] -pub use MockDataLayerHandlerImpl as DataLayerHandler; +pub use DataLayerHandlerImpl as DataLayerHandler; type TaskId = String; +#[derive(Clone)] pub struct DataLayerHandlerImpl { client: DataLayerClient, } diff --git a/coordinator/src/handlers/executors.rs b/coordinator/src/handlers/executors.rs index dc00a0bd..f9b06d3d 100644 --- a/coordinator/src/handlers/executors.rs +++ b/coordinator/src/handlers/executors.rs @@ -15,8 +15,9 @@ use crate::utils::exponential_retry; #[cfg(not(test))] pub use ExecutorsHandlerImpl as ExecutorsHandler; #[cfg(test)] -pub use MockExecutorsHandlerImpl as ExecutorsHandler; +pub use ExecutorsHandlerImpl as ExecutorsHandler; +#[derive(Clone)] pub struct ExecutorsHandlerImpl { client: RunnerClient, } diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs index 8f694002..65b5053b 100644 --- a/coordinator/src/lifecycle.rs +++ b/coordinator/src/lifecycle.rs @@ -31,8 +31,7 @@ pub struct LifecycleManager<'a> { } impl<'a> LifecycleManager<'a> { - #[allow(clippy::too_many_arguments)] - fn new( + pub fn new( initial_config: IndexerConfig, block_streams_handler: &'a BlockStreamsHandler, executors_handler: &'a ExecutorsHandler, diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs index 
61c29da1..cf25ee61 100644 --- a/coordinator/src/main.rs +++ b/coordinator/src/main.rs @@ -7,16 +7,19 @@ // - [ ] Introduce typestate? // - [ ] tests +use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use near_primitives::types::AccountId; +use tokio::task::JoinHandle; use tracing_subscriber::prelude::*; use crate::handlers::block_streams::BlockStreamsHandler; use crate::handlers::data_layer::DataLayerHandler; use crate::handlers::executors::ExecutorsHandler; use crate::indexer_state::IndexerStateManager; +use crate::lifecycle::LifecycleManager; use crate::redis::RedisClient; use crate::registry::Registry; use crate::synchroniser::Synchroniser; @@ -31,7 +34,7 @@ mod server; mod synchroniser; mod utils; -const CONTROL_LOOP_THROTTLE_SECONDS: Duration = Duration::from_secs(1); +const LOOP_THROTTLE_SECONDS: Duration = Duration::from_secs(1); async fn sleep(duration: Duration) -> anyhow::Result<()> { tokio::time::sleep(duration).await; @@ -73,14 +76,6 @@ async fn main() -> anyhow::Result<()> { let executors_handler = ExecutorsHandler::connect(&runner_url)?; let data_layer_handler = DataLayerHandler::connect(&runner_url)?; let indexer_state_manager = Arc::new(IndexerStateManager::new(redis_client.clone())); - let synchroniser = Synchroniser::new( - &block_streams_handler, - &executors_handler, - &data_layer_handler, - ®istry, - &indexer_state_manager, - &redis_client, - ); tokio::spawn({ let indexer_state_manager = indexer_state_manager.clone(); @@ -88,7 +83,44 @@ async fn main() -> anyhow::Result<()> { async move { server::init(grpc_port, indexer_state_manager, registry).await } }); + // handle removal + let mut lifecycle_tasks = HashMap::>::new(); + loop { - tokio::try_join!(synchroniser.sync(), sleep(CONTROL_LOOP_THROTTLE_SECONDS))?; + let indexer_registry = registry.fetch().await?; + + for config in indexer_registry.iter() { + if lifecycle_tasks.contains_key(&config.get_full_name()) { + continue; + } + + let handle = tokio::spawn({ + let indexer_state_manager = indexer_state_manager.clone(); + let config = config.clone(); + let registry = registry.clone(); + let redis_client = redis_client.clone(); + let block_streams_handler = block_streams_handler.clone(); + let data_layer_handler = data_layer_handler.clone(); + let executors_handler = executors_handler.clone(); + + async move { + let lifecycle_manager = LifecycleManager::new( + config, + &block_streams_handler, + &executors_handler, + &data_layer_handler, + ®istry, + &indexer_state_manager, + &redis_client, + ); + + lifecycle_manager.run().await + } + }); + + lifecycle_tasks.insert(config.get_full_name(), handle); + } + + sleep(LOOP_THROTTLE_SECONDS).await?; } } From b1a494418b4fff5c77c5c0f36a4b566db9517506 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Mon, 15 Jul 2024 16:53:48 +1200 Subject: [PATCH 23/40] feat: Remove finished lifecycles --- coordinator/src/main.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs index cf25ee61..f9adec57 100644 --- a/coordinator/src/main.rs +++ b/coordinator/src/main.rs @@ -83,7 +83,6 @@ async fn main() -> anyhow::Result<()> { async move { server::init(grpc_port, indexer_state_manager, registry).await } }); - // handle removal let mut lifecycle_tasks = HashMap::>::new(); loop { @@ -121,6 +120,17 @@ async fn main() -> anyhow::Result<()> { lifecycle_tasks.insert(config.get_full_name(), handle); } + let finished_tasks: Vec = lifecycle_tasks + .iter() + .filter_map(|(name, task)| 
task.is_finished().then_some(name.clone())) + .collect(); + + for indexer_name in finished_tasks { + tracing::info!(indexer_name, "Lifecycle has finished, removing..."); + + lifecycle_tasks.remove(&indexer_name); + } + sleep(LOOP_THROTTLE_SECONDS).await?; } } From 1ef544ab5834e39c41f97ee6fc787927853a07fe Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Tue, 16 Jul 2024 13:38:00 +1200 Subject: [PATCH 24/40] chore: Remove notes --- coordinator/src/main.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs index f9adec57..1f8332c4 100644 --- a/coordinator/src/main.rs +++ b/coordinator/src/main.rs @@ -1,12 +1,3 @@ -// TODO -// - [ ] Ensure all states are appropriate, i.e. do we need both Deprovisioning and Deleting? -// - [ ] Fully implement state handling, i.e. block stream/executor synchronisation -// - [ ] Spawn lifecycle tasks from `main()` -// - [ ] Extract client from handlers to make mocking/testing easier? is handler the best place for -// those higher level methods -// - [ ] Introduce typestate? -// - [ ] tests - use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; @@ -22,7 +13,6 @@ use crate::indexer_state::IndexerStateManager; use crate::lifecycle::LifecycleManager; use crate::redis::RedisClient; use crate::registry::Registry; -use crate::synchroniser::Synchroniser; mod handlers; mod indexer_config; From 40c471e106ecb9f9de7a08210e7be8c7eed85f53 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Tue, 16 Jul 2024 13:41:05 +1200 Subject: [PATCH 25/40] refactor: Remove mocks to enable cloning --- coordinator/src/handlers/block_streams.rs | 10 ++-------- coordinator/src/handlers/data_layer.rs | 10 ++-------- coordinator/src/handlers/executors.rs | 10 ++-------- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/coordinator/src/handlers/block_streams.rs b/coordinator/src/handlers/block_streams.rs index e61ed2d4..2a00efa0 100644 --- a/coordinator/src/handlers/block_streams.rs +++ b/coordinator/src/handlers/block_streams.rs @@ -16,20 +16,14 @@ use crate::indexer_config::IndexerConfig; use crate::redis::{KeyProvider, RedisClient}; use crate::utils::exponential_retry; -#[cfg(not(test))] -pub use BlockStreamsHandlerImpl as BlockStreamsHandler; -#[cfg(test)] -pub use BlockStreamsHandlerImpl as BlockStreamsHandler; -//pub use MockBlockStreamsHandlerImpl as BlockStreamsHandler; - #[derive(Clone)] -pub struct BlockStreamsHandlerImpl { +pub struct BlockStreamsHandler { client: BlockStreamerClient, redis_client: RedisClient, } #[cfg_attr(test, mockall::automock)] -impl BlockStreamsHandlerImpl { +impl BlockStreamsHandler { pub fn connect(block_streamer_url: &str, redis_client: RedisClient) -> anyhow::Result { let channel = Channel::from_shared(block_streamer_url.to_string()) .context("Block Streamer URL is invalid")? 
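Dropping the `automock` aliases is the most direct way to make these handlers `Clone`, though it gives up the generated mocks that the deleted synchroniser tests relied on. For reference, a sketch of a middle ground, not used by this series (the trait and method names here are hypothetical): keep `automock` on a narrow trait and clone an `Arc` of it instead.

    use std::sync::Arc;

    // mockall requires `automock` to sit above `async_trait`.
    #[cfg_attr(test, mockall::automock)]
    #[tonic::async_trait]
    pub trait BlockStreams {
        async fn stop(&self, stream_id: String) -> anyhow::Result<()>;
    }

    // Cloning the `Arc` is a pointer copy: production wires in the
    // gRPC-backed implementation, tests a `MockBlockStreams`.
    pub type SharedBlockStreams = Arc<dyn BlockStreams + Send + Sync>;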
diff --git a/coordinator/src/handlers/data_layer.rs b/coordinator/src/handlers/data_layer.rs index 2a86df50..e7fcf780 100644 --- a/coordinator/src/handlers/data_layer.rs +++ b/coordinator/src/handlers/data_layer.rs @@ -12,20 +12,14 @@ use tonic::{Request, Status}; use crate::indexer_config::IndexerConfig; -#[cfg(not(test))] -pub use DataLayerHandlerImpl as DataLayerHandler; -#[cfg(test)] -pub use DataLayerHandlerImpl as DataLayerHandler; - type TaskId = String; #[derive(Clone)] -pub struct DataLayerHandlerImpl { +pub struct DataLayerHandler { client: DataLayerClient, } -#[cfg_attr(test, mockall::automock)] -impl DataLayerHandlerImpl { +impl DataLayerHandler { pub fn connect(runner_url: &str) -> anyhow::Result { let channel = Channel::from_shared(runner_url.to_string()) .context("Runner URL is invalid")? diff --git a/coordinator/src/handlers/executors.rs b/coordinator/src/handlers/executors.rs index f9b06d3d..1068ee76 100644 --- a/coordinator/src/handlers/executors.rs +++ b/coordinator/src/handlers/executors.rs @@ -12,18 +12,12 @@ use crate::indexer_config::IndexerConfig; use crate::redis::KeyProvider; use crate::utils::exponential_retry; -#[cfg(not(test))] -pub use ExecutorsHandlerImpl as ExecutorsHandler; -#[cfg(test)] -pub use ExecutorsHandlerImpl as ExecutorsHandler; - #[derive(Clone)] -pub struct ExecutorsHandlerImpl { +pub struct ExecutorsHandler { client: RunnerClient, } -#[cfg_attr(test, mockall::automock)] -impl ExecutorsHandlerImpl { +impl ExecutorsHandler { pub fn connect(runner_url: &str) -> anyhow::Result { let channel = Channel::from_shared(runner_url.to_string()) .context("Runner URL is invalid")? From af3dc45c8f1aca7111ae3b6c85878a4793d36f01 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Tue, 16 Jul 2024 14:12:39 +1200 Subject: [PATCH 26/40] feat: Get block stream by ID --- block-streamer/proto/block_streamer.proto | 9 ++ .../src/server/block_streamer_service.rs | 88 ++++++++++++++++++- 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/block-streamer/proto/block_streamer.proto b/block-streamer/proto/block_streamer.proto index 5d1e0517..015ee264 100644 --- a/block-streamer/proto/block_streamer.proto +++ b/block-streamer/proto/block_streamer.proto @@ -12,6 +12,15 @@ service BlockStreamer { // Lists all current BlockStream processes rpc ListStreams (ListStreamsRequest) returns (ListStreamsResponse); + + // Get info for an existing BlockStream process + rpc GetStream (GetStreamRequest) returns (StreamInfo); +} + +// Request message for getting a BlockStream +message GetStreamRequest { + // ID or handle of the BlockStream + string stream_id = 1; } // Request message for starting a BlockStream diff --git a/block-streamer/src/server/block_streamer_service.rs b/block-streamer/src/server/block_streamer_service.rs index 94ccb22a..1745a351 100644 --- a/block-streamer/src/server/block_streamer_service.rs +++ b/block-streamer/src/server/block_streamer_service.rs @@ -59,6 +59,33 @@ impl BlockStreamerService { #[tonic::async_trait] impl blockstreamer::block_streamer_server::BlockStreamer for BlockStreamerService { + #[tracing::instrument(skip(self))] + async fn get_stream( + &self, + request: Request, + ) -> Result, Status> { + let request = request.into_inner(); + + let lock = self.block_streams.lock().map_err(|err| { + tracing::error!(?err, "Failed to acquire `block_streams` lock"); + tonic::Status::internal("Failed to acquire `block_streams` lock") + })?; + + if let Some(stream) = lock.get(&request.stream_id) { + Ok(Response::new(StreamInfo { + stream_id: 
request.stream_id, + account_id: stream.indexer_config.account_id.to_string(), + function_name: stream.indexer_config.function_name.to_string(), + version: stream.version, + })) + } else { + Err(Status::not_found(format!( + "Block Stream with ID {} does not exist", + request.stream_id + ))) + } + } + #[tracing::instrument(skip(self))] async fn start_stream( &self, @@ -171,7 +198,11 @@ impl blockstreamer::block_streamer_server::BlockStreamer for BlockStreamerServic &self, _request: Request, ) -> Result, Status> { - let lock = self.block_streams.lock().unwrap(); + let lock = self.block_streams.lock().map_err(|err| { + tracing::error!(?err, "Failed to acquire `block_streams` lock"); + tonic::Status::internal("Failed to acquire `block_streams` lock") + })?; + let block_streams: Vec = lock .values() .map(|block_stream| StreamInfo { @@ -234,6 +265,61 @@ mod tests { ) } + #[tokio::test] + async fn get_existing_block_stream() { + let block_streamer_service = create_block_streamer_service(); + + { + let lock = block_streamer_service.get_block_streams_lock().unwrap(); + assert_eq!(lock.len(), 0); + } + + block_streamer_service + .start_stream(Request::new(StartStreamRequest { + start_block_height: 0, + account_id: "morgs.near".to_string(), + function_name: "test".to_string(), + version: 0, + redis_stream: "stream".to_string(), + rule: Some(start_stream_request::Rule::ActionAnyRule(ActionAnyRule { + affected_account_id: "queryapi.dataplatform.near".to_string(), + status: 1, + })), + })) + .await + .unwrap(); + + let stream = block_streamer_service + .get_stream(Request::new(GetStreamRequest { + stream_id: "16210176318434468568".to_string(), + })) + .await + .unwrap(); + + assert_eq!( + stream.into_inner().stream_id, + "16210176318434468568".to_string() + ); + } + + #[tokio::test] + async fn get_non_existant_block_stream() { + let block_streamer_service = create_block_streamer_service(); + + { + let lock = block_streamer_service.get_block_streams_lock().unwrap(); + assert_eq!(lock.len(), 0); + } + + let stream_response = block_streamer_service + .get_stream(Request::new(GetStreamRequest { + stream_id: "16210176318434468568".to_string(), + })) + .await; + + assert_eq!(stream_response.err().unwrap().code(), tonic::Code::NotFound); + } + #[tokio::test] async fn starts_a_block_stream() { let block_streamer_service = create_block_streamer_service(); From 2760d13b4c29b7748921d564b284b5e032865409 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Tue, 16 Jul 2024 15:21:15 +1200 Subject: [PATCH 27/40] feat: Get Executor by id --- runner-client/proto/runner.proto | 8 +++ runner/protos/runner.proto | 8 +++ .../services/runner/runner-service.test.ts | 53 +++++++++++++++++++ .../server/services/runner/runner-service.ts | 21 ++++++++ 4 files changed, 90 insertions(+) diff --git a/runner-client/proto/runner.proto b/runner-client/proto/runner.proto index 82c457f7..869f3538 100644 --- a/runner-client/proto/runner.proto +++ b/runner-client/proto/runner.proto @@ -10,6 +10,14 @@ service Runner { // Lists all Runner executor rpc ListExecutors (ListExecutorsRequest) returns (ListExecutorsResponse); + + // Get Executor info + rpc GetExecutor (GetExecutorRequest) returns (ExecutorInfo); +} + +// Get Executor request +message GetExecutorRequest { + string executor_id = 1; } // Start Executor Request diff --git a/runner/protos/runner.proto b/runner/protos/runner.proto index 82c457f7..869f3538 100644 --- a/runner/protos/runner.proto +++ b/runner/protos/runner.proto @@ -10,6 +10,14 @@ service Runner { // Lists all Runner 
executor rpc ListExecutors (ListExecutorsRequest) returns (ListExecutorsResponse); + + // Get Executor info + rpc GetExecutor (GetExecutorRequest) returns (ExecutorInfo); +} + +// Get Executor request +message GetExecutorRequest { + string executor_id = 1; } // Start Executor Request diff --git a/runner/src/server/services/runner/runner-service.test.ts b/runner/src/server/services/runner/runner-service.test.ts index bfbf9ca8..e6df6fe9 100644 --- a/runner/src/server/services/runner/runner-service.test.ts +++ b/runner/src/server/services/runner/runner-service.test.ts @@ -32,6 +32,59 @@ describe('Runner gRPC Service', () => { genericIndexerConfig = new IndexerConfig(BASIC_REDIS_STREAM, BASIC_ACCOUNT_ID, BASIC_FUNCTION_NAME, BASIC_VERSION, BASIC_CODE, BASIC_SCHEMA, LogLevel.INFO); }); + it('get non existant executor', async () => { + const streamHandlerType = jest.fn().mockImplementation((indexerConfig) => { + return { + indexerConfig, + executorContext: BASIC_EXECUTOR_CONTEXT + }; + }); + const service = getRunnerService(new Map(), streamHandlerType); + + await new Promise((resolve) => { + service.GetExecutor({ request: { executorId: BASIC_EXECUTOR_ID } } as any, (err) => { + expect(err).toEqual({ + code: grpc.status.NOT_FOUND, + message: `Executor with ID ${BASIC_EXECUTOR_ID} does not exist` + }); + resolve(null); + }); + }); + }); + + it('gets an existing executor', async () => { + const streamHandlerType = jest.fn().mockImplementation((indexerConfig) => { + return { + indexerConfig, + executorContext: BASIC_EXECUTOR_CONTEXT + }; + }); + const service = getRunnerService(new Map(), streamHandlerType); + const request = generateRequest(BASIC_REDIS_STREAM + '-A', BASIC_ACCOUNT_ID, BASIC_FUNCTION_NAME, BASIC_CODE, BASIC_SCHEMA, BASIC_VERSION); + + await new Promise((resolve, reject) => { + service.StartExecutor(request, (err) => { + if (err) reject(err); + resolve(null); + }); + }); + + await new Promise((resolve, reject) => { + service.GetExecutor({ request: { executorId: BASIC_EXECUTOR_ID } } as any, (err, response) => { + if (err) reject(err); + + expect(response).toEqual({ + executorId: BASIC_EXECUTOR_ID, + accountId: genericIndexerConfig.accountId, + functionName: genericIndexerConfig.functionName, + status: IndexerStatus.RUNNING, + version: '1' + }); + resolve(null); + }); + }); + }); + it('starts a executor with correct settings', () => { const service = getRunnerService(new Map(), genericStreamHandlerType); const mockCallback = jest.fn() as unknown as any; diff --git a/runner/src/server/services/runner/runner-service.ts b/runner/src/server/services/runner/runner-service.ts index 8584d185..99b4d20b 100644 --- a/runner/src/server/services/runner/runner-service.ts +++ b/runner/src/server/services/runner/runner-service.ts @@ -8,6 +8,7 @@ import parentLogger from '../../../logger'; import { type RunnerHandlers } from '../../../generated/runner/Runner'; import { type StartExecutorResponse__Output, type StartExecutorResponse } from '../../../generated/runner/StartExecutorResponse'; import { type StartExecutorRequest__Output } from '../../../generated/runner/StartExecutorRequest'; +import { type GetExecutorRequest__Output } from '../../../generated/runner/GetExecutorRequest'; import { type StopExecutorRequest__Output } from '../../../generated/runner/StopExecutorRequest'; import { type StopExecutorResponse__Output, type StopExecutorResponse } from '../../../generated/runner/StopExecutorResponse'; import { type ListExecutorsRequest__Output } from '../../../generated/runner/ListExecutorsRequest'; 
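The handler added in the next hunk pairs with a Rust caller on the Coordinator side. A sketch of that consuming code, using the request shape from the proto above and mapping gRPC `NotFound` to `None` (a later patch in this series wires up a very similar method on `ExecutorsHandler`):

    use runner::runner_client::RunnerClient;
    use runner::GetExecutorRequest;
    use tonic::transport::Channel;
    use tonic::Request;

    // `NotFound` means "no executor running for this ID", which callers
    // treat as `None` rather than as an error.
    async fn get_executor(
        client: &mut RunnerClient<Channel>,
        executor_id: String,
    ) -> anyhow::Result<Option<runner::ExecutorInfo>> {
        match client
            .get_executor(Request::new(GetExecutorRequest { executor_id }))
            .await
        {
            Ok(response) => Ok(Some(response.into_inner())),
            Err(status) if status.code() == tonic::Code::NotFound => Ok(None),
            Err(status) => Err(status.into()),
        }
    }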
@@ -19,6 +20,26 @@ export function getRunnerService ( StreamHandlerType: typeof StreamHandler = StreamHandler ): RunnerHandlers { const RunnerService: RunnerHandlers = { + GetExecutor (call: ServerUnaryCall, callback: sendUnaryData): void { + const executorId = call.request.executorId; + const executor = executors.get(executorId); + + if (executor) { + callback(null, { + executorId, + accountId: executor.indexerConfig.accountId, + functionName: executor.indexerConfig.functionName, + version: executor.indexerConfig.version.toString(), + status: executor.executorContext.status + }); + } else { + const notFoundError = { + code: grpc.status.NOT_FOUND, + message: `Executor with ID ${executorId} does not exist` + }; + callback(notFoundError, null); + } + }, StartExecutor (call: ServerUnaryCall, callback: sendUnaryData): void { // Validate request const validationResult = validateStartExecutorRequest(call.request); From bab4878b79ce05b498b824b596adef0f247de442 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Tue, 16 Jul 2024 16:38:06 +1200 Subject: [PATCH 28/40] refactor: Get stream by account id/function name --- block-streamer/proto/block_streamer.proto | 6 ++++-- .../src/server/block_streamer_service.rs | 19 +++++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/block-streamer/proto/block_streamer.proto b/block-streamer/proto/block_streamer.proto index 015ee264..92d3a2c9 100644 --- a/block-streamer/proto/block_streamer.proto +++ b/block-streamer/proto/block_streamer.proto @@ -19,8 +19,10 @@ service BlockStreamer { // Request message for getting a BlockStream message GetStreamRequest { - // ID or handle of the BlockStream - string stream_id = 1; + // Account ID which the indexer is defined under + string account_id = 1; + // Name of the indexer + string function_name = 2; } // Request message for starting a BlockStream diff --git a/block-streamer/src/server/block_streamer_service.rs b/block-streamer/src/server/block_streamer_service.rs index 1745a351..88d426b9 100644 --- a/block-streamer/src/server/block_streamer_service.rs +++ b/block-streamer/src/server/block_streamer_service.rs @@ -71,17 +71,22 @@ impl blockstreamer::block_streamer_server::BlockStreamer for BlockStreamerServic tonic::Status::internal("Failed to acquire `block_streams` lock") })?; - if let Some(stream) = lock.get(&request.stream_id) { + let stream_entry = lock.iter().find(|(_, block_stream)| { + block_stream.indexer_config.account_id == request.account_id + && block_stream.indexer_config.function_name == request.function_name + }); + + if let Some((stream_id, stream)) = stream_entry { Ok(Response::new(StreamInfo { - stream_id: request.stream_id, + stream_id: stream_id.to_string(), account_id: stream.indexer_config.account_id.to_string(), function_name: stream.indexer_config.function_name.to_string(), version: stream.version, })) } else { Err(Status::not_found(format!( - "Block Stream with ID {} does not exist", - request.stream_id + "Block Stream for account {} and name {} does not exist", + request.account_id, request.function_name ))) } } @@ -291,7 +296,8 @@ mod tests { let stream = block_streamer_service .get_stream(Request::new(GetStreamRequest { - stream_id: "16210176318434468568".to_string(), + account_id: "morgs.near".to_string(), + function_name: "test".to_string(), })) .await .unwrap(); @@ -313,7 +319,8 @@ mod tests { let stream_response = block_streamer_service .get_stream(Request::new(GetStreamRequest { - stream_id: "16210176318434468568".to_string(), + account_id: 
"morgs.near".to_string(), + function_name: "test".to_string(), })) .await; From 0421fb0244b4d0cfee59d57a7cf6308c4f0b68f2 Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Tue, 16 Jul 2024 16:46:02 +1200 Subject: [PATCH 29/40] refactor: Get executor by account/name --- runner-client/proto/runner.proto | 3 ++- runner/protos/runner.proto | 3 ++- .../src/server/services/runner/runner-service.test.ts | 6 +++--- runner/src/server/services/runner/runner-service.ts | 10 ++++++---- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/runner-client/proto/runner.proto b/runner-client/proto/runner.proto index 869f3538..51045f8c 100644 --- a/runner-client/proto/runner.proto +++ b/runner-client/proto/runner.proto @@ -17,7 +17,8 @@ service Runner { // Get Executor request message GetExecutorRequest { - string executor_id = 1; + string account_id = 1; + string function_name = 2; } // Start Executor Request diff --git a/runner/protos/runner.proto b/runner/protos/runner.proto index 869f3538..51045f8c 100644 --- a/runner/protos/runner.proto +++ b/runner/protos/runner.proto @@ -17,7 +17,8 @@ service Runner { // Get Executor request message GetExecutorRequest { - string executor_id = 1; + string account_id = 1; + string function_name = 2; } // Start Executor Request diff --git a/runner/src/server/services/runner/runner-service.test.ts b/runner/src/server/services/runner/runner-service.test.ts index e6df6fe9..f7a9c6dc 100644 --- a/runner/src/server/services/runner/runner-service.test.ts +++ b/runner/src/server/services/runner/runner-service.test.ts @@ -42,10 +42,10 @@ describe('Runner gRPC Service', () => { const service = getRunnerService(new Map(), streamHandlerType); await new Promise((resolve) => { - service.GetExecutor({ request: { executorId: BASIC_EXECUTOR_ID } } as any, (err) => { + service.GetExecutor({ request: { accountId: BASIC_ACCOUNT_ID, functionName: BASIC_FUNCTION_NAME } } as any, (err) => { expect(err).toEqual({ code: grpc.status.NOT_FOUND, - message: `Executor with ID ${BASIC_EXECUTOR_ID} does not exist` + message: `Executor for account ${BASIC_ACCOUNT_ID} and name ${BASIC_FUNCTION_NAME} does not exist` }); resolve(null); }); @@ -70,7 +70,7 @@ describe('Runner gRPC Service', () => { }); await new Promise((resolve, reject) => { - service.GetExecutor({ request: { executorId: BASIC_EXECUTOR_ID } } as any, (err, response) => { + service.GetExecutor({ request: { accountId: BASIC_ACCOUNT_ID, functionName: BASIC_FUNCTION_NAME } } as any, (err, response) => { if (err) reject(err); expect(response).toEqual({ diff --git a/runner/src/server/services/runner/runner-service.ts b/runner/src/server/services/runner/runner-service.ts index 99b4d20b..0d135574 100644 --- a/runner/src/server/services/runner/runner-service.ts +++ b/runner/src/server/services/runner/runner-service.ts @@ -21,10 +21,12 @@ export function getRunnerService ( ): RunnerHandlers { const RunnerService: RunnerHandlers = { GetExecutor (call: ServerUnaryCall, callback: sendUnaryData): void { - const executorId = call.request.executorId; - const executor = executors.get(executorId); + const { accountId, functionName } = call.request; + + const executorEntry = Array.from(executors.entries()).find(([_id, executor]) => executor.indexerConfig.accountId === accountId && executor.indexerConfig.functionName === functionName); - if (executor) { + if (executorEntry) { + const [executorId, executor] = executorEntry; callback(null, { executorId, accountId: executor.indexerConfig.accountId, @@ -35,7 +37,7 @@ export function getRunnerService ( 
} else { const notFoundError = { code: grpc.status.NOT_FOUND, - message: `Executor with ID ${executorId} does not exist` + message: `Executor for account ${accountId} and name ${functionName} does not exist` }; callback(notFoundError, null); } From 9ea8fc0e838405823b1d54d1269f5dea3c848fda Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Tue, 16 Jul 2024 16:56:32 +1200 Subject: [PATCH 30/40] fix: Complete stream/executor get methods --- coordinator/src/handlers/block_streams.rs | 19 +++++++++++++------ coordinator/src/handlers/executors.rs | 23 +++++++++++++++++------ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/coordinator/src/handlers/block_streams.rs b/coordinator/src/handlers/block_streams.rs index 2a00efa0..e01f5fde 100644 --- a/coordinator/src/handlers/block_streams.rs +++ b/coordinator/src/handlers/block_streams.rs @@ -5,8 +5,8 @@ pub use block_streamer::StreamInfo; use anyhow::Context; use block_streamer::block_streamer_client::BlockStreamerClient; use block_streamer::{ - start_stream_request::Rule, ActionAnyRule, ActionFunctionCallRule, ListStreamsRequest, - StartStreamRequest, Status, StopStreamRequest, + start_stream_request::Rule, ActionAnyRule, ActionFunctionCallRule, GetStreamRequest, + ListStreamsRequest, StartStreamRequest, Status, StopStreamRequest, }; use registry_types::StartBlock; use tonic::transport::channel::Channel; @@ -81,12 +81,19 @@ impl BlockStreamsHandler { } pub async fn get(&self, indexer_config: &IndexerConfig) -> anyhow::Result> { - Ok(Some(StreamInfo { - stream_id: "".to_string(), + let request = GetStreamRequest { account_id: indexer_config.account_id.to_string(), function_name: indexer_config.function_name.clone(), - version: indexer_config.get_registry_version(), - })) + }; + + match self.client.clone().get_stream(Request::new(request)).await { + Ok(response) => Ok(Some(response.into_inner())), + Err(status) if status.code() == tonic::Code::NotFound => Ok(None), + Err(err) => Err(err).context(format!( + "Failed to get stream: {}", + indexer_config.get_full_name() + )), + } } pub async fn start( diff --git a/coordinator/src/handlers/executors.rs b/coordinator/src/handlers/executors.rs index 1068ee76..d945acd0 100644 --- a/coordinator/src/handlers/executors.rs +++ b/coordinator/src/handlers/executors.rs @@ -4,7 +4,7 @@ pub use runner::ExecutorInfo; use anyhow::Context; use runner::runner_client::RunnerClient; -use runner::{ListExecutorsRequest, StartExecutorRequest, StopExecutorRequest}; +use runner::{GetExecutorRequest, ListExecutorsRequest, StartExecutorRequest, StopExecutorRequest}; use tonic::transport::channel::Channel; use tonic::Request; @@ -46,13 +46,24 @@ impl ExecutorsHandler { } pub async fn get(&self, config: &IndexerConfig) -> anyhow::Result> { - Ok(Some(ExecutorInfo { - executor_id: "".into(), + let request = GetExecutorRequest { account_id: config.account_id.to_string(), function_name: config.function_name.clone(), - version: 0, - status: "".to_string(), - })) + }; + + match self + .client + .clone() + .get_executor(Request::new(request)) + .await + { + Ok(response) => Ok(Some(response.into_inner())), + Err(status) if status.code() == tonic::Code::NotFound => Ok(None), + Err(err) => Err(err).context(format!( + "Failed to get executor: {}", + config.get_full_name() + )), + } } pub async fn start(&self, indexer_config: &IndexerConfig) -> anyhow::Result<()> { From e3ea8e64c47421a7708968ab26f9f23abb4ca3eb Mon Sep 17 00:00:00 2001 From: Morgan Mccauley Date: Tue, 16 Jul 2024 16:56:52 +1200 Subject: [PATCH 31/40] feat: 
---
 coordinator/src/indexer_state.rs | 101 ++++++++++++++-----------------
 coordinator/src/main.rs          |   9 ++-
 2 files changed, 53 insertions(+), 57 deletions(-)

diff --git a/coordinator/src/indexer_state.rs b/coordinator/src/indexer_state.rs
index cf745606..b046dfc3 100644
--- a/coordinator/src/indexer_state.rs
+++ b/coordinator/src/indexer_state.rs
@@ -17,14 +17,23 @@ pub enum ProvisionedState {
 }
 
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
-pub struct IndexerState {
+pub struct OldIndexerState {
     // store previous config to make comparison easier?
     pub account_id: AccountId,
     pub function_name: String,
     pub block_stream_synced_at: Option<u64>,
     pub enabled: bool,
     pub provisioned_state: ProvisionedState,
-    pub lifecycle: LifecycleState,
+}
+
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
+pub struct IndexerState {
+    // store previous config to make comparison easier?
+    pub account_id: AccountId,
+    pub function_name: String,
+    pub block_stream_synced_at: Option<u64>,
+    pub enabled: bool,
+    pub lifecycle_state: LifecycleState,
 }
 
 impl KeyProvider for IndexerState {
@@ -52,14 +61,46 @@ impl IndexerStateManagerImpl {
         Self { redis_client }
     }
 
+    pub async fn migrate(&self) -> anyhow::Result<()> {
+        let raw_states = self.redis_client.list_indexer_states().await?;
+
+        for raw_state in raw_states {
+            if let Ok(state) = serde_json::from_str::<IndexerState>(&raw_state) {
+                tracing::info!(
+                    "{}/{} already migrated, skipping",
+                    state.account_id,
+                    state.function_name
+                );
+                continue;
+            }
+
+            tracing::info!("Migrating {}", raw_state);
+
+            let old_state: OldIndexerState = serde_json::from_str(&raw_state)?;
+
+            let state = IndexerState {
+                account_id: old_state.account_id,
+                function_name: old_state.function_name,
+                block_stream_synced_at: old_state.block_stream_synced_at,
+                enabled: old_state.enabled,
+                lifecycle_state: LifecycleState::Running,
+            };
+
+            self.redis_client
+                .set(state.get_state_key(), serde_json::to_string(&state)?)
+                .await?;
+        }
+
+        Ok(())
+    }
+
     fn get_default_state(&self, indexer_config: &IndexerConfig) -> IndexerState {
         IndexerState {
             account_id: indexer_config.account_id.clone(),
             function_name: indexer_config.function_name.clone(),
             block_stream_synced_at: None,
             enabled: true,
-            provisioned_state: ProvisionedState::Unprovisioned,
-            lifecycle: LifecycleState::default(),
+            lifecycle_state: LifecycleState::default(),
         }
     }
@@ -105,58 +146,6 @@ impl IndexerStateManagerImpl {
         Ok(())
     }
 
-    pub async fn set_deprovisioning(
-        &self,
-        indexer_state: &IndexerState,
-        task_id: String,
-    ) -> anyhow::Result<()> {
-        let mut state = indexer_state.clone();
-
-        state.provisioned_state = ProvisionedState::Deprovisioning { task_id };
-
-        self.redis_client
-            .set(state.get_state_key(), serde_json::to_string(&state)?)
-            .await?;
-
-        Ok(())
-    }
-
-    pub async fn set_provisioning(
-        &self,
-        indexer_config: &IndexerConfig,
-        task_id: String,
-    ) -> anyhow::Result<()> {
-        let mut indexer_state = self.get_state(indexer_config).await?;
-
-        indexer_state.provisioned_state = ProvisionedState::Provisioning { task_id };
-
-        self.set_state(indexer_config, indexer_state).await?;
-
-        Ok(())
-    }
-
-    pub async fn set_provisioned(&self, indexer_config: &IndexerConfig) -> anyhow::Result<()> {
-        let mut indexer_state = self.get_state(indexer_config).await?;
-
-        indexer_state.provisioned_state = ProvisionedState::Provisioned;
-
-        self.set_state(indexer_config, indexer_state).await?;
-
-        Ok(())
-    }
-
-    pub async fn set_provisioning_failure(
-        &self,
-        indexer_config: &IndexerConfig,
-    ) -> anyhow::Result<()> {
-        let mut indexer_state = self.get_state(indexer_config).await?;
-
-        indexer_state.provisioned_state = ProvisionedState::Failed;
-
-        self.set_state(indexer_config, indexer_state).await?;
-
-        Ok(())
-    }
-
     pub async fn set_enabled(
         &self,
         indexer_config: &IndexerConfig,
diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs
index 1f8332c4..fef71dd3 100644
--- a/coordinator/src/main.rs
+++ b/coordinator/src/main.rs
@@ -21,7 +21,6 @@ mod lifecycle;
 mod redis;
 mod registry;
 mod server;
-mod synchroniser;
 mod utils;
 
 const LOOP_THROTTLE_SECONDS: Duration = Duration::from_secs(1);
@@ -73,6 +72,8 @@ async fn main() -> anyhow::Result<()> {
         async move { server::init(grpc_port, indexer_state_manager, registry).await }
     });
 
+    indexer_state_manager.migrate().await?;
+
     let mut lifecycle_tasks = HashMap::<String, JoinHandle<()>>::new();
 
     loop {
@@ -83,6 +84,12 @@ async fn main() -> anyhow::Result<()> {
             continue;
         }
 
+        tracing::info!(
+            account_id = config.account_id.as_str(),
+            function_name = config.function_name.as_str(),
+            "Starting lifecycle manager"
+        );
+
         let handle = tokio::spawn({
             let indexer_state_manager = indexer_state_manager.clone();
             let config = config.clone();

From 219daae4b4f102c6427b62a92d3dce8ec5ae5466 Mon Sep 17 00:00:00 2001
From: Morgan Mccauley
Date: Tue, 16 Jul 2024 16:57:02 +1200
Subject: [PATCH 32/40] chore: Add logging

---
 coordinator/src/lifecycle.rs | 71 +++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 22 deletions(-)

diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs
index 65b5053b..bf5feca6 100644
--- a/coordinator/src/lifecycle.rs
+++ b/coordinator/src/lifecycle.rs
@@ -1,3 +1,5 @@
+use tracing::{info, warn};
+
 use crate::handlers::block_streams::BlockStreamsHandler;
 use crate::handlers::data_layer::DataLayerHandler;
 use crate::handlers::executors::ExecutorsHandler;
@@ -51,11 +53,14 @@ impl<'a> LifecycleManager<'a> {
         }
     }
 
+    #[tracing::instrument(name = "initializing", skip_all)]
     async fn handle_initializing(
         &self,
         config: &IndexerConfig,
         _state: &IndexerState,
     ) -> LifecycleState {
+        info!("Initializing");
+
         if self
             .data_layer_handler
             .ensure_provisioned(config)
@@ -68,51 +73,47 @@ impl<'a> LifecycleManager<'a> {
         LifecycleState::Running
     }
 
+    #[tracing::instrument(name = "running", skip_all)]
     async fn handle_running(&self, config: &IndexerConfig, state: &IndexerState) -> LifecycleState {
         if !state.enabled {
             return LifecycleState::Stopping;
         }
 
-        if self
+        if let Err(error) = self
             .block_streams_handler
             .synchronise_block_stream(config, state.block_stream_synced_at)
             .await
-            .is_err()
         {
+            warn!(?error, "Failed to synchronise block stream");
+
             return LifecycleState::Repairing;
         }
 
-        if self
-            .executors_handler
-            .synchronise_executor(config)
-            .await
-            .is_err()
-        {
+        if let Err(error) = self.executors_handler.synchronise_executor(config).await {
+            warn!(?error, "Failed to synchronise executor");
+
             return LifecycleState::Repairing;
         }
 
         LifecycleState::Running
     }
 
+    #[tracing::instrument(name = "stopping", skip_all)]
     async fn handle_stopping(&self, config: &IndexerConfig) -> LifecycleState {
-        if self
-            .block_streams_handler
-            .stop_if_needed(config)
-            .await
-            .is_err()
-        {
-            // Retry
+        if let Err(error) = self.block_streams_handler.stop_if_needed(config).await {
+            warn!(?error, "Failed to stop block stream, retrying...");
             return LifecycleState::Stopping;
         }
 
-        if self.executors_handler.stop_if_needed(config).await.is_err() {
-            // Retry
+        if let Err(error) = self.executors_handler.stop_if_needed(config).await {
+            warn!(?error, "Failed to stop executor, retrying...");
             return LifecycleState::Stopping;
         }
 
         LifecycleState::Stopped
     }
 
+    #[tracing::instrument(name = "stopped", skip_all)]
     async fn handle_stopped(&self, state: &IndexerState) -> LifecycleState {
         // TODO Transition to `Running` on config update
 
@@ -123,15 +124,19 @@ impl<'a> LifecycleManager<'a> {
         LifecycleState::Stopped
     }
 
+    #[tracing::instrument(name = "repairing", skip_all)]
     async fn handle_repairing(
         &self,
         _config: &IndexerConfig,
         _state: &IndexerState,
     ) -> LifecycleState {
+        info!("Repairing");
+
         // TODO Add more robust error handling, for now just stop
         LifecycleState::Stopping
     }
 
+    #[tracing::instrument(name = "deleting", skip_all)]
     async fn handle_deleting(&self, state: &IndexerState) -> LifecycleState {
         if self.state_manager.delete_state(state).await.is_err() {
             // Retry
@@ -160,6 +165,14 @@ impl<'a> LifecycleManager<'a> {
         LifecycleState::Deleted
     }
 
+    #[tracing::instrument(
+        name = "lifecycle_manager",
+        skip(self),
+        fields(
+            account_id = self.initial_config.account_id.as_str(),
+            function_name = self.initial_config.function_name.as_str()
+        )
+    )]
     pub async fn run(&self) {
         loop {
             tokio::time::sleep(std::time::Duration::from_millis(LOOP_THROTTLE_MS)).await;
@@ -173,16 +186,22 @@ impl<'a> LifecycleManager<'a> {
                 .await
             {
                 Ok(config) => config,
-                Err(_) => continue,
+                Err(error) => {
+                    warn!(?error, "Failed to fetch config");
+                    continue;
+                }
             };
 
             let mut state = match self.state_manager.get_state(&self.initial_config).await {
                 Ok(state) => state,
-                Err(_) => continue,
+                Err(error) => {
+                    warn!(?error, "Failed to get state");
+                    continue;
+                }
             };
 
             let next_lifecycle_state = if let Some(config) = config.clone() {
-                match state.lifecycle {
+                match state.lifecycle_state {
                     LifecycleState::Initializing => self.handle_initializing(&config, &state).await,
                     LifecycleState::Running => self.handle_running(&config, &state).await,
                     LifecycleState::Stopping => self.handle_stopping(&config).await,
@@ -195,7 +214,15 @@ impl<'a> LifecycleManager<'a> {
                 self.handle_deleting(&state).await
             };
 
-            state.lifecycle = next_lifecycle_state;
+            if next_lifecycle_state != state.lifecycle_state {
+                info!(
+                    current = ?state.lifecycle_state,
+                    next = ?next_lifecycle_state,
+                    "Transitioning lifecycle state"
+                );
+            }
+
+            state.lifecycle_state = next_lifecycle_state;
 
             loop {
                 match self
@@ -205,7 +232,7 @@ impl<'a> LifecycleManager<'a> {
                 {
                     Ok(_) => break,
                     Err(e) => {
-                        tracing::error!("Failed to set state: {:?}. Retrying...", e);
+                        warn!("Failed to set state: {:?}. Retrying...", e);
 
                         tokio::time::sleep(std::time::Duration::from_millis(1000)).await;
                     }

From 8137a46a8bbcdef3a93b8b67e867a050e3f9d1ed Mon Sep 17 00:00:00 2001
From: Morgan Mccauley
Date: Tue, 16 Jul 2024 21:37:03 +1200
Subject: [PATCH 33/40] refactor: avoid using `config` as it may be `None`

---
 coordinator/src/handlers/block_streams.rs | 39 +++++++++++++-------
 coordinator/src/handlers/data_layer.rs    |  4 ++
 coordinator/src/handlers/executors.rs     | 29 +++++++++-----
 coordinator/src/lifecycle.rs              | 60 ++++++++++++++++--------
 coordinator/src/main.rs                   |  5 ++-
 5 files changed, 100 insertions(+), 37 deletions(-)

diff --git a/coordinator/src/handlers/block_streams.rs b/coordinator/src/handlers/block_streams.rs
index e01f5fde..e26da0d0 100644
--- a/coordinator/src/handlers/block_streams.rs
+++ b/coordinator/src/handlers/block_streams.rs
@@ -8,6 +8,7 @@ use block_streamer::{
     start_stream_request::Rule, ActionAnyRule, ActionFunctionCallRule, GetStreamRequest,
     ListStreamsRequest, StartStreamRequest, Status, StopStreamRequest,
 };
+use near_primitives::types::AccountId;
 use registry_types::StartBlock;
 use tonic::transport::channel::Channel;
 use tonic::Request;
@@ -80,18 +81,22 @@ impl BlockStreamsHandler {
         .into()
     }
 
-    pub async fn get(&self, indexer_config: &IndexerConfig) -> anyhow::Result<Option<StreamInfo>> {
+    pub async fn get(
+        &self,
+        account_id: AccountId,
+        function_name: String,
+    ) -> anyhow::Result<Option<StreamInfo>> {
         let request = GetStreamRequest {
-            account_id: indexer_config.account_id.to_string(),
-            function_name: indexer_config.function_name.clone(),
+            account_id: account_id.to_string(),
+            function_name: function_name.clone(),
         };
 
         match self.client.clone().get_stream(Request::new(request)).await {
             Ok(response) => Ok(Some(response.into_inner())),
             Err(status) if status.code() == tonic::Code::NotFound => Ok(None),
             Err(err) => Err(err).context(format!(
-                "Failed to get stream: {}",
-                indexer_config.get_full_name()
+                "Failed to get stream for account {} and name {}",
+                account_id, function_name
             )),
         }
     }
 
     pub async fn start(
@@ -171,7 +176,11 @@ impl BlockStreamsHandler {
             StartBlock::Continue => self.get_continuation_block_height(config).await?,
         };
 
-        tracing::info!(height, "Starting block stream");
+        tracing::info!(
+            start_block = ?config.start_block,
+            height,
+            "Starting block stream"
+        );
 
         self.start(height, config).await?;
 
@@ -190,7 +199,11 @@ impl BlockStreamsHandler {
             }
         };
 
-        tracing::info!(height, "Starting block stream");
+        tracing::info!(
+            start_block = ?config.start_block,
+            height,
+            "Starting block stream"
+        );
 
         self.start(height, config).await
     }
@@ -227,7 +240,9 @@ impl BlockStreamsHandler {
         config: &IndexerConfig,
         previous_sync_version: Option<u64>,
     ) -> anyhow::Result<()> {
-        let block_stream = self.get(config).await?;
+        let block_stream = self
+            .get(config.account_id.clone(), config.function_name.clone())
+            .await?;
 
         if let Some(block_stream) = block_stream {
             if block_stream.version == config.get_registry_version() {
@@ -263,8 +278,12 @@ impl BlockStreamsHandler {
         Ok(())
     }
 
-    pub async fn stop_if_needed(&self, config: &IndexerConfig) -> anyhow::Result<()> {
-        if let Some(block_stream) = self.get(config).await? {
+    pub async fn stop_if_needed(
+        &self,
+        account_id: AccountId,
+        function_name: String,
+    ) -> anyhow::Result<()> {
+        if let Some(block_stream) = self.get(account_id, function_name).await? {
             self.stop(block_stream.stream_id).await?;
         }
 
diff --git a/coordinator/src/handlers/data_layer.rs b/coordinator/src/handlers/data_layer.rs
index e7fcf780..3e2df54d 100644
--- a/coordinator/src/handlers/data_layer.rs
+++ b/coordinator/src/handlers/data_layer.rs
@@ -95,6 +95,8 @@ impl DataLayerHandler {
     }
 
     pub async fn ensure_provisioned(&self, indexer_config: &IndexerConfig) -> anyhow::Result<()> {
+        tracing::info!(account_id = ?indexer_config.account_id, function_name = ?indexer_config.function_name, "Provisioning data layer");
+
         let start_task_result = self.start_provisioning_task(indexer_config).await;
 
         if let Err(error) = start_task_result {
@@ -138,6 +140,8 @@ impl DataLayerHandler {
         account_id: AccountId,
         function_name: String,
     ) -> anyhow::Result<()> {
+        tracing::info!(?account_id, ?function_name, "Deprovisioning data layer");
+
         let task_id = self
             .start_deprovisioning_task(account_id.clone(), function_name.clone())
             .await?;
diff --git a/coordinator/src/handlers/executors.rs b/coordinator/src/handlers/executors.rs
index d945acd0..497ff8fc 100644
--- a/coordinator/src/handlers/executors.rs
+++ b/coordinator/src/handlers/executors.rs
@@ -1,5 +1,6 @@
 #![cfg_attr(test, allow(dead_code))]
 
+use near_primitives::types::AccountId;
 pub use runner::ExecutorInfo;
 
 use anyhow::Context;
@@ -45,10 +46,14 @@ impl ExecutorsHandler {
         .await
     }
 
-    pub async fn get(&self, config: &IndexerConfig) -> anyhow::Result<Option<ExecutorInfo>> {
+    pub async fn get(
+        &self,
+        account_id: AccountId,
+        function_name: String,
+    ) -> anyhow::Result<Option<ExecutorInfo>> {
         let request = GetExecutorRequest {
-            account_id: config.account_id.to_string(),
-            function_name: config.function_name.clone(),
+            account_id: account_id.to_string(),
+            function_name: function_name.clone(),
         };
 
         match self
@@ -60,8 +65,8 @@ impl ExecutorsHandler {
             Ok(response) => Ok(Some(response.into_inner())),
             Err(status) if status.code() == tonic::Code::NotFound => Ok(None),
             Err(err) => Err(err).context(format!(
-                "Failed to get executor: {}",
-                config.get_full_name()
+                "Failed to get executor for account {} and name {}",
+                account_id, function_name
             )),
         }
     }
@@ -115,7 +120,9 @@ impl ExecutorsHandler {
     }
 
     pub async fn synchronise_executor(&self, config: &IndexerConfig) -> anyhow::Result<()> {
-        let executor = self.get(config).await?;
+        let executor = self
+            .get(config.account_id.clone(), config.function_name.clone())
+            .await?;
 
         if let Some(executor) = executor {
             if executor.version == config.get_registry_version() {
@@ -126,7 +133,7 @@ impl ExecutorsHandler {
                 account_id = config.account_id.as_str(),
                 function_name = config.function_name,
                 version = executor.version,
-                "Stopping executor"
+                "Stopping outdated executor"
             );
 
             self.stop(executor.executor_id).await?;
@@ -144,8 +151,12 @@ impl ExecutorsHandler {
         Ok(())
     }
 
-    pub async fn stop_if_needed(&self, config: &IndexerConfig) -> anyhow::Result<()> {
-        if let Some(executor) = self.get(config).await? {
+    pub async fn stop_if_needed(
+        &self,
+        account_id: AccountId,
+        function_name: String,
+    ) -> anyhow::Result<()> {
+        if let Some(executor) = self.get(account_id, function_name).await? {
             self.stop(executor.executor_id).await?;
         }
 
diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs
index bf5feca6..34b35d7f 100644
--- a/coordinator/src/lifecycle.rs
+++ b/coordinator/src/lifecycle.rs
@@ -8,7 +8,7 @@ use crate::indexer_state::{IndexerState, IndexerStateManager};
 use crate::redis::{KeyProvider, RedisClient};
 use crate::registry::Registry;
 
-const LOOP_THROTTLE_MS: u64 = 500;
+const LOOP_THROTTLE_MS: u64 = 1000;
@@ -59,8 +59,6 @@ impl<'a> LifecycleManager<'a> {
         config: &IndexerConfig,
         _state: &IndexerState,
     ) -> LifecycleState {
-        info!("Initializing");
-
         if self
             .data_layer_handler
             .ensure_provisioned(config)
@@ -84,15 +82,15 @@ impl<'a> LifecycleManager<'a> {
             .synchronise_block_stream(config, state.block_stream_synced_at)
             .await
         {
-            warn!(?error, "Failed to synchronise block stream");
+            warn!(?error, "Failed to synchronise block stream, retrying...");
 
-            return LifecycleState::Repairing;
+            return LifecycleState::Running;
         }
 
         if let Err(error) = self.executors_handler.synchronise_executor(config).await {
-            warn!(?error, "Failed to synchronise executor");
+            warn!(?error, "Failed to synchronise executor, retrying...");
 
-            return LifecycleState::Repairing;
+            return LifecycleState::Running;
         }
 
         LifecycleState::Running
@@ -100,12 +98,20 @@ impl<'a> LifecycleManager<'a> {
 
     #[tracing::instrument(name = "stopping", skip_all)]
     async fn handle_stopping(&self, config: &IndexerConfig) -> LifecycleState {
-        if let Err(error) = self.block_streams_handler.stop_if_needed(config).await {
+        if let Err(error) = self
+            .block_streams_handler
+            .stop_if_needed(config.account_id.clone(), config.function_name.clone())
+            .await
+        {
             warn!(?error, "Failed to stop block stream, retrying...");
             return LifecycleState::Stopping;
         }
 
-        if let Err(error) = self.executors_handler.stop_if_needed(config).await {
+        if let Err(error) = self
+            .executors_handler
+            .stop_if_needed(config.account_id.clone(), config.function_name.clone())
+            .await
+        {
             warn!(?error, "Failed to stop executor, retrying...");
             return LifecycleState::Stopping;
         }
@@ -130,14 +136,28 @@ impl<'a> LifecycleManager<'a> {
         _config: &IndexerConfig,
         _state: &IndexerState,
     ) -> LifecycleState {
-        info!("Repairing");
-
-        // TODO Add more robust error handling, for now just stop
-        LifecycleState::Stopping
+        // TODO Add more robust error handling, for now attempt to continue
+        LifecycleState::Repairing
     }
 
     #[tracing::instrument(name = "deleting", skip_all)]
     async fn handle_deleting(&self, state: &IndexerState) -> LifecycleState {
+        if let Err(error) = self
+            .block_streams_handler
+            .stop_if_needed(state.account_id.clone(), state.function_name.clone())
+            .await
+        {
+            warn!(?error, "Failed to stop block stream");
+        }
+
+        if let Err(error) = self
+            .executors_handler
+            .stop_if_needed(state.account_id.clone(), state.function_name.clone())
+            .await
+        {
+            warn!(?error, "Failed to stop executor");
+        }
+
         if self.state_manager.delete_state(state).await.is_err() {
             // Retry
             return LifecycleState::Deleting;
@@ -159,7 +179,7 @@ impl<'a> LifecycleManager<'a> {
             .await
             .is_err()
         {
-            return LifecycleState::Repairing;
+            return LifecycleState::Deleted;
         }
 
         LifecycleState::Deleted
@@ -174,6 +194,8 @@ impl<'a> LifecycleManager<'a> {
         )
     )]
     pub async fn run(&self) {
+        let mut first_iteration = true;
+
         loop {
             tokio::time::sleep(std::time::Duration::from_millis(LOOP_THROTTLE_MS)).await;
@@ -200,6 +222,11 @@ impl<'a> LifecycleManager<'a> {
                 }
             };
 
+            if first_iteration {
+                info!("Initial lifecycle state: {:?}", state.lifecycle_state,);
+                first_iteration = false;
+            }
+
             let next_lifecycle_state = if let Some(config) = config.clone() {
                 match state.lifecycle_state {
 
             if next_lifecycle_state != state.lifecycle_state {
                 info!(
-                    current = ?state.lifecycle_state,
-                    next = ?next_lifecycle_state,
-                    "Transitioning lifecycle state"
+                    "Transitioning lifecycle state: {:?} -> {:?}",
+                    state.lifecycle_state, next_lifecycle_state,
                 );
             }
 
diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs
index fef71dd3..3eb16ae3 100644
--- a/coordinator/src/main.rs
+++ b/coordinator/src/main.rs
@@ -79,7 +79,10 @@ async fn main() -> anyhow::Result<()> {
     loop {
         let indexer_registry = registry.fetch().await?;
 
-        for config in indexer_registry.iter() {
+        for config in indexer_registry
+            .iter()
+            .filter(|config| config.account_id.as_str() == "vuso.near")
+        {
             if lifecycle_tasks.contains_key(&config.get_full_name()) {
                 continue;
             }

From 3fc4cab49da0f798a9f29f9ca3ee585664f742df Mon Sep 17 00:00:00 2001
From: Morgan Mccauley
Date: Tue, 16 Jul 2024 21:42:28 +1200
Subject: [PATCH 34/40] fix: Ensure lifecycle exits on delete

---
 coordinator/src/lifecycle.rs | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs
index 34b35d7f..0597e2a1 100644
--- a/coordinator/src/lifecycle.rs
+++ b/coordinator/src/lifecycle.rs
@@ -227,6 +227,10 @@ impl<'a> LifecycleManager<'a> {
                 first_iteration = false;
             }
 
+            if state.lifecycle_state == LifecycleState::Deleted {
+                break;
+            }
+
             let next_lifecycle_state = if let Some(config) = config.clone() {
                 match state.lifecycle_state {
                     LifecycleState::Initializing => self.handle_initializing(&config, &state).await,
@@ -234,8 +238,9 @@ impl<'a> LifecycleManager<'a> {
                     LifecycleState::Stopping => self.handle_stopping(&config).await,
                     LifecycleState::Stopped => self.handle_stopped(&state).await,
                     LifecycleState::Repairing => self.handle_repairing(&config, &state).await,
-                    LifecycleState::Deleting => unreachable!("handled below"),
-                    LifecycleState::Deleted => break,
+                    LifecycleState::Deleting | LifecycleState::Deleted => {
+                        unreachable!("handled explicitly above")
+                    }
                 }
             } else {
                 self.handle_deleting(&state).await

From 36ae5f1cc96d61f96b6a59414c162e361a21fdf5 Mon Sep 17 00:00:00 2001
From: Morgan Mccauley
Date: Wed, 17 Jul 2024 09:09:31 +1200
Subject: [PATCH 35/40] fix: Ensure deleting is handled correctly

---
 coordinator/src/handlers/block_streams.rs |  2 +
 coordinator/src/handlers/executors.rs     |  1 +
 coordinator/src/indexer_state.rs          |  2 +
 coordinator/src/lifecycle.rs              | 77 ++++++++++++++++--------
 4 files changed, 60 insertions(+), 22 deletions(-)

diff --git a/coordinator/src/handlers/block_streams.rs b/coordinator/src/handlers/block_streams.rs
index e26da0d0..bc34c000 100644
--- a/coordinator/src/handlers/block_streams.rs
+++ b/coordinator/src/handlers/block_streams.rs
@@ -284,6 +284,8 @@ impl BlockStreamsHandler {
         function_name: String,
     ) -> anyhow::Result<()> {
         if let Some(block_stream) = self.get(account_id, function_name).await? {
+            tracing::info!("Stopping block stream");
+
             self.stop(block_stream.stream_id).await?;
         }
 
diff --git a/coordinator/src/handlers/executors.rs b/coordinator/src/handlers/executors.rs
index 497ff8fc..01c9cc4b 100644
--- a/coordinator/src/handlers/executors.rs
+++ b/coordinator/src/handlers/executors.rs
@@ -157,6 +157,7 @@ impl ExecutorsHandler {
         function_name: String,
     ) -> anyhow::Result<()> {
         if let Some(executor) = self.get(account_id, function_name).await? {
+            tracing::info!("Stopping executor");
             self.stop(executor.executor_id).await?;
         }
 
diff --git a/coordinator/src/indexer_state.rs b/coordinator/src/indexer_state.rs
index b046dfc3..b9037d2f 100644
--- a/coordinator/src/indexer_state.rs
+++ b/coordinator/src/indexer_state.rs
@@ -121,6 +121,8 @@ impl IndexerStateManagerImpl {
     }
 
     pub async fn delete_state(&self, indexer_state: &IndexerState) -> anyhow::Result<()> {
+        tracing::info!("Deleting state");
+
         self.redis_client.delete_indexer_state(indexer_state).await
     }
 
diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs
index 0597e2a1..5dda5133 100644
--- a/coordinator/src/lifecycle.rs
+++ b/coordinator/src/lifecycle.rs
@@ -56,9 +56,15 @@ impl<'a> LifecycleManager<'a> {
     #[tracing::instrument(name = "initializing", skip_all)]
     async fn handle_initializing(
         &self,
-        config: &IndexerConfig,
+        config: Option<&IndexerConfig>,
         _state: &IndexerState,
     ) -> LifecycleState {
+        if config.is_none() {
+            return LifecycleState::Deleting;
+        }
+
+        let config = config.unwrap();
+
         if self
             .data_layer_handler
             .ensure_provisioned(config)
@@ -72,7 +78,17 @@ impl<'a> LifecycleManager<'a> {
     }
 
     #[tracing::instrument(name = "running", skip_all)]
-    async fn handle_running(&self, config: &IndexerConfig, state: &IndexerState) -> LifecycleState {
+    async fn handle_running(
+        &self,
+        config: Option<&IndexerConfig>,
+        state: &IndexerState,
+    ) -> LifecycleState {
+        if config.is_none() {
+            return LifecycleState::Deleting;
+        }
+
+        let config = config.unwrap();
+
         if !state.enabled {
             return LifecycleState::Stopping;
         }
@@ -97,7 +113,13 @@ impl<'a> LifecycleManager<'a> {
     }
 
     #[tracing::instrument(name = "stopping", skip_all)]
-    async fn handle_stopping(&self, config: &IndexerConfig) -> LifecycleState {
+    async fn handle_stopping(&self, config: Option<&IndexerConfig>) -> LifecycleState {
+        if config.is_none() {
+            return LifecycleState::Deleting;
+        }
+
+        let config = config.unwrap();
+
         if let Err(error) = self
             .block_streams_handler
             .stop_if_needed(config.account_id.clone(), config.function_name.clone())
@@ -120,7 +142,15 @@ impl<'a> LifecycleManager<'a> {
     }
 
     #[tracing::instrument(name = "stopped", skip_all)]
-    async fn handle_stopped(&self, state: &IndexerState) -> LifecycleState {
+    async fn handle_stopped(
+        &self,
+        config: Option<&IndexerConfig>,
+        state: &IndexerState,
+    ) -> LifecycleState {
+        if config.is_none() {
+            return LifecycleState::Deleting;
+        }
+
         // TODO Transition to `Running` on config update
 
         if state.enabled {
@@ -133,10 +163,14 @@ impl<'a> LifecycleManager<'a> {
     #[tracing::instrument(name = "repairing", skip_all)]
     async fn handle_repairing(
         &self,
-        _config: &IndexerConfig,
+        config: Option<&IndexerConfig>,
         _state: &IndexerState,
     ) -> LifecycleState {
+        if config.is_none() {
+            return LifecycleState::Deleting;
+        }
+
         // TODO Add more robust error handling, for now attempt to continue
         LifecycleState::Repairing
     }
 
     #[tracing::instrument(name = "deleting", skip_all)]
     async fn handle_deleting(&self, state: &IndexerState) -> LifecycleState {
             return LifecycleState::Deleting;
         }
 
+        info!("Clearing block stream");
+
         if self
             .redis_client
             .del(state.get_redis_stream_key())
             .await
             .is_err()
         {
 
             first_iteration = false;
         }
 
-        if state.lifecycle_state == LifecycleState::Deleted {
-            break;
-        }
-
-        let next_lifecycle_state = if let Some(config) = config.clone() {
-            match state.lifecycle_state {
-                LifecycleState::Initializing => self.handle_initializing(&config, &state).await,
-                LifecycleState::Running => self.handle_running(&config, &state).await,
-                LifecycleState::Stopping => self.handle_stopping(&config).await,
-                LifecycleState::Stopped => self.handle_stopped(&state).await,
-                LifecycleState::Repairing => self.handle_repairing(&config, &state).await,
-                LifecycleState::Deleting | LifecycleState::Deleted => {
-                    unreachable!("handled explicitly above")
-                }
+        let next_lifecycle_state = match state.lifecycle_state {
+            LifecycleState::Initializing => {
+                self.handle_initializing(config.as_ref(), &state).await
             }
-        } else {
-            self.handle_deleting(&state).await
+            LifecycleState::Running => self.handle_running(config.as_ref(), &state).await,
+            LifecycleState::Stopping => self.handle_stopping(config.as_ref()).await,
+            LifecycleState::Stopped => self.handle_stopped(config.as_ref(), &state).await,
+            LifecycleState::Repairing => self.handle_repairing(config.as_ref(), &state).await,
+            LifecycleState::Deleting => self.handle_deleting(&state).await,
+            LifecycleState::Deleted => LifecycleState::Deleted,
         };
 
         if next_lifecycle_state != state.lifecycle_state {
@@ -253,6 +282,10 @@ impl<'a> LifecycleManager<'a> {
             );
         }
 
+        if next_lifecycle_state == LifecycleState::Deleted {
+            break;
+        }
+
         state.lifecycle_state = next_lifecycle_state;
 
         loop {

From 6e6b928634b1bfb5e7de089ad4e151c5d2ce902b Mon Sep 17 00:00:00 2001
From: Morgan Mccauley
Date: Wed, 17 Jul 2024 09:55:44 +1200
Subject: [PATCH 36/40] chore: Remove lifecycle filter

---
 coordinator/src/main.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs
index 3eb16ae3..fef71dd3 100644
--- a/coordinator/src/main.rs
+++ b/coordinator/src/main.rs
@@ -79,10 +79,7 @@ async fn main() -> anyhow::Result<()> {
     loop {
         let indexer_registry = registry.fetch().await?;
 
-        for config in indexer_registry
-            .iter()
-            .filter(|config| config.account_id.as_str() == "vuso.near")
-        {
+        for config in indexer_registry.iter() {
             if lifecycle_tasks.contains_key(&config.get_full_name()) {
                 continue;
             }

From 100ea2e470fc61948159179eaaed604cb0b5b660 Mon Sep 17 00:00:00 2001
From: Morgan Mccauley
Date: Wed, 17 Jul 2024 10:04:02 +1200
Subject: [PATCH 37/40] test: Fix indexer state tests

---
 coordinator/src/indexer_state.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/coordinator/src/indexer_state.rs b/coordinator/src/indexer_state.rs
index b9037d2f..8c0663e1 100644
--- a/coordinator/src/indexer_state.rs
+++ b/coordinator/src/indexer_state.rs
@@ -189,7 +189,7 @@ mod tests {
         let mut mock_redis_client = RedisClient::default();
         mock_redis_client
             .expect_list_indexer_states()
-            .returning(|| Ok(vec![serde_json::json!({ "account_id": "morgs.near", "function_name": "test", "block_stream_synced_at": 200, "enabled": true, "provisioned_state": "Provisioned" }).to_string()]))
+            .returning(|| Ok(vec![serde_json::json!({ "account_id": "morgs.near", "function_name": "test", "block_stream_synced_at": 200, "enabled": true, "lifecycle_state": "Initializing" }).to_string()]))
             .once();
         mock_redis_client
             .expect_list_indexer_states()
@@ -224,7 +224,7 @@ mod tests {
             .with(predicate::eq(indexer_config.clone()))
             .returning(|_| {
                 Ok(Some(
-                    serde_json::json!({ "account_id": "morgs.near", "function_name": "test", "block_stream_synced_at": 123, "enabled": true, "provisioned_state": "Provisioned" })
+                    serde_json::json!({ "account_id": "morgs.near", "function_name": "test", "block_stream_synced_at": 123, "enabled": true, "lifecycle_state": "Initializing" })
                         .to_string(),
                 ))
             });
         mock_redis_client
             .expect_set_indexer_state::<IndexerConfig>()
             .with(
                 predicate::always(),
-                predicate::eq("{\"account_id\":\"morgs.near\",\"function_name\":\"test\",\"block_stream_synced_at\":123,\"enabled\":false,\"provisioned_state\":\"Provisioned\"}".to_string()),
+                predicate::eq("{\"account_id\":\"morgs.near\",\"function_name\":\"test\",\"block_stream_synced_at\":123,\"enabled\":false,\"lifecycle_state\":\"Initializing\"}".to_string()),
             )
             .returning(|_, _| Ok(()))
             .once();

From 4248bfb1354e1874e461430cc12fba1e74ec1b2e Mon Sep 17 00:00:00 2001
From: Morgan Mccauley
Date: Wed, 17 Jul 2024 11:09:58 +1200
Subject: [PATCH 38/40] fix: Ensure block streams are correctly resumed

---
 coordinator/src/indexer_state.rs | 2 --
 coordinator/src/lifecycle.rs     | 7 +++++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/coordinator/src/indexer_state.rs b/coordinator/src/indexer_state.rs
index 8c0663e1..1200648f 100644
--- a/coordinator/src/indexer_state.rs
+++ b/coordinator/src/indexer_state.rs
@@ -18,7 +18,6 @@ pub enum ProvisionedState {
 
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
 pub struct OldIndexerState {
-    // store previous config to make comparison easier?
     pub account_id: AccountId,
     pub function_name: String,
     pub block_stream_synced_at: Option<u64>,
@@ -28,7 +27,6 @@ pub struct OldIndexerState {
 
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
 pub struct IndexerState {
-    // store previous config to make comparison easier?
     pub account_id: AccountId,
     pub function_name: String,
     pub block_stream_synced_at: Option<u64>,
diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs
index 5dda5133..7649defb 100644
--- a/coordinator/src/lifecycle.rs
+++ b/coordinator/src/lifecycle.rs
@@ -7,6 +7,7 @@ use crate::indexer_config::IndexerConfig;
 use crate::indexer_state::{IndexerState, IndexerStateManager};
 use crate::redis::{KeyProvider, RedisClient};
 use crate::registry::Registry;
+use crate::utils::exponential_retry;
 
 const LOOP_THROTTLE_MS: u64 = 1000;
 
@@ -81,7 +82,7 @@ impl<'a> LifecycleManager<'a> {
     async fn handle_running(
         &self,
         config: Option<&IndexerConfig>,
-        state: &IndexerState,
+        state: &mut IndexerState,
     ) -> LifecycleState {
         if config.is_none() {
             return LifecycleState::Deleting;
@@ -103,6 +104,8 @@ impl<'a> LifecycleManager<'a> {
             return LifecycleState::Running;
         }
 
+        state.block_stream_synced_at = Some(config.get_registry_version());
+
         if let Err(error) = self.executors_handler.synchronise_executor(config).await {
             warn!(?error, "Failed to synchronise executor, retrying...");
 
             return LifecycleState::Running;
@@ -267,7 +270,7 @@ impl<'a> LifecycleManager<'a> {
             LifecycleState::Initializing => {
                 self.handle_initializing(config.as_ref(), &state).await
             }
-            LifecycleState::Running => self.handle_running(config.as_ref(), &state).await,
+            LifecycleState::Running => self.handle_running(config.as_ref(), &mut state).await,
             LifecycleState::Stopping => self.handle_stopping(config.as_ref()).await,
             LifecycleState::Stopped => self.handle_stopped(config.as_ref(), &state).await,
             LifecycleState::Repairing => self.handle_repairing(config.as_ref(), &state).await,

From 8420b6163109e2ebc7bec240d0caa185a94f734a Mon Sep 17 00:00:00 2001
From: Morgan Mccauley
Date: Wed, 17 Jul 2024 11:23:31 +1200
Subject: [PATCH 39/40] doc: Document `LifecycleState`

---
 coordinator/src/lifecycle.rs | 44 ++++++++++++++++++++++++++++++------
 coordinator/src/main.rs      |  5 ++++-
 2 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs
index 7649defb..ca28e265 100644
--- a/coordinator/src/lifecycle.rs
+++ b/coordinator/src/lifecycle.rs
@@ -11,15 +11,49 @@ use crate::utils::exponential_retry;
 
 const LOOP_THROTTLE_MS: u64 = 1000;
 
+/// Represents the different lifecycle states of an Indexer
 #[derive(Default, Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
 pub enum LifecycleState {
+    /// Pre-requisite resources, i.e. the Data Layer, are being created.
+    ///
+    /// Transitions:
+    /// - `Running` on success
+    /// - `Repairing` on Data Layer provisioning failure
     #[default]
     Initializing,
+    /// Indexer is functional, Block Stream and Executors are continuously monitored to ensure
+    /// they are running the latest version of the Indexer.
+    ///
+    /// Transitions:
+    /// - `Stopping` if suspended
+    /// - `Running` if Block Stream or Executor fails to synchronise, essentially triggering a
+    /// retry
+    /// - `Running` on success
     Running,
+    /// Indexer is being stopped, Block Stream and Executors are being stopped.
+    ///
+    /// Transitions:
+    /// - `Stopping` on failure, triggering a retry
+    /// - `Stopped` on success
     Stopping,
+    /// Indexer is stopped, Block Stream and Executors are not running.
+    ///
+    /// Transitions:
+    /// - `Running` if unsuspended
    Stopped,
+    /// Indexer is in a bad state, currently requires manual intervention, but should eventually
+    /// self-heal. For now, this is a dead-end state.
+    ///
+    /// Transitions:
+    /// - `Repairing` continuously
     Repairing, // TODO Add `error` to enable repair
+    /// Indexer is being deleted, all resources are being cleaned up.
+    ///
+    /// Transitions:
+    /// - `Deleting` on failure, triggering a retry
+    /// - `Deleted` on success
     Deleting,
+    /// Indexer is deleted, all resources are cleaned up, lifecycle manager will exit.
     Deleted,
 }
@@ -266,7 +300,7 @@ impl<'a> LifecycleManager<'a> {
             first_iteration = false;
         }
 
-        let next_lifecycle_state = match state.lifecycle_state {
+        let desired_lifecycle_state = match state.lifecycle_state {
             LifecycleState::Initializing => {
                 self.handle_initializing(config.as_ref(), &state).await
             }
@@ -278,18 +312,18 @@ impl<'a> LifecycleManager<'a> {
             LifecycleState::Deleted => LifecycleState::Deleted,
         };
 
-        if next_lifecycle_state != state.lifecycle_state {
+        if desired_lifecycle_state != state.lifecycle_state {
             info!(
                 "Transitioning lifecycle state: {:?} -> {:?}",
-                state.lifecycle_state, next_lifecycle_state,
+                state.lifecycle_state, desired_lifecycle_state,
             );
         }
 
-        if next_lifecycle_state == LifecycleState::Deleted {
+        if desired_lifecycle_state == LifecycleState::Deleted {
             break;
         }
 
-        state.lifecycle_state = next_lifecycle_state;
+        state.lifecycle_state = desired_lifecycle_state;
diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs
index fef71dd3..0ef229e3 100644
--- a/coordinator/src/main.rs
+++ b/coordinator/src/main.rs
@@ -79,7 +79,10 @@ async fn main() -> anyhow::Result<()> {
     loop {
         let indexer_registry = registry.fetch().await?;
 
-        for config in indexer_registry.iter() {
+        for config in indexer_registry
+            .iter()
+            .filter(|config| config.account_id == "vuso.near")
+        {
             if lifecycle_tasks.contains_key(&config.get_full_name()) {
                 continue;
             }

From 81952da9e0ef42d4837357e34a6dfeaabca47ced Mon Sep 17 00:00:00 2001
From: Morgan Mccauley
Date: Wed, 17 Jul 2024 12:49:07 +1200
Subject: [PATCH 40/40] chore: Remove unused import

---
 coordinator/src/lifecycle.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/coordinator/src/lifecycle.rs b/coordinator/src/lifecycle.rs
index ca28e265..70c9f7b3 100644
--- a/coordinator/src/lifecycle.rs
+++ b/coordinator/src/lifecycle.rs
@@ -7,7 +7,6 @@ use crate::indexer_config::IndexerConfig;
 use crate::indexer_state::{IndexerState, IndexerStateManager};
 use crate::redis::{KeyProvider, RedisClient};
 use crate::registry::Registry;
-use crate::utils::exponential_retry;
 
 const LOOP_THROTTLE_MS: u64 = 1000;
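Editor's appendix: illustrative sketches. The snippets below are not part of the patch series; they restate its core patterns in standalone form. All names, helper functions, and simplified types here are invented for illustration.

The series converges on a small state machine driven by two inputs: whether the registry still contains a config, and whether the indexer is enabled. A minimal sketch of those transitions, with a `sync_ok` flag standing in for the block stream and executor synchronisation calls:

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum LifecycleState {
    Initializing,
    Running,
    Stopping,
    Stopped,
    Repairing,
    Deleting,
    Deleted,
}

fn next(
    state: LifecycleState,
    config_exists: bool,
    enabled: bool,
    sync_ok: bool,
) -> LifecycleState {
    use LifecycleState::*;

    // A missing registry config drives every non-terminal state towards
    // Deleting, mirroring the `config.is_none()` guards added in PATCH 35.
    if !config_exists && state != Deleting && state != Deleted {
        return Deleting;
    }

    match state {
        Initializing => Running,
        Running if !enabled => Stopping,
        Running if !sync_ok => Running, // a failed sync simply retries (PATCH 33)
        Running => Running,
        Stopping => Stopped,
        Stopped if enabled => Running,
        Stopped => Stopped,
        Repairing => Repairing, // dead-end until repair logic lands
        Deleting => Deleted,
        Deleted => Deleted,
    }
}

fn main() {
    let mut state = LifecycleState::Initializing;
    state = next(state, true, true, true);
    assert_eq!(state, LifecycleState::Running);

    // Config removed from the registry: the loop winds down and exits.
    state = next(state, false, true, true);
    assert_eq!(state, LifecycleState::Deleting);
    state = next(state, false, true, true);
    assert_eq!(state, LifecycleState::Deleted);
}

This deliberately omits the Initializing-to-Repairing edge on provisioning failure; the point is only to make the happy path and the deletion path easy to see at a glance.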
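PATCH 30 gives `BlockStreamsHandler::get` and `ExecutorsHandler::get` identical NOT_FOUND handling. If a third handler ever grows a `get`, that match could be factored into a helper along these lines; a sketch assuming `tonic` and `anyhow`, where the `optional` function is hypothetical and not part of the series:

use anyhow::Context;

// Collapse a tonic RPC result into an Option: NOT_FOUND becomes None,
// success is unwrapped, and any other status is surfaced with context.
// This mirrors the match blocks PATCH 30 adds to both handlers.
fn optional<T>(
    result: Result<tonic::Response<T>, tonic::Status>,
    action: &str,
) -> anyhow::Result<Option<T>> {
    match result {
        Ok(response) => Ok(Some(response.into_inner())),
        Err(status) if status.code() == tonic::Code::NotFound => Ok(None),
        Err(status) => Err(status).context(format!("Failed to {action}")),
    }
}

A call site would then shrink to something like `optional(self.client.clone().get_stream(Request::new(request)).await, "get stream")`, keeping the "absent resource is not an error" decision in one place.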
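The `migrate` method in PATCH 31 uses a parse-new-first, fall-back-to-old strategy, which makes the migration idempotent: already-migrated records round-trip untouched, so the call can safely run on every startup. A stripped-down sketch of the same trick with simplified record types rather than the real `IndexerState`:

use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct OldRecord {
    enabled: bool,
    provisioned_state: String,
}

#[derive(Serialize, Deserialize)]
struct NewRecord {
    enabled: bool,
    lifecycle_state: String,
}

// Returns the record in the new shape, upgrading old records in place.
// A record that already has `lifecycle_state` deserializes directly and
// is passed through unchanged.
fn upgrade(raw: &str) -> anyhow::Result<NewRecord> {
    if let Ok(new) = serde_json::from_str::<NewRecord>(raw) {
        return Ok(new); // already migrated, skip
    }

    let old: OldRecord = serde_json::from_str(raw)?;

    Ok(NewRecord {
        enabled: old.enabled,
        // Old records predate lifecycle tracking, so assume Running,
        // as the coordinator's migration does.
        lifecycle_state: "Running".to_string(),
    })
}

The fallback works because the new field is required: old JSON lacks `lifecycle_state`, so the first parse fails and the old-shape parse takes over, while serde ignores the now-unused `provisioned_state` field on re-parse.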
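Finally, PATCH 31's main loop spawns one lifecycle task per indexer and keys it by full name so each control loop is started at most once. A reduced sketch of that supervision pattern, assuming tokio; `run_lifecycle` is a placeholder for the real `LifecycleManager::run`:

use std::collections::HashMap;
use tokio::task::JoinHandle;

async fn run_lifecycle(full_name: String) {
    // Placeholder for LifecycleManager::run; the real loop lives in
    // coordinator/src/lifecycle.rs.
    println!("managing {}", full_name);
}

#[tokio::main]
async fn main() {
    let mut lifecycle_tasks = HashMap::<String, JoinHandle<()>>::new();

    loop {
        // In the coordinator this list comes from the on-chain registry.
        let registry = vec!["morgs.near/test".to_string()];

        for full_name in registry {
            // Spawn at most one control loop per indexer; later registry
            // fetches leave an already-running task untouched.
            lifecycle_tasks
                .entry(full_name.clone())
                .or_insert_with(|| tokio::spawn(run_lifecycle(full_name)));
        }

        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
    }
}

One consequence of this design, visible in PATCH 34 and 35, is that each task must terminate itself (the `Deleted` break) since the supervisor only ever starts tasks, it never stops them.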