From fda1b150a8eeefbd35d6032bf98f7a2af79fd097 Mon Sep 17 00:00:00 2001
From: Jocelyn Boullier
Date: Sat, 2 Dec 2023 14:42:56 +0100
Subject: [PATCH] feat(gateway): add back retry logic on project error (#1455)

* feat(gateway): attempt at adding back some kind of retry

* refactor: clippy suggestion

---------

Co-authored-by: chesedo
---
 gateway/src/project.rs | 42 ++++++++++++++++++++++++++++++++++++++++++
 gateway/src/task.rs    |  6 +++---
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/gateway/src/project.rs b/gateway/src/project.rs
index 538969112..bf0283389 100644
--- a/gateway/src/project.rs
+++ b/gateway/src/project.rs
@@ -663,6 +663,48 @@ where
     }
 }
 
+/// Refreshes `project`, retrying while the refresh yields [`Project::Errored`]
+/// with a project state stored in the error's `ctx` field.
+///
+/// At most `MAX_ATTEMPTS` refreshes are performed; each retry starts from a clone
+/// of that stored state. The task sleeps 400 ms before the second attempt and
+/// 800 ms before the third (`BASE_DELAY_MS * 2^attempt`, numbering from 1).
+///
+/// Returns the outcome of the last refresh performed: an `Err`, a state other
+/// than `Errored`, an `Errored` whose `ctx` is `None`, or the `Errored` state
+/// produced by the final attempt.
+pub async fn refresh_with_retry(
+    project: Project,
+    ctx: &impl DockerContext,
+) -> Result<Project, Error> {
+    const MAX_ATTEMPTS: u32 = 3;
+    const BASE_DELAY_MS: u64 = 100;
+
+    let mut attempt: u32 = 1;
+    let mut state = Box::new(project);
+
+    loop {
+        let refreshed = state.refresh(ctx).await;
+
+        // Only an errored project that still carries a stored state can be
+        // retried, and only while attempts remain.
+        let retry_from = match &refreshed {
+            Ok(Project::Errored(err)) if attempt < MAX_ATTEMPTS => err.ctx.clone(),
+            _ => None,
+        };
+
+        match retry_from {
+            Some(previous) => {
+                attempt += 1;
+                state = previous;
+                let delay = Duration::from_millis(BASE_DELAY_MS * 2_u64.pow(attempt));
+                tokio::time::sleep(delay).await;
+            }
+            None => return refreshed,
+        }
+    }
+}
+
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct ProjectCreating {
     project_name: ProjectName,
diff --git a/gateway/src/task.rs b/gateway/src/task.rs
index e01f5525d..1fc27ca54 100644
--- a/gateway/src/task.rs
+++ b/gateway/src/task.rs
@@ -19,7 +19,7 @@ use uuid::Uuid;
 use crate::project::*;
 use crate::service::{GatewayContext, GatewayService};
 use crate::worker::TaskRouter;
-use crate::{AccountName, Error, ErrorKind, Refresh, State};
+use crate::{AccountName, Error, ErrorKind, State};
 
 // Default maximum _total_ time a task is allowed to run
 pub const DEFAULT_TIMEOUT: Duration = Duration::from_secs(300);
@@ -317,7 +317,7 @@ impl Task for RunUntilDone {
 
         // Make sure the project state has not changed from Docker
         // Else we will make assumptions when trying to run next which can cause a failure
-        let project = match ctx.state.refresh(&ctx.gateway).await {
+        let project = match refresh_with_retry(ctx.state, &ctx.gateway).await {
             Ok(project) => project,
             Err(error) => return TaskResult::Err(error),
         };
@@ -351,7 +351,7 @@ impl Task for DeleteProject {
     async fn poll(&mut self, ctx: ProjectContext) -> TaskResult {
         // Make sure the project state has not changed from Docker
         // Else we will make assumptions when trying to run next which can cause a failure
-        let project = match ctx.state.refresh(&ctx.gateway).await {
+        let project = match refresh_with_retry(ctx.state, &ctx.gateway).await {
             Ok(project) => project,
             Err(error) => return TaskResult::Err(error),
         };