diff --git a/crates/mysten-common/src/metrics.rs b/crates/mysten-common/src/metrics.rs index d4f74ef20967f8..66ef04586b6df2 100644 --- a/crates/mysten-common/src/metrics.rs +++ b/crates/mysten-common/src/metrics.rs @@ -6,7 +6,7 @@ use prometheus::Encoder; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use tracing::{debug, error, info}; -const DEFAULT_METRICS_PUSH_TIMEOUT: Duration = Duration::from_secs(30); +const METRICS_PUSH_TIMEOUT: Duration = Duration::from_secs(45); pub struct MetricsPushClient { certificate: std::sync::Arc, @@ -77,7 +77,7 @@ pub async fn push_metrics( .header(reqwest::header::CONTENT_ENCODING, "snappy") .header(reqwest::header::CONTENT_TYPE, prometheus::PROTOBUF_FORMAT) .body(compressed) - .timeout(DEFAULT_METRICS_PUSH_TIMEOUT) + .timeout(METRICS_PUSH_TIMEOUT) .send() .await?; diff --git a/crates/sui-bridge/src/metrics.rs b/crates/sui-bridge/src/metrics.rs index adad6c9659acc5..e78cee0913e7f5 100644 --- a/crates/sui-bridge/src/metrics.rs +++ b/crates/sui-bridge/src/metrics.rs @@ -54,15 +54,22 @@ pub fn start_metrics_push_task( let mut interval = tokio::time::interval(interval); interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + let mut errors = 0; loop { interval.tick().await; - // Retry pushing metrics if there is an error. - while let Err(error) = push_metrics(&client, &url, &registry).await { - tracing::warn!("unable to push metrics: {error}; new client will be created"); - sleep(Duration::from_secs(1)).await; + if let Err(error) = push_metrics(&client, &url, &registry).await { + errors += 1; + if errors >= 10 { + // If we hit 10 failures in a row, start logging errors. 
+ tracing::error!("unable to push metrics: {error}; new client will be created"); + } else { + tracing::warn!("unable to push metrics: {error}; new client will be created"); + } // aggressively recreate our client connection if we hit an error client = MetricsPushClient::new(metrics_key_pair.copy()); + } else { + errors = 0; } } }); diff --git a/crates/sui-node/src/metrics.rs b/crates/sui-node/src/metrics.rs index f2d0f522a749a2..edcb0d834d9e3d 100644 --- a/crates/sui-node/src/metrics.rs +++ b/crates/sui-node/src/metrics.rs @@ -45,15 +45,22 @@ pub fn start_metrics_push_task(config: &sui_config::NodeConfig, registry: Regist let mut interval = tokio::time::interval(interval); interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + let mut errors = 0; loop { interval.tick().await; - // Retry pushing metrics if there is an error. - while let Err(error) = push_metrics(&client, &url, &registry).await { - tracing::warn!("unable to push metrics: {error}; new client will be created"); - sleep(Duration::from_secs(1)).await; + if let Err(error) = push_metrics(&client, &url, &registry).await { + errors += 1; + if errors >= 10 { + // If we hit 10 failures in a row, start logging errors. + tracing::error!("unable to push metrics: {error}; new client will be created"); + } else { + tracing::warn!("unable to push metrics: {error}; new client will be created"); + } // aggressively recreate our client connection if we hit an error client = MetricsPushClient::new(config_copy.network_key_pair().copy()); + } else { + errors = 0; } } });