From 418a0d023b2b9d4f04cd4e6a1628b013349a39bc Mon Sep 17 00:00:00 2001 From: Andy Zhang <87735571+Andyz26@users.noreply.github.com> Date: Wed, 6 Nov 2024 16:30:10 -0800 Subject: [PATCH] handle busy agent hb timeout --- .../resourcecluster/ResourceClusterGatewayClient.java | 11 +++++++++++ .../io/mantisrx/server/core/CoreConfiguration.java | 6 +++--- .../runtime/loader/config/WorkerConfiguration.java | 2 +- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/mantis-control-plane/mantis-control-plane-client/src/main/java/io/mantisrx/server/master/resourcecluster/ResourceClusterGatewayClient.java b/mantis-control-plane/mantis-control-plane-client/src/main/java/io/mantisrx/server/master/resourcecluster/ResourceClusterGatewayClient.java index b9b40e282..2f8fd9b3a 100644 --- a/mantis-control-plane/mantis-control-plane-client/src/main/java/io/mantisrx/server/master/resourcecluster/ResourceClusterGatewayClient.java +++ b/mantis-control-plane/mantis-control-plane-client/src/main/java/io/mantisrx/server/master/resourcecluster/ResourceClusterGatewayClient.java @@ -24,6 +24,7 @@ import io.mantisrx.server.core.CoreConfiguration; import io.mantisrx.server.core.master.MasterDescription; import io.mantisrx.shaded.com.fasterxml.jackson.databind.ObjectMapper; +import io.netty.util.concurrent.DefaultThreadFactory; import java.io.Closeable; import java.io.IOException; import java.util.concurrent.CompletableFuture; @@ -129,6 +130,16 @@ private AsyncHttpClient buildCloseableHttpClient(CoreConfiguration configuration .setRequestTimeout(configuration.getAsyncHttpClientRequestTimeoutMs()) .setReadTimeout(configuration.getAsyncHttpClientReadTimeoutMs()) .setFollowRedirect(configuration.getAsyncHttpClientFollowRedirect()) + // set the http client thread priority to max - 1 to ensure control plane signals can still be retrieved + // even when the worker is busy. + .setThreadFactory(new DefaultThreadFactory(generateThreadPoolName(), Thread.MAX_PRIORITY - 1)) .build()); } + + private String generateThreadPoolName() { + return String.format( + "resourceClusterGatewayClient-httpclient-%s-%s", + this.masterDescription.getHostname(), + this.clusterID.getResourceID()); + } } diff --git a/mantis-control-plane/mantis-control-plane-core/src/main/java/io/mantisrx/server/core/CoreConfiguration.java b/mantis-control-plane/mantis-control-plane-core/src/main/java/io/mantisrx/server/core/CoreConfiguration.java index 1c50e16f5..c5c4580b0 100644 --- a/mantis-control-plane/mantis-control-plane-core/src/main/java/io/mantisrx/server/core/CoreConfiguration.java +++ b/mantis-control-plane/mantis-control-plane-core/src/main/java/io/mantisrx/server/core/CoreConfiguration.java @@ -67,15 +67,15 @@ public interface CoreConfiguration { int getAsyncHttpClientMaxConnectionsPerHost(); @Config("mantis.asyncHttpClient.connectionTimeoutMs") - @Default("10000") + @Default("90000") int getAsyncHttpClientConnectionTimeoutMs(); @Config("mantis.asyncHttpClient.requestTimeoutMs") - @Default("10000") + @Default("90000") int getAsyncHttpClientRequestTimeoutMs(); @Config("mantis.asyncHttpClient.readTimeoutMs") - @Default("10000") + @Default("90000") int getAsyncHttpClientReadTimeoutMs(); @Config("mantis.asyncHttpClient.followRedirect") diff --git a/mantis-runtime-loader/src/main/java/io/mantisrx/runtime/loader/config/WorkerConfiguration.java b/mantis-runtime-loader/src/main/java/io/mantisrx/runtime/loader/config/WorkerConfiguration.java index d69092ec8..f61c99b25 100644 --- a/mantis-runtime-loader/src/main/java/io/mantisrx/runtime/loader/config/WorkerConfiguration.java +++ b/mantis-runtime-loader/src/main/java/io/mantisrx/runtime/loader/config/WorkerConfiguration.java @@ -85,7 +85,7 @@ default String getTaskExecutorHostName() { int getTolerableConsecutiveHeartbeatFailures(); @Config("mantis.taskexecutor.heartbeats.timeout.ms") - @Default("5000") + @Default("90000") int heartbeatTimeoutMs(); @Config("mantis.taskexecutor.heartbeats.retry.initial-delay.ms")