Skip to content

Commit

Permalink
refactor HTTP clients (#1678)
Browse files Browse the repository at this point in the history
* add connection-request timeout

* assert connection-request timeout

* only create new client when config changed

* increase default thread-pool

* add task timeout config properties

* add TTL

* fix fallback test

* add TaskTimeoutExecutor

* disable timeout if duration is 0

* stop timeout scheduler in tests

* add more timeout to agent commands

* remove timeout from default config

* add helper method for tests

* add check for timeout isZero()

* replace task timeout with resilience4j timelimiter

* add null check for time-limit

* only create new client when agent-commands config changed

* add tests

* remove default ttl

* fix: use cached thread pool

* change logs & comments
  • Loading branch information
EddeCCC authored Dec 18, 2024
1 parent 63a8230 commit 4135aa1
Show file tree
Hide file tree
Showing 20 changed files with 824 additions and 124 deletions.
6 changes: 4 additions & 2 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ orgSpringframework = "5.3.39"
# @pin 2.7.18 is the latest release which runs on Java 8, this version marks the en of open source support for Sb 2.x
orgSpringframeworkBoot = "2.7.18"
orgTestcontainers = "1.20.3"
# @pin resilience4j 2.* uses Java 17 APIs
ioGithubResilience4j = "1.7.1"

[libraries]
# @pin 1.3.* are the latest versions, which support java 8
Expand All @@ -42,8 +44,8 @@ commonsBeanutils = "commons-beanutils:commons-beanutils:1.9.4"
commonsIo = "commons-io:commons-io:2.17.0"
ioGithubNetmikeyLogunitLogunitCore = { module = "io.github.netmikey.logunit:logunit-core", version.ref = "ioGithubNetmikeyLogunit" }
ioGithubNetmikeyLogunitLogunitLogback = { module = "io.github.netmikey.logunit:logunit-logback", version.ref = "ioGithubNetmikeyLogunit" }
# @pin resilience4j 2.* uses Java 17 APIs
ioGithubResilience4jResilience4jRetry = "io.github.resilience4j:resilience4j-retry:1.7.1"
ioGithubResilience4jResilience4jRetry = { module = "io.github.resilience4j:resilience4j-retry", version.ref = "ioGithubResilience4j" }
ioGithubResilience4jResilience4jTimelimiter = { module = "io.github.resilience4j:resilience4j-timelimiter", version.ref = "ioGithubResilience4j" }
ioGrpcGrpcNettyShaded = { module = "io.grpc:grpc-netty-shaded", version.ref = "ioGrpc" }
ioGrpcGrpcStub = { module = "io.grpc:grpc-stub", version.ref = "ioGrpc" }
# The following dependency is required for the OC-exporter to work correctly and must be matched against the grpc version
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import lombok.NoArgsConstructor;
import rocks.inspectit.ocelot.config.model.config.RetrySettings;

import javax.validation.Valid;
import javax.validation.constraints.AssertFalse;
import java.net.URL;
import java.time.Duration;

Expand Down Expand Up @@ -32,16 +34,41 @@ public class AgentCommandSettings {
private String agentCommandPath;

/**
* The timeout duration used for requests when the agent is in discovery mode. Defining how long the agent will wait for
* The timeout duration used to establish the connection with the remote host in discovery mode.
*/
private Duration liveConnectionTimeout;

/**
* The timeout duration the client will wait to acquire a connection from the connection pool in discovery mode.
*/
private Duration liveConnectionRequestTimeout;

/**
* The timeout duration used for requests when the agent is in discovery mode. Defining how long the agent will wait for
* new commands.
*/
private Duration liveSocketTimeout;

/**
* The timeout duration used to establish the connection with the remote host in normal mode.
*/
private Duration connectionTimeout;

/**
* The timeout duration the client will wait to acquire a connection from the connection pool in normal mode.
*/
private Duration connectionRequestTimeout;

/**
* The timeout duration used for requests when the agent is in normal mode.
*/
private Duration socketTimeout;

/**
* The TTL - the time to keep an HTTP connection alive
*/
private Duration timeToLive;

/**
* The used interval for polling commands.
*/
Expand All @@ -55,5 +82,21 @@ public class AgentCommandSettings {
/**
* Settings how retries are handled regarding fetching an agent command.
*/
@Valid
private RetrySettings retry;

@AssertFalse(message = "The specified time values should not be negative!")
public boolean isNegativeTimeout() {
boolean negativeLiveConnectionTimeout = liveConnectionTimeout != null && liveConnectionTimeout.isNegative();
boolean negativeConnectionTimeout = connectionTimeout != null && connectionTimeout.isNegative();
boolean negativeLiveConnectionRequestTimeout = liveConnectionRequestTimeout != null && liveConnectionRequestTimeout.isNegative();
boolean negativeConnectionRequestTimeout = connectionRequestTimeout != null && connectionRequestTimeout.isNegative();
boolean negativeLiveSocketTimeout = liveSocketTimeout != null && liveSocketTimeout.isNegative();
boolean negativeSocketTimeout = socketTimeout != null && socketTimeout.isNegative();
boolean negativeTTL = timeToLive != null && timeToLive.isNegative();
return negativeLiveConnectionTimeout || negativeConnectionTimeout ||
negativeLiveConnectionRequestTimeout || negativeConnectionRequestTimeout ||
negativeLiveSocketTimeout || negativeSocketTimeout
|| negativeTTL;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
public class HttpConfigSettings {

/**
* Whether a HTTP property source should be used.
* Whether an HTTP property source should be used.
*/
private boolean enabled;

Expand Down Expand Up @@ -53,21 +53,34 @@ public class HttpConfigSettings {
*/
private Duration connectionTimeout;

/**
* The connection-request timeout to use - the time the client will wait to acquire a connection from the connection pool
*/
private Duration connectionRequestTimeout;

/**
* The socket timeout to use - the time waiting for data after establishing the connection; maximum time of inactivity between two data packets.
*/
private Duration socketTimeout;

/**
* The TTL - the time to keep an HTTP connection alive
*/
private Duration timeToLive;

/**
* Settings how retries are handled regarding fetching an HTTP property source.
*/
@Valid
private RetrySettings retry;

@AssertFalse(message = "The specified timeout values should not be negative!")
@AssertFalse(message = "The specified time values should not be negative!")
public boolean isNegativeTimeout() {
boolean negativeConnectionTimeout = connectionTimeout != null && connectionTimeout.isNegative();
boolean negativeConnectionRequestTimeout = connectionRequestTimeout != null && connectionRequestTimeout.isNegative();
boolean negativeReadTimeout = socketTimeout != null && socketTimeout.isNegative();
return negativeConnectionTimeout || negativeReadTimeout;
boolean negativeTTL = timeToLive != null && timeToLive.isNegative();
return negativeConnectionTimeout || negativeConnectionRequestTimeout || negativeReadTimeout ||
negativeTTL;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,10 @@ public class RetrySettings {
@NotNull
// We use a BigDecimal as there is no support for double in hibernate validator
private BigDecimal randomizationFactor;

/**
* The maximum amount of time one retry is allowed to take. May not be lower than 1
*/
@DurationMin(millis = 0, inclusive = false)
private Duration timeLimit;
}
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ inspectit:
include-service-name: true

# defines how many threads inspectIT may start for its internal tasks
thread-pool-size: 2
thread-pool-size: 4

# settings for the agent commands and communication
agent-commands:
Expand All @@ -136,7 +136,7 @@ inspectit:
socket-timeout: 5s
# the used interval for polling commands
polling-interval: 15s
# how long the agent will staying in the live mode, before falling back to the normal mode
# how long the agent will stay in the live mode, before falling back to the normal mode
live-mode-duration: 2m
retry:
# true if retries are enabled, false otherwise
Expand All @@ -149,6 +149,8 @@ inspectit:
multiplier: 2
# This factor introduces randomness to what the actual wait interval will be. This prevents that a lot of agents will issue requests towards the configuration server at the same time.
randomization-factor: 0.1
# The maximum duration one retry may take
time-limit: 32m

log-preloading:
enabled: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@ inspectit:
# true if retries are enabled, false otherwise
enabled: true
# The maximum number of attempts to try to fetch the configuration
max-attempts: 7
max-attempts: 6
# The initial interval to wait after the first failed attempt.
initial-interval: 30s
# For each retry the last interval to wait is multiplied with this number to calculate the next interval to wait
multiplier: 2
# This factor introduces randomness to what the actual wait interval will be. This prevents that a lot of agents will issue requests towards the configuration server at the same time.
randomization-factor: 0.1
# The maximum duration one retry may take
time-limit: 32m
1 change: 1 addition & 0 deletions inspectit-ocelot-core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ dependencies {
libs.piccolo,
libs.comFasterxmlJacksonCoreJacksonDatabind,
libs.ioGithubResilience4jResilience4jRetry,
libs.ioGithubResilience4jResilience4jTimelimiter,

libs.orgJavassist,

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import io.github.resilience4j.retry.Retry;
import io.github.resilience4j.timelimiter.TimeLimiter;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
Expand All @@ -17,6 +18,9 @@

import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
* Component which handles the fetching of new agent commands and execution of it.
Expand Down Expand Up @@ -53,10 +57,15 @@ public class CommandHandler {
*/
private boolean liveMode = false;

/**
* Executor to cancel one command fetch after a time limit was exceeded.
*/
private final ExecutorService timeLimitExecutor = Executors.newCachedThreadPool();

/**
* Tries fetching and executing a new agent command from the server.
*/
public void nextCommand() {
public void nextCommand() throws Exception {
nextCommand(null);
}

Expand All @@ -67,7 +76,7 @@ public void nextCommand() {
*
* @param payload a {@link CommandResponse} to send with the next request
*/
private void nextCommand(CommandResponse payload) {
private void nextCommand(CommandResponse payload) throws Exception {
CommandResponse commandResponse = payload;

do {
Expand Down Expand Up @@ -101,10 +110,23 @@ private boolean isLiveModeExpired() {
return System.currentTimeMillis() >= liveModeStart + settings.getLiveModeDuration().toMillis();
}

private Command getCommandWithRetry(CommandResponse commandResponse) {
private Command getCommandWithRetry(CommandResponse commandResponse) throws Exception {
Retry retry = buildRetry();
if (retry != null) {
return retry.executeSupplier(() -> getCommand(commandResponse));
log.debug("Using Retries...");
Callable<Command> getCommand;

TimeLimiter timeLimiter = buildTimeLimiter();
if(timeLimiter != null) {
log.debug("Using TimeLimiter...");
// Use time limiter for every function call
getCommand = timeLimiter.decorateFutureSupplier(() -> timeLimitExecutor.submit(() -> getCommand(commandResponse)));
}
else getCommand = () -> getCommand(commandResponse);

Command command = retry.executeCallable(getCommand);
return command;

} else {
return getCommand(commandResponse);
}
Expand All @@ -116,6 +138,12 @@ private Retry buildRetry() {
.getRetry(), "agent-commands");
}

private TimeLimiter buildTimeLimiter() {
return RetryUtils.buildTimeLimiter(environment.getCurrentConfig()
.getAgentCommands()
.getRetry(), "agent-commands");
}

/**
* Fetches a command and processes the response.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,15 @@
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import rocks.inspectit.ocelot.commons.models.command.Command;
import rocks.inspectit.ocelot.commons.models.command.CommandResponse;
import rocks.inspectit.ocelot.config.model.command.AgentCommandSettings;
import rocks.inspectit.ocelot.core.command.http.CommandHttpClientHolder;
import rocks.inspectit.ocelot.core.config.InspectitEnvironment;

import java.io.IOException;
Expand Down Expand Up @@ -43,50 +42,16 @@ public class HttpCommandFetcher {
private static final String META_HEADER_PREFIX = "X-OCELOT-";

/**
* Http client used in the normal mode.
* The holder of the HTTP clients for agent commands.
*/
private HttpClient normalHttpClient;

/**
* Http client used in the live mode (longer timeouts).
*/
private HttpClient liveHttpClient;
private final CommandHttpClientHolder clientHolder = new CommandHttpClientHolder();

/**
* The URI for fetching commands.
*/
@Setter
private URI commandUri;

/**
* Returns the {@link HttpClient} which is used for fetching commands.
*
* @return A new {@link HttpClient} instance.
*/
private HttpClient getHttpClient(boolean liveClient) {
if (normalHttpClient == null || liveHttpClient == null) {
updateHttpClients();
}

return liveClient ? liveHttpClient : normalHttpClient;
}

/**
* Updating the http clients.
*/
private void updateHttpClients() {
AgentCommandSettings settings = environment.getCurrentConfig().getAgentCommands();
int timeout = (int) settings.getSocketTimeout().toMillis();
int liveTimeout = (int) settings.getLiveSocketTimeout().toMillis();

RequestConfig normalConfig = RequestConfig.custom().setSocketTimeout(timeout).build();

RequestConfig liveConfig = RequestConfig.custom().setSocketTimeout(liveTimeout).build();

normalHttpClient = HttpClientBuilder.create().setDefaultRequestConfig(normalConfig).build();
liveHttpClient = HttpClientBuilder.create().setDefaultRequestConfig(liveConfig).build();
}

/**
* Fetches a {@link Command} by sending the given {@link CommandResponse} as payload and uses the given timeout-int as timeout.
*
Expand Down Expand Up @@ -148,4 +113,17 @@ private void setAgentMetaHeaders(HttpPost httpPost) {

httpPost.setHeader(META_HEADER_PREFIX + "AGENT-ID", runtime.getName());
}

/**
* Returns the {@link HttpClient} which is used for fetching commands.
*
* @param liveClient true, if live-mode is active
* @return A {@link HttpClient} instance.
*/
private HttpClient getHttpClient(boolean liveClient) throws IOException {
AgentCommandSettings currentSettings = environment.getCurrentConfig().getAgentCommands();

if(liveClient) return clientHolder.getLiveHttpClient(currentSettings);
else return clientHolder.getDiscoveryHttpClient(currentSettings);
}
}
Loading

0 comments on commit 4135aa1

Please sign in to comment.