Skip to content

Commit

Permalink
[PLAT-14471][PLAT-14576] Added configurable deadline, keepAlive and u…
Browse files Browse the repository at this point in the history
…navailable retries to Ybc Java client

Summary:
[PLAT-14471] Contains YBC commit: yugabyte/ybc@62939b0

Added configurable deadline, keepAlive and unavailable retries to Ybc Java client. These
settings are configurable via global runtime configs.

ISSUE
Saw hung connections without a proper RCA. The DB node had an established TCP connection, but the client did not seem to receive a response, and the Ybc Java client thread hung forever.

CHANGES
Adding the following 3 changes to client code:

- Upgraded the grpc-java version to 1.63.1
- Added a deadline to in-flight RPCs; the current value is 20 minutes. If an RPC does not receive a response within 20 minutes, the client call fails with DEADLINE_EXCEEDED. This is not retried.
- Added keepAlive and keepAliveTimeout. The keepAlive sends periodic pings to the gRPC server and waits for a reply; this happens only while an RPC is in flight. If a ping is not answered within the keepAliveTimeout, the call fails with UNAVAILABLE and is then retried by the Ybc Java client. Current values are 6 minutes for keepAlive and 2 minutes for keepAliveTimeout.

TEST
Tested by modifying the code so that the call does not return within 20 minutes. The exception raised was DEADLINE_EXCEEDED, as expected.

[PLAT-14576] Contains YBC commit: yugabyte/ybc@d423da2

Fix catalog version mechanism to use database being backed up

ISSUE
Earlier, the DB had a single global catalog version, so we used the template1 DB to run the yb_catalog_version() SQL command.
Now the catalog version is per DB, so we should use the DB being backed up to fetch the catalog version.

CHANGES
The PR changes the catalog_version SQL to connect to the database being backed up.

Added grpc-inprocess 1.63.1 dependency.

Test Plan: Manually verified

Reviewers: kkg, nsingh

Reviewed By: kkg

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D36087
  • Loading branch information
kv83821-yb committed Jul 16, 2024
1 parent 68cb1d2 commit db445ce
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 16 deletions.
3 changes: 2 additions & 1 deletion managed/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ libraryDependencies ++= Seq(
"com.icegreen" % "greenmail-junit4" % "2.0.1" % Test,
"com.squareup.okhttp3" % "mockwebserver" % "4.9.2" % Test,
"io.grpc" % "grpc-testing" % "1.48.0" % Test,
"io.grpc" % "grpc-inprocess" % "1.63.1" % Test,
"io.zonky.test" % "embedded-postgres" % "2.0.1" % Test,
"org.springframework" % "spring-test" % "5.3.9" % Test,
"com.yugabyte" % "yba-client-v2" % "0.1.0-SNAPSHOT" % Test,
Expand Down Expand Up @@ -926,7 +927,7 @@ runPlatform := {
}

libraryDependencies += "org.yb" % "yb-client" % "0.8.92-SNAPSHOT"
libraryDependencies += "org.yb" % "ybc-client" % "2.1.0.0-b9"
libraryDependencies += "org.yb" % "ybc-client" % "2.2.0.0-b2"
libraryDependencies += "org.yb" % "yb-perf-advisor" % "1.0.0-b33"

libraryDependencies ++= Seq(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1363,4 +1363,55 @@ public class GlobalConfKeys extends RuntimeConfigKeysModule {
"Interval at which the XCluster Metrics Scheduler runs",
ConfDataType.DurationType,
ImmutableList.of(ConfKeyTags.BETA));
/**
 * Global-scope runtime config key {@code ybc.client_settings.max_unavailable_retries}: the
 * maximum number of client-side retries when the server returns UNAVAILABLE. Integer-typed and
 * tagged BETA.
 */
public static final ConfKeyInfo<Integer> ybcClientMaxUnavailableRetries =
new ConfKeyInfo<>(
"ybc.client_settings.max_unavailable_retries",
ScopeType.GLOBAL,
"Max retries on UNAVAILABLE status",
"Max client side retries when server returns UNAVAILABLE status",
ConfDataType.IntegerType,
ImmutableList.of(ConfKeyTags.BETA));
/**
 * Global-scope runtime config key {@code ybc.client_settings.wait_each_unavailable_retry_ms}:
 * the wait, in milliseconds, between client-side retries when the server returns UNAVAILABLE.
 * Integer-typed and tagged BETA.
 */
public static final ConfKeyInfo<Integer> ybcClientWaitEachUnavailableRetryMs =
new ConfKeyInfo<>(
"ybc.client_settings.wait_each_unavailable_retry_ms",
ScopeType.GLOBAL,
"Wait( in milliseconds ) between each retries on UNAVAILABLE status",
"Wait( in milliseconds ) between client side retries when server returns UNAVAILABLE"
+ " status",
ConfDataType.IntegerType,
ImmutableList.of(ConfKeyTags.BETA));
/**
 * Global-scope runtime config key {@code ybc.client_settings.max_inbound_msg_size_bytes}: the
 * maximum size, in bytes, of a YB-Controller RPC response. Integer-typed and tagged BETA.
 */
public static final ConfKeyInfo<Integer> ybcClientMaxInboundMsgSize =
new ConfKeyInfo<>(
"ybc.client_settings.max_inbound_msg_size_bytes",
ScopeType.GLOBAL,
"Max size of YB-Controller RPC response",
"Max size( in bytes ) of YB-Controller RPC response",
ConfDataType.IntegerType,
ImmutableList.of(ConfKeyTags.BETA));
/**
 * Global-scope runtime config key {@code ybc.client_settings.deadline_ms}: how long, in
 * milliseconds, to wait for a YB-Controller RPC response before the client raises
 * DEADLINE_EXCEEDED. Integer-typed and tagged BETA.
 */
public static final ConfKeyInfo<Integer> ybcClientDeadlineMs =
new ConfKeyInfo<>(
"ybc.client_settings.deadline_ms",
ScopeType.GLOBAL,
"Wait( in milliseconds ) for YB-Controller RPC response",
"Wait( in milliseconds ) for YB-Controller RPC response before throwing client-side"
+ " DEADLINE_EXCEEDED",
ConfDataType.IntegerType,
ImmutableList.of(ConfKeyTags.BETA));
/**
 * Global-scope runtime config key {@code ybc.client_settings.keep_alive_ping_ms}: the wait, in
 * milliseconds, between consecutive KeepAlive pings to the YB-Controller server. Integer-typed
 * and tagged BETA.
 */
public static final ConfKeyInfo<Integer> ybcClientKeepAlivePingsMs =
new ConfKeyInfo<>(
"ybc.client_settings.keep_alive_ping_ms",
ScopeType.GLOBAL,
"Wait between each KeepAlive ping to YB-Controller server",
"Wait( in milliseconds ) between each KeepAlive ping to YB-Controller server",
ConfDataType.IntegerType,
ImmutableList.of(ConfKeyTags.BETA));
/**
 * Global-scope runtime config key {@code ybc.client_settings.keep_alive_ping_timeout_ms}: how
 * long, in milliseconds, to wait for a KeepAlive ping response from the YB-Controller server
 * before the call fails with UNAVAILABLE. Integer-typed and tagged BETA.
 */
public static final ConfKeyInfo<Integer> ybcClientKeepAlivePingsTimeoutMs =
new ConfKeyInfo<>(
"ybc.client_settings.keep_alive_ping_timeout_ms",
ScopeType.GLOBAL,
"Wait( in milliseconds ) for KeepAlive ping response from YB-Controller server",
"Wait( in milliseconds ) for KeepAlive ping response from YB-Controller server before"
+ " throwing UNAVAILABLE",
ConfDataType.IntegerType,
ImmutableList.of(ConfKeyTags.BETA));
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,83 @@

package com.yugabyte.yw.common.services;

import com.google.inject.Inject;
import com.yugabyte.yw.common.config.GlobalConfKeys;
import com.yugabyte.yw.common.config.RuntimeConfGetter;
import javax.inject.Singleton;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.yb.client.YbcClient;
import org.yb.client.YbcClient.YbcClientBuilder;
import org.yb.ybc.VersionRequest;
import org.yb.ybc.VersionResponse;

@Singleton
@Slf4j
public class YbcClientService {

private YbcClient getClient(String nodeIp, int ybcPort) {
try {
log.info("Creating ybc client for node: {} on port: {}", nodeIp, ybcPort);
return new YbcClient(nodeIp, ybcPort);
} catch (Exception e) {
throw new RuntimeException(
String.format("Error while creating YbcClient: %s", e.getMessage()));
}
private final RuntimeConfGetter confGetter;

/**
 * Creates the service with the runtime config getter that the private settings accessors use
 * to look up the {@code ybc.client_settings.*} values.
 *
 * @param confGetter runtime config getter, stored for later global config lookups
 */
@Inject
public YbcClientService(RuntimeConfGetter confGetter) {
this.confGetter = confGetter;
}

/**
 * Looks up the global runtime config value for {@code ybcClientMaxUnavailableRetries}.
 *
 * @return the value returned by the config getter for that key
 */
private Integer getMaxUnavailableRetries() {
  final Integer maxRetries =
      confGetter.getGlobalConf(GlobalConfKeys.ybcClientMaxUnavailableRetries);
  return maxRetries;
}

/**
 * Looks up the global runtime config value for {@code ybcClientWaitEachUnavailableRetryMs}.
 *
 * @return the value returned by the config getter for that key
 */
private Integer getWaitEachUnavailableRetryMs() {
  final Integer retryWaitMs =
      confGetter.getGlobalConf(GlobalConfKeys.ybcClientWaitEachUnavailableRetryMs);
  return retryWaitMs;
}

/**
 * Looks up the global runtime config value for {@code ybcClientMaxInboundMsgSize}.
 *
 * @return the value returned by the config getter for that key
 */
private Integer getMaxInboundMsgSize() {
  final Integer maxInboundBytes =
      confGetter.getGlobalConf(GlobalConfKeys.ybcClientMaxInboundMsgSize);
  return maxInboundBytes;
}

/**
 * Looks up the global runtime config value for {@code ybcClientDeadlineMs}.
 *
 * @return the value returned by the config getter for that key
 */
private Integer getDeadlineMs() {
  final Integer deadlineMs = confGetter.getGlobalConf(GlobalConfKeys.ybcClientDeadlineMs);
  return deadlineMs;
}

/**
 * Looks up the global runtime config value for {@code ybcClientKeepAlivePingsMs}.
 *
 * @return the value returned by the config getter for that key
 */
private Integer getMaxKeepAlivePingsMs() {
  final Integer keepAliveMs =
      confGetter.getGlobalConf(GlobalConfKeys.ybcClientKeepAlivePingsMs);
  return keepAliveMs;
}

/**
 * Looks up the global runtime config value for {@code ybcClientKeepAlivePingsTimeoutMs}.
 *
 * @return the value returned by the config getter for that key
 */
private Integer getMaxKeepAlivePingsTimeoutMs() {
  final Integer keepAliveTimeoutMs =
      confGetter.getGlobalConf(GlobalConfKeys.ybcClientKeepAlivePingsTimeoutMs);
  return keepAliveTimeoutMs;
}

private YbcClient getClient(String nodeIp, int ybcPort, String certFile) {
try {
log.info(
"Creating ybc client for node: {} on port: {} with cert: {}", nodeIp, ybcPort, certFile);
return new YbcClient(nodeIp, ybcPort, certFile);
YbcClientBuilder clientBuilder = YbcClient.builder();
clientBuilder
.withYbcIp(nodeIp)
.withYbcPort(ybcPort)
.withNumRetries(getMaxUnavailableRetries())
.withWaitEachRetryMs(getWaitEachUnavailableRetryMs())
.withMaxInboundSize(getMaxInboundMsgSize())
.withDeadlineMs(getDeadlineMs())
.withKeepAliveMs(getMaxKeepAlivePingsMs())
.withKeepAliveTimeoutMs(getMaxKeepAlivePingsTimeoutMs());
if (StringUtils.isNotBlank(certFile)) {
clientBuilder.withCertsFilepath(certFile);
}
return clientBuilder.build();
} catch (Exception e) {
throw new RuntimeException(
String.format("Error while creating YbcClient: %s", e.getMessage()));
}
}

public YbcClient getNewClient(String nodeIp, int ybcPort, String certFile) {
if (certFile == null) {
return getClient(nodeIp, ybcPort);
if (StringUtils.isBlank(certFile)) {
log.info("Creating ybc client for node: {} on port: {}", nodeIp, ybcPort);
return getClient(nodeIp, ybcPort, null);
} else {
log.info(
"Creating ybc client for node: {} on port: {} with cert: {}", nodeIp, ybcPort, certFile);
return getClient(nodeIp, ybcPort, certFile);
}
return getClient(nodeIp, ybcPort, certFile);
}

public void closeClient(YbcClient client) {
Expand Down
10 changes: 9 additions & 1 deletion managed/src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,7 @@ yb {

ybc {
releases {
stable_version = "2.2.0.0-b1"
stable_version = "2.2.0.0-b2"
path = "/opt/yugabyte/ybc/releases"
}
compatible_db_version = "2.15.0.0-b1"
Expand Down Expand Up @@ -1278,6 +1278,14 @@ ybc {
universe {
enabled = true
}
# Settings for the YB-Controller (YBC) Java client, exposed as the
# ybc.client_settings.* global runtime config keys.
client_settings {
# Max client-side retries when the server returns UNAVAILABLE.
max_unavailable_retries = 10
# Wait between those retries, in milliseconds (2 seconds).
wait_each_unavailable_retry_ms = 2000
# Max size of a YB-Controller RPC response, in bytes (100 MiB).
max_inbound_msg_size_bytes = 104857600
# Wait for an RPC response before client-side DEADLINE_EXCEEDED, in milliseconds (20 minutes).
deadline_ms = 1200000
# Wait between KeepAlive pings to the YB-Controller server, in milliseconds (6 minutes).
keep_alive_ping_ms = 360000
# Wait for a KeepAlive ping response before UNAVAILABLE, in milliseconds (2 minutes).
keep_alive_ping_timeout_ms = 120000
}
}

runtime_config {
Expand Down

0 comments on commit db445ce

Please sign in to comment.