Skip to content

Commit

Permalink
xds: Add xDS node ID in few control plane errors (#11519)
Browse files Browse the repository at this point in the history
  • Loading branch information
DNVindhya authored Sep 12, 2024
1 parent 15cd2f9 commit f3cf7c3
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 22 deletions.
18 changes: 11 additions & 7 deletions xds/src/main/java/io/grpc/xds/CdsLoadBalancer2.java
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,9 @@ private void handleClusterDiscovered() {
}
loopStatus = Status.UNAVAILABLE.withDescription(String.format(
"CDS error: circular aggregate clusters directly under %s for "
+ "root cluster %s, named %s",
clusterState.name, root.name, namesCausingLoops));
+ "root cluster %s, named %s, xDS node ID: %s",
clusterState.name, root.name, namesCausingLoops,
xdsClient.getBootstrapInfo().node().getId()));
}
}
}
Expand All @@ -224,9 +225,9 @@ private void handleClusterDiscovered() {
childLb.shutdown();
childLb = null;
}
Status unavailable =
Status.UNAVAILABLE.withDescription("CDS error: found 0 leaf (logical DNS or EDS) "
+ "clusters for root cluster " + root.name);
Status unavailable = Status.UNAVAILABLE.withDescription(String.format(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster %s"
+ " xDS node ID: %s", root.name, xdsClient.getBootstrapInfo().node().getId()));
helper.updateBalancingState(
TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(unavailable)));
return;
Expand Down Expand Up @@ -288,11 +289,14 @@ private void addAncestors(Set<String> ancestors, ClusterState clusterState,
}

private void handleClusterDiscoveryError(Status error) {
String description = error.getDescription() == null ? "" : error.getDescription() + " ";
Status errorWithNodeId = error.withDescription(
description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId());
if (childLb != null) {
childLb.handleNameResolutionError(error);
childLb.handleNameResolutionError(errorWithNodeId);
} else {
helper.updateBalancingState(
TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(error)));
TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(errorWithNodeId)));
}
}

Expand Down
4 changes: 3 additions & 1 deletion xds/src/main/java/io/grpc/xds/XdsNameResolver.java
Original file line number Diff line number Diff line change
Expand Up @@ -815,10 +815,12 @@ private void cleanUpRoutes(String error) {
// the config selector handles the error message itself. Once the LB API allows providing
// failure information for addresses yet still providing a service config, the config seector
// could be avoided.
String errorWithNodeId =
error + ", xDS node ID: " + xdsClient.getBootstrapInfo().node().getId();
listener.onResult(ResolutionResult.newBuilder()
.setAttributes(Attributes.newBuilder()
.set(InternalConfigSelector.KEY,
new FailingConfigSelector(Status.UNAVAILABLE.withDescription(error)))
new FailingConfigSelector(Status.UNAVAILABLE.withDescription(errorWithNodeId)))
.build())
.setServiceConfig(emptyServiceConfig)
.build());
Expand Down
15 changes: 11 additions & 4 deletions xds/src/main/java/io/grpc/xds/XdsServerWrapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,8 @@ public void onResourceDoesNotExist(final String resourceName) {
return;
}
StatusException statusException = Status.UNAVAILABLE.withDescription(
"Listener " + resourceName + " unavailable").asException();
String.format("Listener %s unavailable, xDS node ID: %s", resourceName,
xdsClient.getBootstrapInfo().node().getId())).asException();
handleConfigNotFound(statusException);
}

Expand All @@ -434,9 +435,12 @@ public void onError(final Status error) {
if (stopped) {
return;
}
logger.log(Level.FINE, "Error from XdsClient", error);
String description = error.getDescription() == null ? "" : error.getDescription() + " ";
Status errorWithNodeId = error.withDescription(
description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId());
logger.log(Level.FINE, "Error from XdsClient", errorWithNodeId);
if (!isServing) {
listener.onNotServing(error.asException());
listener.onNotServing(errorWithNodeId.asException());
}
}

Expand Down Expand Up @@ -664,8 +668,11 @@ public void run() {
if (!routeDiscoveryStates.containsKey(resourceName)) {
return;
}
String description = error.getDescription() == null ? "" : error.getDescription() + " ";
Status errorWithNodeId = error.withDescription(
description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId());
logger.log(Level.WARNING, "Error loading RDS resource {0} from XdsClient: {1}.",
new Object[]{resourceName, error});
new Object[]{resourceName, errorWithNodeId});
maybeUpdateSelector();
}
});
Expand Down
44 changes: 34 additions & 10 deletions xds/src/test/java/io/grpc/xds/CdsLoadBalancer2Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@
import io.grpc.xds.LeastRequestLoadBalancer.LeastRequestConfig;
import io.grpc.xds.RingHashLoadBalancer.RingHashConfig;
import io.grpc.xds.XdsClusterResource.CdsUpdate;
import io.grpc.xds.client.Bootstrapper.BootstrapInfo;
import io.grpc.xds.client.Bootstrapper.ServerInfo;
import io.grpc.xds.client.EnvoyProtoData;
import io.grpc.xds.client.XdsClient;
import io.grpc.xds.client.XdsResourceType;
import io.grpc.xds.internal.security.CommonTlsContextTestsUtil;
Expand Down Expand Up @@ -94,6 +96,16 @@ public class CdsLoadBalancer2Test {
private static final String DNS_HOST_NAME = "backend-service-dns.googleapis.com:443";
private static final ServerInfo LRS_SERVER_INFO =
ServerInfo.create("lrs.googleapis.com", InsecureChannelCredentials.create());
private static final String SERVER_URI = "trafficdirector.googleapis.com";
private static final String NODE_ID =
"projects/42/networks/default/nodes/5c85b298-6f5b-4722-b74a-f7d1f0ccf5ad";
private static final EnvoyProtoData.Node BOOTSTRAP_NODE =
EnvoyProtoData.Node.newBuilder().setId(NODE_ID).build();
private static final BootstrapInfo BOOTSTRAP_INFO = BootstrapInfo.builder()
.servers(ImmutableList.of(
ServerInfo.create(SERVER_URI, InsecureChannelCredentials.create())))
.node(BOOTSTRAP_NODE)
.build();
private final UpstreamTlsContext upstreamTlsContext =
CommonTlsContextTestsUtil.buildUpstreamTlsContext("google_cloud_private_spiffe", true);
private final OutlierDetection outlierDetection = OutlierDetection.create(
Expand Down Expand Up @@ -211,7 +223,8 @@ public void nonAggregateCluster_resourceNotExist_returnErrorPicker() {
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
assertThat(childBalancers).isEmpty();
}
Expand Down Expand Up @@ -254,7 +267,8 @@ public void nonAggregateCluster_resourceRevoked() {
xdsClient.deliverResourceNotExist(CLUSTER);
assertThat(childBalancer.shutdown).isTrue();
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
assertPicker(pickerCaptor.getValue(), unavailable, null);
Expand Down Expand Up @@ -331,7 +345,8 @@ public void aggregateCluster_noNonAggregateClusterExits_returnErrorPicker() {
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
assertThat(childBalancers).isEmpty();
}
Expand Down Expand Up @@ -379,7 +394,8 @@ public void aggregateCluster_descendantClustersRevoked() {
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
assertThat(childBalancer.shutdown).isTrue();
assertThat(childBalancers).isEmpty();
Expand Down Expand Up @@ -418,7 +434,8 @@ public void aggregateCluster_rootClusterRevoked() {
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
assertThat(childBalancer.shutdown).isTrue();
assertThat(childBalancers).isEmpty();
Expand Down Expand Up @@ -466,7 +483,8 @@ public void aggregateCluster_intermediateClusterChanges() {
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
assertThat(childBalancer.shutdown).isTrue();
assertThat(childBalancers).isEmpty();
Expand Down Expand Up @@ -507,7 +525,7 @@ public void aggregateCluster_withLoops() {
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: circular aggregate clusters directly under cluster-02.googleapis.com for root"
+ " cluster cluster-foo.googleapis.com, named [cluster-01.googleapis.com,"
+ " cluster-02.googleapis.com]");
+ " cluster-02.googleapis.com], xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
}

Expand Down Expand Up @@ -549,7 +567,7 @@ public void aggregateCluster_withLoops_afterEds() {
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: circular aggregate clusters directly under cluster-02.googleapis.com for root"
+ " cluster cluster-foo.googleapis.com, named [cluster-01.googleapis.com,"
+ " cluster-02.googleapis.com]");
+ " cluster-02.googleapis.com], xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
}

Expand Down Expand Up @@ -617,7 +635,7 @@ public void aggregateCluster_discoveryErrorBeforeChildLbCreated_returnErrorPicke
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status expectedError = Status.UNAVAILABLE.withDescription(
"Unable to load CDS cluster-foo.googleapis.com. xDS server returned: "
+ "RESOURCE_EXHAUSTED: OOM");
+ "RESOURCE_EXHAUSTED: OOM xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), expectedError, null);
assertThat(childBalancers).isEmpty();
}
Expand Down Expand Up @@ -647,7 +665,8 @@ public void aggregateCluster_discoveryErrorAfterChildLbCreated_propagateToChildL

@Test
public void handleNameResolutionErrorFromUpstream_beforeChildLbCreated_returnErrorPicker() {
Status upstreamError = Status.UNAVAILABLE.withDescription("unreachable");
Status upstreamError = Status.UNAVAILABLE.withDescription(
"unreachable xDS node ID: " + NODE_ID);
loadBalancer.handleNameResolutionError(upstreamError);
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Expand Down Expand Up @@ -821,6 +840,11 @@ public <T extends ResourceUpdate> void cancelXdsResourceWatch(XdsResourceType<T>
}
}

@Override
public BootstrapInfo getBootstrapInfo() {
return BOOTSTRAP_INFO;
}

private void deliverCdsUpdate(String clusterName, CdsUpdate update) {
if (watchers.containsKey(clusterName)) {
List<ResourceWatcher<CdsUpdate>> resourceWatchers =
Expand Down

0 comments on commit f3cf7c3

Please sign in to comment.