Fix NOT_STARTED statuses appearing inappropriately during node shutdown (#75750) (#76634)

* Fix `NOT_STARTED` statuses appearing inappropriately during node shutdown (#75750)

This PR fixes two situations where `NOT_STARTED` can appear as the shard migration status inappropriately:
1. When the node has actually shut down after all of its shards have migrated away.
2. When a non-data-node is registered for shutdown.

It also adds tests to ensure these cases are handled correctly.

* Fix compilation for backport

* Fix compilation for backport
gwbrown authored Aug 18, 2021
1 parent 7d23843 commit f5df6b9
Showing 8 changed files with 357 additions and 28 deletions.
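For context, here is a minimal sketch of the status-resolution logic this fix implies. It is illustrative only, with hypothetical method and parameter names; it is not the production code from this commit (which lives in the changed files not shown below):

// Illustrative sketch, not the actual code from this commit. The gist:
// NOT_STARTED should only be reported while we are genuinely waiting on a
// node; the new nodeSeen flag plus a data-role check let the two cases from
// the commit message resolve to COMPLETE instead.
static SingleNodeShutdownMetadata.Status resolveMigrationStatus(
    SingleNodeShutdownMetadata shutdown,
    boolean nodeIsInCluster,   // is the node currently part of the cluster?
    boolean nodeHoldsShards,   // does it still have shards allocated?
    boolean nodeCanHoldData    // does it have a data role?
) {
    if (nodeCanHoldData == false) {
        // Case 2: a non-data node has no shards to migrate.
        return SingleNodeShutdownMetadata.Status.COMPLETE;
    }
    if (nodeIsInCluster == false && shutdown.getNodeSeen()) {
        // Case 1: the node was seen after registration and has since left,
        // so migration is done rather than "not started".
        return SingleNodeShutdownMetadata.Status.COMPLETE;
    }
    return nodeHoldsShards
        ? SingleNodeShutdownMetadata.Status.IN_PROGRESS
        : SingleNodeShutdownMetadata.Status.COMPLETE;
}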
SingleNodeShutdownMetadata.java
@@ -39,6 +39,7 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
public static final String STARTED_AT_READABLE_FIELD = "shutdown_started";
public static final ParseField STARTED_AT_MILLIS_FIELD = new ParseField(STARTED_AT_READABLE_FIELD + "millis");
public static final ParseField ALLOCATION_DELAY_FIELD = new ParseField("allocation_delay");
public static final ParseField NODE_SEEN_FIELD = new ParseField("node_seen");

public static final ConstructingObjectParser<SingleNodeShutdownMetadata, Void> PARSER = new ConstructingObjectParser<>(
"node_shutdown_info",
@@ -47,7 +48,8 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
Type.valueOf((String) a[1]),
(String) a[2],
(long) a[3],
(boolean) a[4],
(TimeValue) a[5]
)
);

@@ -56,6 +58,7 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
PARSER.declareString(ConstructingObjectParser.constructorArg(), TYPE_FIELD);
PARSER.declareString(ConstructingObjectParser.constructorArg(), REASON_FIELD);
PARSER.declareLong(ConstructingObjectParser.constructorArg(), STARTED_AT_MILLIS_FIELD);
PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), NODE_SEEN_FIELD);
PARSER.declareField(
ConstructingObjectParser.optionalConstructorArg(),
(p, c) -> TimeValue.parseTimeValue(p.textOrNull(), ALLOCATION_DELAY_FIELD.getPreferredName()), ALLOCATION_DELAY_FIELD,
@@ -73,6 +76,7 @@ public static SingleNodeShutdownMetadata parse(XContentParser parser) {
private final Type type;
private final String reason;
private final long startedAtMillis;
private final boolean nodeSeen;
@Nullable private final TimeValue allocationDelay;

/**
@@ -86,12 +90,14 @@ private SingleNodeShutdownMetadata(
Type type,
String reason,
long startedAtMillis,
boolean nodeSeen,
@Nullable TimeValue allocationDelay
) {
this.nodeId = Objects.requireNonNull(nodeId, "node ID must not be null");
this.type = Objects.requireNonNull(type, "shutdown type must not be null");
this.reason = Objects.requireNonNull(reason, "shutdown reason must not be null");
this.startedAtMillis = startedAtMillis;
this.nodeSeen = nodeSeen;
if (allocationDelay != null && Type.RESTART.equals(type) == false) {
throw new IllegalArgumentException("shard allocation delay is only valid for RESTART-type shutdowns");
}
@@ -103,6 +109,7 @@ public SingleNodeShutdownMetadata(StreamInput in) throws IOException {
this.type = in.readEnum(Type.class);
this.reason = in.readString();
this.startedAtMillis = in.readVLong();
this.nodeSeen = in.readBoolean();
this.allocationDelay = in.readOptionalTimeValue();
}

@@ -134,6 +141,13 @@ public long getStartedAtMillis() {
return startedAtMillis;
}

/**
* @return A boolean indicating whether this node has been seen in the cluster since the shutdown was registered.
*/
public boolean getNodeSeen() {
return nodeSeen;
}

/**
* @return The amount of time shard reallocation should be delayed for shards on this node, so that they will not be automatically
* reassigned while the node is restarting. Will be {@code null} for non-restart shutdowns.
@@ -154,6 +168,7 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeEnum(type);
out.writeString(reason);
out.writeVLong(startedAtMillis);
out.writeBoolean(nodeSeen);
out.writeOptionalTimeValue(allocationDelay);
}

@@ -165,6 +180,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field(TYPE_FIELD.getPreferredName(), type);
builder.field(REASON_FIELD.getPreferredName(), reason);
builder.timeField(STARTED_AT_MILLIS_FIELD.getPreferredName(), STARTED_AT_READABLE_FIELD, startedAtMillis);
builder.field(NODE_SEEN_FIELD.getPreferredName(), nodeSeen);
if (allocationDelay != null) {
builder.field(ALLOCATION_DELAY_FIELD.getPreferredName(), allocationDelay.getStringRep());
}
@@ -183,6 +199,7 @@ public boolean equals(Object o) {
&& getNodeId().equals(that.getNodeId())
&& getType() == that.getType()
&& getReason().equals(that.getReason())
&& getNodeSeen() == that.getNodeSeen()
&& Objects.equals(allocationDelay, that.allocationDelay);
}

@@ -193,6 +210,7 @@ public int hashCode() {
getType(),
getReason(),
getStartedAtMillis(),
getNodeSeen(),
allocationDelay
);
}
@@ -209,14 +227,16 @@ public static Builder builder(SingleNodeShutdownMetadata original) {
.setNodeId(original.getNodeId())
.setType(original.getType())
.setReason(original.getReason())
.setStartedAtMillis(original.getStartedAtMillis())
.setNodeSeen(original.getNodeSeen());
}

public static class Builder {
private String nodeId;
private Type type;
private String reason;
private long startedAtMillis = -1;
private boolean nodeSeen = false;
private TimeValue allocationDelay;

private Builder() {}
@@ -257,6 +277,15 @@ public Builder setStartedAtMillis(long startedAtMillis) {
return this;
}

/**
* @param nodeSeen Whether or not the node has been seen since the shutdown was registered.
* @return This builder.
*/
public Builder setNodeSeen(boolean nodeSeen) {
this.nodeSeen = nodeSeen;
return this;
}

/**
* @param allocationDelay The amount of time shard reallocation should be delayed while this node is offline.
* @return This builder.
@@ -275,7 +304,9 @@ public SingleNodeShutdownMetadata build() {
nodeId,
type,
reason,
startedAtMillis,
nodeSeen,
allocationDelay
);
}
}
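A quick round-trip sketch of the new field, assuming the no-arg builder() factory that the copying builder(original) above chains from:

// Sketch: construct metadata with the new flag and rely on the
// writeBoolean/readBoolean and toXContent plumbing added above.
SingleNodeShutdownMetadata metadata = SingleNodeShutdownMetadata.builder()
    .setNodeId("node-abc123")                        // hypothetical node ID
    .setType(SingleNodeShutdownMetadata.Type.REMOVE)
    .setReason("hardware replacement")
    .setStartedAtMillis(System.currentTimeMillis())
    .setNodeSeen(true)                               // new in this commit; defaults to false
    .build();
// XContent output now carries "node_seen" alongside the existing fields, and
// the wire format round-trips it via the StreamInput/StreamOutput code above.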
@@ -88,7 +88,8 @@ private SingleNodeShutdownMetadata randomNodeShutdownInfo() {
if (type.equals(SingleNodeShutdownMetadata.Type.RESTART) && randomBoolean()) {
builder.setAllocationDelay(TimeValue.parseTimeValue(randomTimeValue(), this.getTestName()));
}
return builder.setNodeSeen(randomBoolean())
.build();
}

@Override
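Because nodeSeen now participates in equals()/hashCode() and the wire format, randomizing it above gives the serialization tests coverage of both values. A minimal sketch of the property this buys, using the copying builder (hypothetical test snippet, not from this commit):

SingleNodeShutdownMetadata original = randomNodeShutdownInfo();
SingleNodeShutdownMetadata flipped = SingleNodeShutdownMetadata.builder(original)
    .setNodeSeen(original.getNodeSeen() == false) // flip only the new field
    .build();
assertNotEquals(original, flipped); // nodeSeen participates in equality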
NodeShutdownShardsIT.java (new file)
@@ -0,0 +1,147 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.shutdown;

import org.elasticsearch.Build;
import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalTestCluster;

import java.util.Arrays;
import java.util.Collection;

import static org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata.Status.COMPLETE;
import static org.hamcrest.Matchers.equalTo;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, numClientNodes = 0, transportClientRatio = 0)
public class NodeShutdownShardsIT extends ESIntegTestCase {

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(ShutdownPlugin.class);
}

/**
* Verifies that a node that's removed from the cluster with zero shards stays in the `COMPLETE` status after it leaves, rather than
* reverting to `NOT_STARTED` (this was a bug in the initial implementation).
*/
public void testShardStatusStaysCompleteAfterNodeLeaves() throws Exception {
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
final String nodeToRestartName = internalCluster().startNode();
final String nodeToRestartId = getNodeId(nodeToRestartName);
internalCluster().startNode();

// Mark the node for shutdown
PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
nodeToRestartId,
SingleNodeShutdownMetadata.Type.REMOVE,
this.getTestName(),
null
);
AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
assertTrue(putShutdownResponse.isAcknowledged());

internalCluster().stopNode(nodeToRestartName);

NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
assertThat(nodes.getNodes().size(), equalTo(1));

GetShutdownStatusAction.Response getResp = client().execute(
GetShutdownStatusAction.INSTANCE,
new GetShutdownStatusAction.Request(nodeToRestartId)
).get();

assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
}

/**
* Similar to the previous test, but ensures that the status stays at `COMPLETE` when the shutdown is registered while the node
* is offline. This test would fail if {@link NodeSeenService} isn't working as expected.
*/
public void testShardStatusStaysCompleteAfterNodeLeavesIfRegisteredWhileNodeOffline() throws Exception {
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
final String nodeToRestartName = internalCluster().startNode();
final String nodeToRestartId = getNodeId(nodeToRestartName);
internalCluster().startNode();

// Restart the node we're going to shut down, registering the shutdown while it's offline. This checks that the
// cluster state listener is working correctly.
internalCluster().restartNode(nodeToRestartName, new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
nodeToRestartId,
SingleNodeShutdownMetadata.Type.REMOVE,
"testShardStatusStaysCompleteAfterNodeLeavesIfRegisteredWhileNodeOffline",
null
);
AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
assertTrue(putShutdownResponse.isAcknowledged());

return super.onNodeStopped(nodeName);
}
});

internalCluster().stopNode(nodeToRestartName);

NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
assertThat(nodes.getNodes().size(), equalTo(1));

GetShutdownStatusAction.Response getResp = client().execute(
GetShutdownStatusAction.INSTANCE,
new GetShutdownStatusAction.Request(nodeToRestartId)
).get();

assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
}

/**
* Checks that non-data nodes that are registered for shutdown have a shard migration status of `COMPLETE` rather than
* `NOT_STARTED` (this was a bug in the initial implementation).
*/
public void testShardStatusIsCompleteOnNonDataNodes() throws Exception {
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
final String nodeToShutDownName = internalCluster().startMasterOnlyNode();
internalCluster().startMasterOnlyNode(); // Just to have at least one other node
final String nodeToRestartId = getNodeId(nodeToShutDownName);

// Mark the node for shutdown
PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
nodeToRestartId,
SingleNodeShutdownMetadata.Type.REMOVE,
this.getTestName(),
null
);
AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
assertTrue(putShutdownResponse.isAcknowledged());

GetShutdownStatusAction.Response getResp = client().execute(
GetShutdownStatusAction.INSTANCE,
new GetShutdownStatusAction.Request(nodeToRestartId)
).get();

assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
}

private String getNodeId(String nodeName) throws Exception {
NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
return nodes.getNodes()
.stream()
.map(NodeInfo::getNode)
.filter(node -> node.getName().equals(nodeName))
.map(DiscoveryNode::getId)
.findFirst()
.orElseThrow(() -> new AssertionError("requested node name [" + nodeName + "] not found"));
}
}
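For reference, a REST-level sketch of what these tests do through transport actions, using the low-level REST client. The _nodes/&lt;node-id&gt;/shutdown routes are the endpoints this plugin exposes (behind a feature flag at this point, hence the snapshot-build assumptions above); treat the exact request bodies as illustrative:

import org.apache.http.HttpHost;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.RestClient;

public class ShutdownStatusExample {
    public static void main(String[] args) throws Exception {
        String nodeId = "node-abc123"; // hypothetical node ID
        try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200)).build()) {
            // Register the node for removal (the REST analogue of PutShutdownNodeAction).
            Request put = new Request("PUT", "/_nodes/" + nodeId + "/shutdown");
            put.setJsonEntity("{\"type\": \"remove\", \"reason\": \"example\"}");
            client.performRequest(put);

            // Fetch the status (the REST analogue of GetShutdownStatusAction). With this
            // fix, a non-data node or an already-departed node reports COMPLETE.
            Request get = new Request("GET", "/_nodes/" + nodeId + "/shutdown");
            System.out.println(EntityUtils.toString(client.performRequest(get).getEntity()));
        }
    }
}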
