Fix NOT_STARTED statuses appearing inappropriately during node shutdown #75750

Merged 23 commits on Aug 18, 2021

Changes from all commits

Commits (23):
8021fab
Add (failing) test to reproduce bug
gwbrown Jul 27, 2021
f3960d0
Add another test for another case of the bug
gwbrown Jul 27, 2021
2aed2f1
Minor cleanup
gwbrown Jul 27, 2021
d619654
Remove unused imports
gwbrown Jul 27, 2021
946bdd4
Merge branch 'master' into decom/not-started-bug
gwbrown Aug 2, 2021
9a0f6d5
Fix `NOT_STARTED` shard migration status appearing for non-data nodes
gwbrown Aug 2, 2021
d3b68d6
Merge branch 'master' into decom/not-started-bug
gwbrown Aug 12, 2021
2d740f9
Imports
gwbrown Aug 12, 2021
14f681f
spotless
gwbrown Aug 12, 2021
ca88ade
Add node-seen tracking to shutdown metadata
gwbrown Aug 12, 2021
8a7f0de
Fix test names
gwbrown Aug 12, 2021
aafb153
Actually start the service + add test to make sure we did
gwbrown Aug 12, 2021
e8e7914
ShutdownService javadoc
gwbrown Aug 13, 2021
65fcfdd
Add `nodesAdded()` check
gwbrown Aug 13, 2021
3e371fa
Check both nodesNotPreviouslySeen and current known nodes in update task
gwbrown Aug 13, 2021
04957be
Check to make sure the update isn't a no-op before making a new Clust…
gwbrown Aug 13, 2021
749a821
Test javadoc
gwbrown Aug 13, 2021
f6c7f42
Merge branch 'master' into decom/not-started-bug
gwbrown Aug 13, 2021
a090c7f
spotless
gwbrown Aug 13, 2021
52230a4
Merge branch 'master' into decom/not-started-bug
gwbrown Aug 17, 2021
d3f31ed
Merge branch 'master' into decom/not-started-bug
gwbrown Aug 17, 2021
8832c42
ShutdownService -> NodeSeenService
gwbrown Aug 17, 2021
960b353
Merge branch 'master' into decom/not-started-bug
gwbrown Aug 17, 2021
@@ -39,6 +39,7 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
public static final String STARTED_AT_READABLE_FIELD = "shutdown_started";
public static final ParseField STARTED_AT_MILLIS_FIELD = new ParseField(STARTED_AT_READABLE_FIELD + "millis");
public static final ParseField ALLOCATION_DELAY_FIELD = new ParseField("allocation_delay");
public static final ParseField NODE_SEEN_FIELD = new ParseField("node_seen");

public static final ConstructingObjectParser<SingleNodeShutdownMetadata, Void> PARSER = new ConstructingObjectParser<>(
"node_shutdown_info",
@@ -47,7 +48,8 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
Type.valueOf((String) a[1]),
(String) a[2],
(long) a[3],
(TimeValue) a[4]
(boolean) a[4],
(TimeValue) a[5]
)
);

@@ -56,6 +58,7 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
PARSER.declareString(ConstructingObjectParser.constructorArg(), TYPE_FIELD);
PARSER.declareString(ConstructingObjectParser.constructorArg(), REASON_FIELD);
PARSER.declareLong(ConstructingObjectParser.constructorArg(), STARTED_AT_MILLIS_FIELD);
PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), NODE_SEEN_FIELD);
PARSER.declareField(
ConstructingObjectParser.optionalConstructorArg(),
(p, c) -> TimeValue.parseTimeValue(p.textOrNull(), ALLOCATION_DELAY_FIELD.getPreferredName()), ALLOCATION_DELAY_FIELD,
@@ -73,6 +76,7 @@ public static SingleNodeShutdownMetadata parse(XContentParser parser) {
private final Type type;
private final String reason;
private final long startedAtMillis;
private final boolean nodeSeen;
@Nullable private final TimeValue allocationDelay;

/**
@@ -86,12 +90,14 @@ private SingleNodeShutdownMetadata(
Type type,
String reason,
long startedAtMillis,
boolean nodeSeen,
@Nullable TimeValue allocationDelay
) {
this.nodeId = Objects.requireNonNull(nodeId, "node ID must not be null");
this.type = Objects.requireNonNull(type, "shutdown type must not be null");
this.reason = Objects.requireNonNull(reason, "shutdown reason must not be null");
this.startedAtMillis = startedAtMillis;
this.nodeSeen = nodeSeen;
if (allocationDelay != null && Type.RESTART.equals(type) == false) {
throw new IllegalArgumentException("shard allocation delay is only valid for RESTART-type shutdowns");
}
@@ -103,6 +109,7 @@ public SingleNodeShutdownMetadata(StreamInput in) throws IOException {
this.type = in.readEnum(Type.class);
this.reason = in.readString();
this.startedAtMillis = in.readVLong();
this.nodeSeen = in.readBoolean();
this.allocationDelay = in.readOptionalTimeValue();
}

@@ -134,6 +141,13 @@ public long getStartedAtMillis() {
return startedAtMillis;
}

/**
* @return A boolean indicating whether this node has been seen in the cluster since the shutdown was registered.
*/
public boolean getNodeSeen() {
return nodeSeen;
}

/**
* @return The amount of time shard reallocation should be delayed for shards on this node, so that they will not be automatically
* reassigned while the node is restarting. Will be {@code null} for non-restart shutdowns.
@@ -154,6 +168,7 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeEnum(type);
out.writeString(reason);
out.writeVLong(startedAtMillis);
out.writeBoolean(nodeSeen);
out.writeOptionalTimeValue(allocationDelay);
}

@@ -165,6 +180,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field(TYPE_FIELD.getPreferredName(), type);
builder.field(REASON_FIELD.getPreferredName(), reason);
builder.timeField(STARTED_AT_MILLIS_FIELD.getPreferredName(), STARTED_AT_READABLE_FIELD, startedAtMillis);
builder.field(NODE_SEEN_FIELD.getPreferredName(), nodeSeen);
if (allocationDelay != null) {
builder.field(ALLOCATION_DELAY_FIELD.getPreferredName(), allocationDelay.getStringRep());
}
@@ -183,6 +199,7 @@ public boolean equals(Object o) {
&& getNodeId().equals(that.getNodeId())
&& getType() == that.getType()
&& getReason().equals(that.getReason())
&& getNodeSeen() == that.getNodeSeen()
&& Objects.equals(allocationDelay, that.allocationDelay);
}

@@ -193,6 +210,7 @@ public int hashCode() {
getType(),
getReason(),
getStartedAtMillis(),
getNodeSeen(),
allocationDelay
);
}
@@ -209,14 +227,16 @@ public static Builder builder(SingleNodeShutdownMetadata original) {
.setNodeId(original.getNodeId())
.setType(original.getType())
.setReason(original.getReason())
.setStartedAtMillis(original.getStartedAtMillis());
.setStartedAtMillis(original.getStartedAtMillis())
.setNodeSeen(original.getNodeSeen());
}

public static class Builder {
private String nodeId;
private Type type;
private String reason;
private long startedAtMillis = -1;
private boolean nodeSeen = false;
private TimeValue allocationDelay;

private Builder() {}
@@ -257,6 +277,15 @@ public Builder setStartedAtMillis(long startedAtMillis) {
return this;
}

/**
* @param nodeSeen Whether or not the node has been seen since the shutdown was registered.
* @return This builder.
*/
public Builder setNodeSeen(boolean nodeSeen) {
this.nodeSeen = nodeSeen;
return this;
}

/**
* @param allocationDelay The amount of time shard reallocation should be delayed while this node is offline.
* @return This builder.
@@ -275,7 +304,9 @@ public SingleNodeShutdownMetadata build() {
nodeId,
type,
reason,
startedAtMillis, allocationDelay
startedAtMillis,
nodeSeen,
allocationDelay
);
}
}
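As a reference for how the new flag is consumed, here is a minimal usage sketch of the updated builder. It assumes a no-argument SingleNodeShutdownMetadata.builder() factory (not visible in the hunks above); the node ID and reason are placeholders:

// Sketch only: registering a REMOVE-type shutdown entry with the new node-seen flag.
SingleNodeShutdownMetadata metadata = SingleNodeShutdownMetadata.builder() // assumed no-arg factory
    .setNodeId("node-1")                                  // placeholder node ID
    .setType(SingleNodeShutdownMetadata.Type.REMOVE)
    .setReason("decommissioning")                         // placeholder reason
    .setStartedAtMillis(System.currentTimeMillis())
    .setNodeSeen(false) // defaults to false; intended to become true once the node is observed in the cluster
    .build();

setAllocationDelay(...) is deliberately omitted here: the constructor rejects an allocation delay for anything other than RESTART-type shutdowns.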
@@ -88,7 +88,8 @@ private SingleNodeShutdownMetadata randomNodeShutdownInfo() {
if (type.equals(SingleNodeShutdownMetadata.Type.RESTART) && randomBoolean()) {
builder.setAllocationDelay(TimeValue.parseTimeValue(randomTimeValue(), this.getTestName()));
}
return builder.build();
return builder.setNodeSeen(randomBoolean())
.build();
}

@Override
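For reference, the flag also surfaces in the X-Content form of each shutdown entry via the toXContent change above. A REMOVE-type entry would serialize roughly as follows; the node_id, type, and reason key names and all values are illustrative assumptions, shutdown_started (a date string) replaces shutdown_startedmillis when human-readable output is requested, and allocation_delay appears only for RESTART-type entries:

{
  "node_id" : "node-1",
  "type" : "REMOVE",
  "reason" : "decommissioning",
  "shutdown_startedmillis" : 1628899200000,
  "node_seen" : false
}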
@@ -0,0 +1,147 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.shutdown;

import org.elasticsearch.Build;
import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalTestCluster;

import java.util.Arrays;
import java.util.Collection;

import static org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata.Status.COMPLETE;
import static org.hamcrest.Matchers.equalTo;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, numClientNodes = 0)
public class NodeShutdownShardsIT extends ESIntegTestCase {

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(ShutdownPlugin.class);
}

/**
* Verifies that a node that's removed from the cluster with zero shards stays in the `COMPLETE` status after it leaves, rather than
* reverting to `NOT_STARTED` (this was a bug in the initial implementation).
*/
public void testShardStatusStaysCompleteAfterNodeLeaves() throws Exception {
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
final String nodeToRestartName = internalCluster().startNode();
final String nodeToRestartId = getNodeId(nodeToRestartName);
internalCluster().startNode();

// Mark the node for shutdown
PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
nodeToRestartId,
SingleNodeShutdownMetadata.Type.REMOVE,
this.getTestName(),
null
);
AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
assertTrue(putShutdownResponse.isAcknowledged());

internalCluster().stopNode(nodeToRestartName);

NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
assertThat(nodes.getNodes().size(), equalTo(1));

GetShutdownStatusAction.Response getResp = client().execute(
GetShutdownStatusAction.INSTANCE,
new GetShutdownStatusAction.Request(nodeToRestartId)
).get();

assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
}

/**
* Similar to the previous test, but ensures that the status stays at `COMPLETE` when the shutdown is registered while the node is
* offline. The status can incorrectly revert if {@link NodeSeenService} isn't working as expected.
*/
public void testShardStatusStaysCompleteAfterNodeLeavesIfRegisteredWhileNodeOffline() throws Exception {
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
final String nodeToRestartName = internalCluster().startNode();
final String nodeToRestartId = getNodeId(nodeToRestartName);
internalCluster().startNode();

// Stop the node we're going to shut down and mark it as shutting down while it's offline. This checks that the cluster state
// listener is working correctly.
internalCluster().restartNode(nodeToRestartName, new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
nodeToRestartId,
SingleNodeShutdownMetadata.Type.REMOVE,
"testShardStatusStaysCompleteAfterNodeLeavesIfRegisteredWhileNodeOffline",
null
);
AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
assertTrue(putShutdownResponse.isAcknowledged());

return super.onNodeStopped(nodeName);
}
});

internalCluster().stopNode(nodeToRestartName);

NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
assertThat(nodes.getNodes().size(), equalTo(1));

GetShutdownStatusAction.Response getResp = client().execute(
GetShutdownStatusAction.INSTANCE,
new GetShutdownStatusAction.Request(nodeToRestartId)
).get();

assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
}

/**
* Checks that non-data nodes that are registered for shutdown have a shard migration status of `COMPLETE` rather than
* `NOT_STARTED` (this was a bug in the initial implementation).
*/
public void testShardStatusIsCompleteOnNonDataNodes() throws Exception {
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
final String nodeToShutDownName = internalCluster().startMasterOnlyNode();
internalCluster().startMasterOnlyNode(); // Just to have at least one other node
final String nodeToRestartId = getNodeId(nodeToShutDownName);

// Mark the node for shutdown
PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
nodeToRestartId,
SingleNodeShutdownMetadata.Type.REMOVE,
this.getTestName(),
null
);
AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
assertTrue(putShutdownResponse.isAcknowledged());

GetShutdownStatusAction.Response getResp = client().execute(
GetShutdownStatusAction.INSTANCE,
new GetShutdownStatusAction.Request(nodeToRestartId)
).get();

assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
}

private String getNodeId(String nodeName) throws Exception {
NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
return nodes.getNodes()
.stream()
.map(NodeInfo::getNode)
.filter(node -> node.getName().equals(nodeName))
.map(DiscoveryNode::getId)
.findFirst()
.orElseThrow();
}
}
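The {@link NodeSeenService} referenced in the test javadoc above is not included in the hunks shown here. Based on the commit messages ("Add node-seen tracking to shutdown metadata", "Add `nodesAdded()` check", "ShutdownService -> NodeSeenService"), it is a master-side cluster state listener that flips node_seen to true once a node registered for shutdown is actually observed in the cluster. The following is only a rough sketch of that idea under those assumptions, not the merged implementation:

import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterStateListener;
import org.elasticsearch.cluster.service.ClusterService;

// Sketch only: the real NodeSeenService lives in the shutdown plugin and is wired up elsewhere.
public class NodeSeenService implements ClusterStateListener {

    private final ClusterService clusterService;

    NodeSeenService(ClusterService clusterService) {
        this.clusterService = clusterService;
        clusterService.addListener(this);
    }

    @Override
    public void clusterChanged(ClusterChangedEvent event) {
        // Only the elected master mutates shutdown metadata, and only when nodes have joined.
        if (event.localNodeMaster() == false || event.nodesAdded() == false) {
            return;
        }
        // Find shutdown entries whose node is now present but still marked node_seen=false, then
        // submit a cluster state update task that rewrites those entries with setNodeSeen(true),
        // skipping the update entirely when nothing would change (the no-op check in the commit list).
    }
}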