Fix NOT_STARTED statuses appearing inappropirately during node shut…

…down (elastic#75750) This PR fixes two situations where `NOT_STARTED` can appear as the shard migration status inappropriately: 1. When the node is actually shut down after having all the shards migrate away. 2. When a non-data-node is registered for shutdown. It also adds tests to ensure these cases are handled correctly.
gwbrown · Aug 18, 2021 · 22f257b · 22f257b
1 parent 608aced
commit 22f257b
Show file tree

Hide file tree

Showing 8 changed files with 357 additions and 28 deletions.
diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/SingleNodeShutdownMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/SingleNodeShutdownMetadata.java
@@ -39,6 +39,7 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
     public static final String STARTED_AT_READABLE_FIELD = "shutdown_started";
     public static final ParseField STARTED_AT_MILLIS_FIELD = new ParseField(STARTED_AT_READABLE_FIELD + "millis");
     public static final ParseField ALLOCATION_DELAY_FIELD = new ParseField("allocation_delay");
+    public static final ParseField NODE_SEEN_FIELD = new ParseField("node_seen");
 
     public static final ConstructingObjectParser<SingleNodeShutdownMetadata, Void> PARSER = new ConstructingObjectParser<>(
         "node_shutdown_info",
@@ -47,7 +48,8 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
             Type.valueOf((String) a[1]),
             (String) a[2],
             (long) a[3],
-            (TimeValue) a[4]
+            (boolean) a[4],
+            (TimeValue) a[5]
         )
     );
 
@@ -56,6 +58,7 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
         PARSER.declareString(ConstructingObjectParser.constructorArg(), TYPE_FIELD);
         PARSER.declareString(ConstructingObjectParser.constructorArg(), REASON_FIELD);
         PARSER.declareLong(ConstructingObjectParser.constructorArg(), STARTED_AT_MILLIS_FIELD);
+        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), NODE_SEEN_FIELD);
         PARSER.declareField(
             ConstructingObjectParser.optionalConstructorArg(),
             (p, c) -> TimeValue.parseTimeValue(p.textOrNull(), ALLOCATION_DELAY_FIELD.getPreferredName()), ALLOCATION_DELAY_FIELD,
@@ -73,6 +76,7 @@ public static SingleNodeShutdownMetadata parse(XContentParser parser) {
     private final Type type;
     private final String reason;
     private final long startedAtMillis;
+    private final boolean nodeSeen;
     @Nullable private final TimeValue allocationDelay;
 
     /**
@@ -86,12 +90,14 @@ private SingleNodeShutdownMetadata(
         Type type,
         String reason,
         long startedAtMillis,
+        boolean nodeSeen,
         @Nullable TimeValue allocationDelay
     ) {
         this.nodeId = Objects.requireNonNull(nodeId, "node ID must not be null");
         this.type = Objects.requireNonNull(type, "shutdown type must not be null");
         this.reason = Objects.requireNonNull(reason, "shutdown reason must not be null");
         this.startedAtMillis = startedAtMillis;
+        this.nodeSeen = nodeSeen;
         if (allocationDelay != null && Type.RESTART.equals(type) == false) {
             throw new IllegalArgumentException("shard allocation delay is only valid for RESTART-type shutdowns");
         }
@@ -103,6 +109,7 @@ public SingleNodeShutdownMetadata(StreamInput in) throws IOException {
         this.type = in.readEnum(Type.class);
         this.reason = in.readString();
         this.startedAtMillis = in.readVLong();
+        this.nodeSeen = in.readBoolean();
         this.allocationDelay = in.readOptionalTimeValue();
     }
 
@@ -134,6 +141,13 @@ public long getStartedAtMillis() {
         return startedAtMillis;
     }
 
+    /**
+     * @return A boolean indicated whether this node has been seen in the cluster since the shutdown was registered.
+     */
+    public boolean getNodeSeen() {
+        return nodeSeen;
+    }
+
     /**
      * @return The amount of time shard reallocation should be delayed for shards on this node, so that they will not be automatically
      * reassigned while the node is restarting. Will be {@code null} for non-restart shutdowns.
@@ -154,6 +168,7 @@ public void writeTo(StreamOutput out) throws IOException {
         out.writeEnum(type);
         out.writeString(reason);
         out.writeVLong(startedAtMillis);
+        out.writeBoolean(nodeSeen);
         out.writeOptionalTimeValue(allocationDelay);
     }
 
@@ -165,6 +180,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
             builder.field(TYPE_FIELD.getPreferredName(), type);
             builder.field(REASON_FIELD.getPreferredName(), reason);
             builder.timeField(STARTED_AT_MILLIS_FIELD.getPreferredName(), STARTED_AT_READABLE_FIELD, startedAtMillis);
+            builder.field(NODE_SEEN_FIELD.getPreferredName(), nodeSeen);
             if (allocationDelay != null) {
                 builder.field(ALLOCATION_DELAY_FIELD.getPreferredName(), allocationDelay.getStringRep());
             }
@@ -183,6 +199,7 @@ public boolean equals(Object o) {
             && getNodeId().equals(that.getNodeId())
             && getType() == that.getType()
             && getReason().equals(that.getReason())
+            && getNodeSeen() == that.getNodeSeen()
             && Objects.equals(allocationDelay, that.allocationDelay);
     }
 
@@ -193,6 +210,7 @@ public int hashCode() {
             getType(),
             getReason(),
             getStartedAtMillis(),
+            getNodeSeen(),
             allocationDelay
         );
     }
@@ -209,14 +227,16 @@ public static Builder builder(SingleNodeShutdownMetadata original) {
             .setNodeId(original.getNodeId())
             .setType(original.getType())
             .setReason(original.getReason())
-            .setStartedAtMillis(original.getStartedAtMillis());
+            .setStartedAtMillis(original.getStartedAtMillis())
+            .setNodeSeen(original.getNodeSeen());
     }
 
     public static class Builder {
         private String nodeId;
         private Type type;
         private String reason;
         private long startedAtMillis = -1;
+        private boolean nodeSeen = false;
         private TimeValue allocationDelay;
 
         private Builder() {}
@@ -257,6 +277,15 @@ public Builder setStartedAtMillis(long startedAtMillis) {
             return this;
         }
 
+        /**
+         * @param nodeSeen Whether or not the node has been seen since the shutdown was registered.
+         * @return This builder.
+         */
+        public Builder setNodeSeen(boolean nodeSeen) {
+            this.nodeSeen = nodeSeen;
+            return this;
+        }
+
         /**
          * @param allocationDelay The amount of time shard reallocation should be delayed while this node is offline.
          * @return This builder.
@@ -275,7 +304,9 @@ public SingleNodeShutdownMetadata build() {
                 nodeId,
                 type,
                 reason,
-                startedAtMillis, allocationDelay
+                startedAtMillis,
+                nodeSeen,
+                allocationDelay
             );
         }
     }

diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/NodesShutdownMetadataTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/NodesShutdownMetadataTests.java
@@ -88,7 +88,8 @@ private SingleNodeShutdownMetadata randomNodeShutdownInfo() {
         if (type.equals(SingleNodeShutdownMetadata.Type.RESTART) && randomBoolean()) {
             builder.setAllocationDelay(TimeValue.parseTimeValue(randomTimeValue(), this.getTestName()));
         }
-        return builder.build();
+        return builder.setNodeSeen(randomBoolean())
+            .build();
     }
 
     @Override

diff --git a/...n/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java b/...n/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java
@@ -0,0 +1,147 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.shutdown;
+
+import org.elasticsearch.Build;
+import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
+import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
+import org.elasticsearch.action.support.master.AcknowledgedResponse;
+import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.test.InternalTestCluster;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import static org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata.Status.COMPLETE;
+import static org.hamcrest.Matchers.equalTo;
+
+@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, numClientNodes = 0)
+public class NodeShutdownShardsIT extends ESIntegTestCase {
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return Arrays.asList(ShutdownPlugin.class);
+    }
+
+    /**
+     * Verifies that a node that's removed from the cluster with zero shards stays in the `COMPLETE` status after it leaves, rather than
+     * reverting to `NOT_STARTED` (this was a bug in the initial implementation).
+     */
+    public void testShardStatusStaysCompleteAfterNodeLeaves() throws Exception {
+        assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
+        final String nodeToRestartName = internalCluster().startNode();
+        final String nodeToRestartId = getNodeId(nodeToRestartName);
+        internalCluster().startNode();
+
+        // Mark the node for shutdown
+        PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
+            nodeToRestartId,
+            SingleNodeShutdownMetadata.Type.REMOVE,
+            this.getTestName(),
+            null
+        );
+        AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
+        assertTrue(putShutdownResponse.isAcknowledged());
+
+        internalCluster().stopNode(nodeToRestartName);
+
+        NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
+        assertThat(nodes.getNodes().size(), equalTo(1));
+
+        GetShutdownStatusAction.Response getResp = client().execute(
+            GetShutdownStatusAction.INSTANCE,
+            new GetShutdownStatusAction.Request(nodeToRestartId)
+        ).get();
+
+        assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
+    }
+
+    /**
+     * Similar to the previous test, but ensures that the status stays at `COMPLETE` when the node is offline when the shutdown is
+     * registered. This may happen if {@link NodeSeenService} isn't working as expected.
+     */
+    public void testShardStatusStaysCompleteAfterNodeLeavesIfRegisteredWhileNodeOffline() throws Exception {
+        assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
+        final String nodeToRestartName = internalCluster().startNode();
+        final String nodeToRestartId = getNodeId(nodeToRestartName);
+        internalCluster().startNode();
+
+        // Stop the node we're going to shut down and mark it as shutting down while it's offline. This checks that the cluster state
+        // listener is working correctly.
+        internalCluster().restartNode(nodeToRestartName, new InternalTestCluster.RestartCallback() {
+            @Override
+            public Settings onNodeStopped(String nodeName) throws Exception {
+                PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
+                    nodeToRestartId,
+                    SingleNodeShutdownMetadata.Type.REMOVE,
+                    "testShardStatusStaysCompleteAfterNodeLeavesIfRegisteredWhileNodeOffline",
+                    null
+                );
+                AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
+                assertTrue(putShutdownResponse.isAcknowledged());
+
+                return super.onNodeStopped(nodeName);
+            }
+        });
+
+        internalCluster().stopNode(nodeToRestartName);
+
+        NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
+        assertThat(nodes.getNodes().size(), equalTo(1));
+
+        GetShutdownStatusAction.Response getResp = client().execute(
+            GetShutdownStatusAction.INSTANCE,
+            new GetShutdownStatusAction.Request(nodeToRestartId)
+        ).get();
+
+        assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
+    }
+
+    /**
+     * Checks that non-data nodes that are registered for shutdown have a shard migration status of `COMPLETE` rather than `NOT_STARTED`.
+     * (this was a bug in the initial implementation).
+     */
+    public void testShardStatusIsCompleteOnNonDataNodes() throws Exception {
+        assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
+        final String nodeToShutDownName = internalCluster().startMasterOnlyNode();
+        internalCluster().startMasterOnlyNode(); // Just to have at least one other node
+        final String nodeToRestartId = getNodeId(nodeToShutDownName);
+
+        // Mark the node for shutdown
+        PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
+            nodeToRestartId,
+            SingleNodeShutdownMetadata.Type.REMOVE,
+            this.getTestName(),
+            null
+        );
+        AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
+        assertTrue(putShutdownResponse.isAcknowledged());
+
+        GetShutdownStatusAction.Response getResp = client().execute(
+            GetShutdownStatusAction.INSTANCE,
+            new GetShutdownStatusAction.Request(nodeToRestartId)
+        ).get();
+
+        assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
+    }
+
+    private String getNodeId(String nodeName) throws Exception {
+        NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
+        return nodes.getNodes()
+            .stream()
+            .map(NodeInfo::getNode)
+            .filter(node -> node.getName().equals(nodeName))
+            .map(DiscoveryNode::getId)
+            .findFirst()
+            .orElseThrow();
+    }
+}