-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix NOT_STARTED
statuses appearing inappropriately during node shutdown
#75750
Changes from 11 commits
8021fab
f3960d0
2aed2f1
d619654
946bdd4
9a0f6d5
d3b68d6
2d740f9
14f681f
ca88ade
8a7f0de
aafb153
e8e7914
65fcfdd
3e371fa
04957be
749a821
f6c7f42
a090c7f
52230a4
d3f31ed
8832c42
960b353
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.shutdown; | ||
|
||
import org.elasticsearch.Build; | ||
import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; | ||
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse; | ||
import org.elasticsearch.action.support.master.AcknowledgedResponse; | ||
import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; | ||
import org.elasticsearch.cluster.node.DiscoveryNode; | ||
import org.elasticsearch.plugins.Plugin; | ||
import org.elasticsearch.test.ESIntegTestCase; | ||
|
||
import java.util.Arrays; | ||
import java.util.Collection; | ||
|
||
import static org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata.Status.COMPLETE; | ||
import static org.hamcrest.Matchers.equalTo; | ||
|
||
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, numClientNodes = 0) | ||
public class NodeShutdownShardsIT extends ESIntegTestCase { | ||
|
||
@Override | ||
protected Collection<Class<? extends Plugin>> nodePlugins() { | ||
return Arrays.asList(ShutdownPlugin.class); | ||
} | ||
|
||
public void testShardStatusStaysCompleteAfterNodeLeaves() throws Exception { | ||
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot()); | ||
final String nodeToRestartName = internalCluster().startNode(); | ||
final String nodeToRestartId = getNodeId(nodeToRestartName); | ||
internalCluster().startNode(); | ||
|
||
// Mark the node for shutdown | ||
PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request( | ||
nodeToRestartId, | ||
SingleNodeShutdownMetadata.Type.REMOVE, | ||
this.getTestName() | ||
); | ||
AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get(); | ||
assertTrue(putShutdownResponse.isAcknowledged()); | ||
|
||
internalCluster().stopNode(nodeToRestartName); | ||
|
||
NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get(); | ||
assertThat(nodes.getNodes().size(), equalTo(1)); | ||
|
||
GetShutdownStatusAction.Response getResp = client().execute( | ||
GetShutdownStatusAction.INSTANCE, | ||
new GetShutdownStatusAction.Request(nodeToRestartId) | ||
).get(); | ||
|
||
assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE)); | ||
} | ||
|
||
public void testShardStatusIsCompleteOnNonDataNodes() throws Exception { | ||
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot()); | ||
final String nodeToShutDownName = internalCluster().startMasterOnlyNode(); | ||
internalCluster().startMasterOnlyNode(); // Just to have at least one other node | ||
final String nodeToRestartId = getNodeId(nodeToShutDownName); | ||
|
||
// Mark the node for shutdown | ||
PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request( | ||
nodeToRestartId, | ||
SingleNodeShutdownMetadata.Type.REMOVE, | ||
this.getTestName() | ||
); | ||
AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get(); | ||
assertTrue(putShutdownResponse.isAcknowledged()); | ||
|
||
GetShutdownStatusAction.Response getResp = client().execute( | ||
GetShutdownStatusAction.INSTANCE, | ||
new GetShutdownStatusAction.Request(nodeToRestartId) | ||
).get(); | ||
|
||
assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE)); | ||
} | ||
|
||
private String getNodeId(String nodeName) throws Exception { | ||
NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get(); | ||
return nodes.getNodes() | ||
.stream() | ||
.map(NodeInfo::getNode) | ||
.filter(node -> node.getName().equals(nodeName)) | ||
.map(DiscoveryNode::getId) | ||
.findFirst() | ||
.orElseThrow(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.shutdown; | ||
|
||
import org.apache.log4j.LogManager; | ||
import org.apache.log4j.Logger; | ||
import org.apache.logging.log4j.message.ParameterizedMessage; | ||
import org.elasticsearch.cluster.ClusterChangedEvent; | ||
import org.elasticsearch.cluster.ClusterState; | ||
import org.elasticsearch.cluster.ClusterStateListener; | ||
import org.elasticsearch.cluster.ClusterStateUpdateTask; | ||
import org.elasticsearch.cluster.metadata.Metadata; | ||
import org.elasticsearch.cluster.metadata.NodesShutdownMetadata; | ||
import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; | ||
import org.elasticsearch.cluster.service.ClusterService; | ||
|
||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.function.Function; | ||
import java.util.stream.Collectors; | ||
|
||
public class ShutdownService implements ClusterStateListener { | ||
private static final Logger logger = LogManager.getLogger(ShutdownService.class); | ||
|
||
final ClusterService clusterService; | ||
|
||
public ShutdownService(ClusterService clusterService) { | ||
this.clusterService = clusterService; | ||
clusterService.addListener(this); | ||
} | ||
|
||
@Override | ||
public void clusterChanged(ClusterChangedEvent event) { | ||
if (event.state().nodes().isLocalNodeElectedMaster() == false) { | ||
// Only do this if we're the current master node. | ||
return; | ||
} | ||
dakrone marked this conversation as resolved.
Show resolved
Hide resolved
|
||
NodesShutdownMetadata eventShutdownMetadata = event.state().metadata().custom(NodesShutdownMetadata.TYPE); | ||
final Set<String> nodesNotPreviouslySeen = eventShutdownMetadata.getAllNodeMetadataMap() | ||
.values() | ||
.stream() | ||
.filter(singleNodeShutdownMetadata -> singleNodeShutdownMetadata.getNodeSeen() == false) | ||
.map(SingleNodeShutdownMetadata::getNodeId) | ||
.filter(nodeId -> event.state().nodes().nodeExists(nodeId)) | ||
.collect(Collectors.toUnmodifiableSet()); | ||
|
||
if (nodesNotPreviouslySeen.isEmpty() == false) { | ||
clusterService.submitStateUpdateTask("shutdown-seen-nodes-updater", new ClusterStateUpdateTask() { | ||
@Override | ||
public ClusterState execute(ClusterState currentState) throws Exception { | ||
NodesShutdownMetadata shutdownMetadata = currentState.metadata().custom(NodesShutdownMetadata.TYPE); | ||
|
||
final Map<String, SingleNodeShutdownMetadata> newShutdownMetadataMap = shutdownMetadata.getAllNodeMetadataMap() | ||
.values() | ||
.stream() | ||
.map(singleNodeShutdownMetadata -> { | ||
if (nodesNotPreviouslySeen.contains(singleNodeShutdownMetadata.getNodeId())) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should recalculate the |
||
return SingleNodeShutdownMetadata.builder(singleNodeShutdownMetadata).setNodeSeen(true).build(); | ||
} | ||
return singleNodeShutdownMetadata; | ||
}) | ||
.collect(Collectors.toUnmodifiableMap(SingleNodeShutdownMetadata::getNodeId, Function.identity())); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be worth it to add |
||
return ClusterState.builder(currentState) | ||
.metadata( | ||
Metadata.builder(currentState.metadata()) | ||
.putCustom(NodesShutdownMetadata.TYPE, new NodesShutdownMetadata(newShutdownMetadataMap)) | ||
.build() | ||
) | ||
.build(); | ||
} | ||
|
||
@Override | ||
public void onFailure(String source, Exception e) { | ||
logger.warn(new ParameterizedMessage("failed to mark shutting down nodes as seen: {}", nodesNotPreviouslySeen), e); | ||
} | ||
}); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add javadocs to this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now that I look at it, this class name is a little strange since this service doesn't actually do anything related to shutting down the nodes.
I wonder if it would be better named something like
NodeSeenService
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Renamed! Good call, I didn't really think about the naming at the time.