diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c4204a403ea4..7aa95ffc6000b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,11 +25,14 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Added missing no-jdk distributions ([#4722](https://github.com/opensearch-project/OpenSearch/pull/4722)) - Copy `build.sh` over from opensearch-build ([#4887](https://github.com/opensearch-project/OpenSearch/pull/4887)) - Update GeoGrid base class access modifier to support extensibility ([#4921](https://github.com/opensearch-project/OpenSearch/pull/4921)) +- Recommission API changes for service layer ([#4320](https://github.com/opensearch-project/OpenSearch/pull/4320)) +- Recommissioning of zone. REST layer support. ([#4624](https://github.com/opensearch-project/OpenSearch/pull/4604)) - Build no-jdk distributions as part of release build ([#4902](https://github.com/opensearch-project/OpenSearch/pull/4902)) - Use getParameterCount instead of getParameterTypes ([#4821](https://github.com/opensearch-project/OpenSearch/pull/4821)) - Added in-flight cancellation of SearchShardTask based on resource consumption ([#4565](https://github.com/opensearch-project/OpenSearch/pull/4565)) - Added resource usage trackers for in-flight cancellation of SearchShardTask ([#4805](https://github.com/opensearch-project/OpenSearch/pull/4805)) - Added search backpressure stats API ([#4932](https://github.com/opensearch-project/OpenSearch/pull/4932)) +- Added changes for graceful node decommission ([#4586](https://github.com/opensearch-project/OpenSearch/pull/4586)) ### Dependencies - Bumps `com.diffplug.spotless` from 6.9.1 to 6.10.0 @@ -64,6 +67,11 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - GET api for weighted shard routing([#4275](https://github.com/opensearch-project/OpenSearch/pull/4275/)) - Delete api for weighted shard routing([#4400](https://github.com/opensearch-project/OpenSearch/pull/4400/)) - Fix weighted routing metadata deserialization error on process restart ([#4691](https://github.com/opensearch-project/OpenSearch/pull/4691)) +- Add DecommissionService and helper to execute awareness attribute decommissioning ([#4084](https://github.com/opensearch-project/OpenSearch/pull/4084)) +- Add APIs (GET/PUT) to decommission awareness attribute ([#4261](https://github.com/opensearch-project/OpenSearch/pull/4261)) +- Controlling discovery for decommissioned nodes ([#4590](https://github.com/opensearch-project/OpenSearch/pull/4590)) +- Integ Tests for Awareness Attribute Decommissioning ([#4715](https://github.com/opensearch-project/OpenSearch/pull/4715)) +- Fail weight update when decommission ongoing and fail decommission when attribute not weighed away ([#4839](https://github.com/opensearch-project/OpenSearch/pull/4839)) ### Deprecated ### Removed @@ -89,10 +97,14 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - [Bug]: Fixed invalid location of JDK dependency for arm64 architecture([#4613](https://github.com/opensearch-project/OpenSearch/pull/4613)) - [Bug]: Alias filter lost after rollover ([#4499](https://github.com/opensearch-project/OpenSearch/pull/4499)) - Fixing Gradle warnings associated with publishPluginZipPublicationToXxx tasks ([#4696](https://github.com/opensearch-project/OpenSearch/pull/4696)) +- [BUG]: Remove redundant field from GetDecommissionStateResponse ([#4751](https://github.com/opensearch-project/OpenSearch/pull/4751)) - Fixed randomly failing test ([4774](https://github.com/opensearch-project/OpenSearch/pull/4774)) - Fix recovery path for searchable snapshots ([4813](https://github.com/opensearch-project/OpenSearch/pull/4813)) - Fix a bug on handling an invalid array value for point type field #4900([#4900](https://github.com/opensearch-project/OpenSearch/pull/4900)) +- Fix decommission status update to non leader nodes ([4800](https://github.com/opensearch-project/OpenSearch/pull/4800)) +- Fix bug in AwarenessAttributeDecommissionIT([4822](https://github.com/opensearch-project/OpenSearch/pull/4822)) - Fix for failing checkExtraction, checkLicense and checkNotice tasks for windows gradle check ([#4941](https://github.com/opensearch-project/OpenSearch/pull/4941)) +- [BUG]: Allow decommission to support delay timeout [#4930](https://github.com/opensearch-project/OpenSearch/pull/4930)) ### Security - CVE-2022-25857 org.yaml:snakeyaml DOS vulnerability ([#4341](https://github.com/opensearch-project/OpenSearch/pull/4341)) diff --git a/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java b/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java index 5003d4aaeeb53..a50cc811a87dc 100644 --- a/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java +++ b/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java @@ -891,7 +891,10 @@ public void testApiNamingConventions() throws Exception { "remote_store.restore", "cluster.put_weighted_routing", "cluster.get_weighted_routing", - "cluster.delete_weighted_routing", }; + "cluster.delete_weighted_routing", + "cluster.put_decommission_awareness", + "cluster.get_decommission_awareness", + "cluster.delete_decommission_awareness", }; List booleanReturnMethods = Arrays.asList("security.enable_user", "security.disable_user", "security.change_password"); Set deprecatedMethods = new HashSet<>(); deprecatedMethods.add("indices.force_merge"); diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.delete_decommission_awareness.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.delete_decommission_awareness.json new file mode 100644 index 0000000000000..13ea101169e60 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.delete_decommission_awareness.json @@ -0,0 +1,19 @@ +{ + "cluster.delete_decommission_awareness": { + "documentation": { + "url": "https://opensearch.org/docs/latest/opensearch/rest-api/decommission/", + "description": "Delete any existing decommission." + }, + "stability": "experimental", + "url": { + "paths": [ + { + "path": "/_cluster/decommission/awareness/", + "methods": [ + "DELETE" + ] + } + ] + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.get_decommission_awareness.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.get_decommission_awareness.json new file mode 100644 index 0000000000000..302dea4ec31a7 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.get_decommission_awareness.json @@ -0,0 +1,25 @@ +{ + "cluster.get_decommission_awareness": { + "documentation": { + "url": "https://opensearch.org/docs/latest/opensearch/rest-api/decommission/", + "description": "Get details and status of decommissioned attribute" + }, + "stability": "experimental", + "url": { + "paths": [ + { + "path":"/_cluster/decommission/awareness/{awareness_attribute_name}/_status", + "methods":[ + "GET" + ], + "parts":{ + "awareness_attribute_name":{ + "type":"string", + "description":"Awareness attribute name" + } + } + } + ] + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.put_decommission_awareness.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.put_decommission_awareness.json new file mode 100644 index 0000000000000..bf4ffd454d9df --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.put_decommission_awareness.json @@ -0,0 +1,29 @@ +{ + "cluster.put_decommission_awareness": { + "documentation": { + "url": "https://opensearch.org/docs/latest/opensearch/rest-api/decommission/", + "description": "Decommissions an awareness attribute" + }, + "stability": "experimental", + "url": { + "paths": [ + { + "path": "/_cluster/decommission/awareness/{awareness_attribute_name}/{awareness_attribute_value}", + "methods": [ + "PUT" + ], + "parts": { + "awareness_attribute_name": { + "type": "string", + "description": "Awareness attribute name" + }, + "awareness_attribute_value": { + "type": "string", + "description": "Awareness attribute value" + } + } + } + ] + } + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/AwarenessAttributeDecommissionIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/AwarenessAttributeDecommissionIT.java new file mode 100644 index 0000000000000..067b127a667b4 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/AwarenessAttributeDecommissionIT.java @@ -0,0 +1,874 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.coordination; + +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.core.LogEvent; +import org.junit.After; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateAction; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateAction; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionAction; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse; +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; +import org.opensearch.action.admin.cluster.shards.routing.weighted.put.ClusterPutWeightedRoutingResponse; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionService; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.cluster.decommission.DecommissioningFailedException; +import org.opensearch.cluster.decommission.NodeDecommissionedException; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.routing.WeightedRouting; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Priority; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.discovery.Discovery; +import org.opensearch.plugins.Plugin; +import org.opensearch.test.MockLogAppender; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.transport.MockTransportService; +import org.opensearch.transport.RemoteTransportException; +import org.opensearch.transport.Transport; +import org.opensearch.transport.TransportService; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; + +import static org.opensearch.test.NodeRoles.onlyRole; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoTimeout; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class AwarenessAttributeDecommissionIT extends OpenSearchIntegTestCase { + private final Logger logger = LogManager.getLogger(AwarenessAttributeDecommissionIT.class); + + @Override + protected Collection> nodePlugins() { + return Collections.singletonList(MockTransportService.TestPlugin.class); + } + + @After + public void cleanup() throws Exception { + assertNoTimeout(client().admin().cluster().prepareHealth().get()); + } + + public void testDecommissionFailedWhenNotZoneAware() throws Exception { + Settings commonSettings = Settings.builder().build(); + // Start 3 cluster manager eligible nodes + internalCluster().startClusterManagerOnlyNodes(3, Settings.builder().put(commonSettings).build()); + // start 3 data nodes + internalCluster().startDataOnlyNodes(3, Settings.builder().put(commonSettings).build()); + ensureStableCluster(6); + ClusterHealthResponse health = client().admin() + .cluster() + .prepareHealth() + .setWaitForEvents(Priority.LANGUID) + .setWaitForGreenStatus() + .setWaitForNodes(Integer.toString(6)) + .execute() + .actionGet(); + assertFalse(health.isTimedOut()); + + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute); + assertBusy(() -> { + DecommissioningFailedException ex = expectThrows( + DecommissioningFailedException.class, + () -> client().execute(DecommissionAction.INSTANCE, decommissionRequest).actionGet() + ); + assertTrue(ex.getMessage().contains("invalid awareness attribute requested for decommissioning")); + }); + } + + public void testDecommissionFailedWhenNotForceZoneAware() throws Exception { + Settings commonSettings = Settings.builder().put("cluster.routing.allocation.awareness.attributes", "zone").build(); + // Start 3 cluster manager eligible nodes + logger.info("--> start 3 cluster manager nodes on zones 'a' & 'b' & 'c'"); + internalCluster().startNodes( + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "a") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "b") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "c") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build() + ); + logger.info("--> starting data node each on zones 'a' & 'b' & 'c'"); + internalCluster().startDataOnlyNode(Settings.builder().put(commonSettings).put("node.attr.zone", "a").build()); + internalCluster().startDataOnlyNode(Settings.builder().put(commonSettings).put("node.attr.zone", "b").build()); + internalCluster().startDataOnlyNode(Settings.builder().put(commonSettings).put("node.attr.zone", "c").build()); + ensureStableCluster(6); + ClusterHealthResponse health = client().admin() + .cluster() + .prepareHealth() + .setWaitForEvents(Priority.LANGUID) + .setWaitForGreenStatus() + .setWaitForNodes(Integer.toString(6)) + .execute() + .actionGet(); + assertFalse(health.isTimedOut()); + + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "a"); + DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute); + assertBusy(() -> { + DecommissioningFailedException ex = expectThrows( + DecommissioningFailedException.class, + () -> client().execute(DecommissionAction.INSTANCE, decommissionRequest).actionGet() + ); + assertTrue(ex.getMessage().contains("doesn't have the decommissioning attribute")); + }); + } + + public void testNodesRemovedAfterZoneDecommission_ClusterManagerNotInToBeDecommissionedZone() throws Exception { + assertNodesRemovedAfterZoneDecommission(false); + } + + public void testNodesRemovedAfterZoneDecommission_ClusterManagerInToBeDecommissionedZone() throws Exception { + assertNodesRemovedAfterZoneDecommission(true); + } + + public void testInvariantsAndLogsOnDecommissionedNodes() throws Exception { + Settings commonSettings = Settings.builder() + .put("cluster.routing.allocation.awareness.attributes", "zone") + .put("cluster.routing.allocation.awareness.force.zone.values", "a,b,c") + .build(); + + logger.info("--> start 3 cluster manager nodes on zones 'a' & 'b' & 'c'"); + List clusterManagerNodes = internalCluster().startNodes( + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "a") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "b") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "c") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build() + ); + logger.info("--> start 3 data nodes on zones 'a' & 'b' & 'c'"); + List dataNodes = internalCluster().startNodes( + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "a") + .put(onlyRole(commonSettings, DiscoveryNodeRole.DATA_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "b") + .put(onlyRole(commonSettings, DiscoveryNodeRole.DATA_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "c") + .put(onlyRole(commonSettings, DiscoveryNodeRole.DATA_ROLE)) + .build() + ); + + ensureStableCluster(6); + ClusterHealthResponse health = client().admin() + .cluster() + .prepareHealth() + .setWaitForEvents(Priority.LANGUID) + .setWaitForGreenStatus() + .setWaitForNodes(Integer.toString(6)) + .execute() + .actionGet(); + assertFalse(health.isTimedOut()); + + logger.info("--> setting shard routing weights for weighted round robin"); + Map weights = Map.of("a", 0.0, "b", 1.0, "c", 1.0); + WeightedRouting weightedRouting = new WeightedRouting("zone", weights); + + ClusterPutWeightedRoutingResponse weightedRoutingResponse = client().admin() + .cluster() + .prepareWeightedRouting() + .setWeightedRouting(weightedRouting) + .get(); + assertTrue(weightedRoutingResponse.isAcknowledged()); + + logger.info("--> starting decommissioning nodes in zone {}", 'a'); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "a"); + DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute); + decommissionRequest.setNoDelay(true); + DecommissionResponse decommissionResponse = client().execute(DecommissionAction.INSTANCE, decommissionRequest).get(); + assertTrue(decommissionResponse.isAcknowledged()); + + // Will wait for all events to complete + client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get(); + + String currentClusterManager = internalCluster().getClusterManagerName(); + String decommissionedNode = randomFrom(clusterManagerNodes.get(0), dataNodes.get(0)); + + ClusterService decommissionedNodeClusterService = internalCluster().getInstance(ClusterService.class, decommissionedNode); + DecommissionAttributeMetadata metadata = decommissionedNodeClusterService.state() + .metadata() + .custom(DecommissionAttributeMetadata.TYPE); + // The decommissioned node would not be having status as SUCCESS as it was kicked out later + // and not receiving any further state updates + // This also helps to test metadata status updates was received by this node until it got kicked by the leader + assertEquals(metadata.decommissionAttribute(), decommissionAttribute); + assertNotNull(metadata.status()); + assertEquals(metadata.status(), DecommissionStatus.IN_PROGRESS); + + // assert the node has decommissioned attribute + assertEquals(decommissionedNodeClusterService.localNode().getAttributes().get("zone"), "a"); + + // assert exception on decommissioned node + Logger clusterLogger = LogManager.getLogger(JoinHelper.class); + MockLogAppender mockLogAppender = MockLogAppender.createForLoggers(clusterLogger); + mockLogAppender.addExpectation( + new MockLogAppender.PatternSeenEventExpectation( + "test", + JoinHelper.class.getCanonicalName(), + Level.INFO, + "local node is decommissioned \\[.*]\\. Will not be able to join the cluster" + ) + ); + mockLogAppender.addExpectation( + new MockLogAppender.SeenEventExpectation("test", JoinHelper.class.getCanonicalName(), Level.INFO, "failed to join") { + @Override + public boolean innerMatch(LogEvent event) { + return event.getThrown() != null + && event.getThrown().getClass() == RemoteTransportException.class + && event.getThrown().getCause() != null + && event.getThrown().getCause().getClass() == NodeDecommissionedException.class; + } + } + ); + TransportService clusterManagerTransportService = internalCluster().getInstance( + TransportService.class, + internalCluster().getClusterManagerName() + ); + MockTransportService decommissionedNodeTransportService = (MockTransportService) internalCluster().getInstance( + TransportService.class, + decommissionedNode + ); + final CountDownLatch countDownLatch = new CountDownLatch(2); + decommissionedNodeTransportService.addSendBehavior( + clusterManagerTransportService, + (connection, requestId, action, request, options) -> { + if (action.equals(JoinHelper.JOIN_ACTION_NAME)) { + countDownLatch.countDown(); + } + connection.sendRequest(requestId, action, request, options); + } + ); + decommissionedNodeTransportService.addConnectBehavior(clusterManagerTransportService, Transport::openConnection); + countDownLatch.await(); + mockLogAppender.assertAllExpectationsMatched(); + + // decommissioned node should have Coordinator#localNodeCommissioned = false + Coordinator coordinator = (Coordinator) internalCluster().getInstance(Discovery.class, decommissionedNode); + assertFalse(coordinator.localNodeCommissioned()); + + // Recommissioning the zone back to gracefully succeed the test once above tests succeeds + DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(currentClusterManager).execute( + DeleteDecommissionStateAction.INSTANCE, + new DeleteDecommissionStateRequest() + ).get(); + assertTrue(deleteDecommissionStateResponse.isAcknowledged()); + + // Will wait for all events to complete + client(currentClusterManager).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get(); + // will wait for cluster to stabilise with a timeout of 2 min as by then all nodes should have joined the cluster + ensureStableCluster(6, TimeValue.timeValueSeconds(121)); + } + + private void assertNodesRemovedAfterZoneDecommission(boolean originalClusterManagerDecommission) throws Exception { + int dataNodeCountPerAZ = 4; + List zones = new ArrayList<>(Arrays.asList("a", "b", "c")); + Settings commonSettings = Settings.builder() + .put("cluster.routing.allocation.awareness.attributes", "zone") + .put("cluster.routing.allocation.awareness.force.zone.values", "a,b,c") + .build(); + + logger.info("--> start 3 cluster manager nodes on zones 'a' & 'b' & 'c'"); + List clusterManagerNodes = internalCluster().startNodes( + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "a") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "b") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "c") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build() + ); + Map clusterManagerNameToZone = new HashMap<>(); + clusterManagerNameToZone.put(clusterManagerNodes.get(0), "a"); + clusterManagerNameToZone.put(clusterManagerNodes.get(1), "b"); + clusterManagerNameToZone.put(clusterManagerNodes.get(2), "c"); + + logger.info("--> starting 4 data nodes each on zones 'a' & 'b' & 'c'"); + List nodes_in_zone_a = internalCluster().startDataOnlyNodes( + dataNodeCountPerAZ, + Settings.builder().put(commonSettings).put("node.attr.zone", "a").build() + ); + List nodes_in_zone_b = internalCluster().startDataOnlyNodes( + dataNodeCountPerAZ, + Settings.builder().put(commonSettings).put("node.attr.zone", "b").build() + ); + List nodes_in_zone_c = internalCluster().startDataOnlyNodes( + dataNodeCountPerAZ, + Settings.builder().put(commonSettings).put("node.attr.zone", "c").build() + ); + ensureStableCluster(15); + ClusterHealthResponse health = client().admin() + .cluster() + .prepareHealth() + .setWaitForEvents(Priority.LANGUID) + .setWaitForGreenStatus() + .setWaitForNodes(Integer.toString(15)) + .execute() + .actionGet(); + assertFalse(health.isTimedOut()); + + String originalClusterManager = internalCluster().getClusterManagerName(); + String originalClusterManagerZone = clusterManagerNameToZone.get(originalClusterManager); + logger.info("--> original cluster manager - name {}, zone {}", originalClusterManager, originalClusterManagerZone); + + String zoneToDecommission = originalClusterManagerZone; + + if (originalClusterManagerDecommission == false) { + // decommission one zone where active cluster manager is not present + List tempZones = new ArrayList<>(zones); + tempZones.remove(originalClusterManagerZone); + zoneToDecommission = randomFrom(tempZones); + } + + logger.info("--> setting shard routing weights for weighted round robin"); + Map weights = new HashMap<>(Map.of("a", 1.0, "b", 1.0, "c", 1.0)); + WeightedRouting weightedRouting = new WeightedRouting("zone", weights); + weights.put(zoneToDecommission, 0.0); + + ClusterPutWeightedRoutingResponse weightedRoutingResponse = client().admin() + .cluster() + .prepareWeightedRouting() + .setWeightedRouting(weightedRouting) + .get(); + assertTrue(weightedRoutingResponse.isAcknowledged()); + + logger.info("--> starting decommissioning nodes in zone {}", zoneToDecommission); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", zoneToDecommission); + DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute); + decommissionRequest.setNoDelay(true); + DecommissionResponse decommissionResponse = client().execute(DecommissionAction.INSTANCE, decommissionRequest).get(); + assertTrue(decommissionResponse.isAcknowledged()); + + client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get(); + + ClusterState clusterState = client().admin().cluster().prepareState().execute().actionGet().getState(); + + // assert that number of nodes should be 10 ( 2 cluster manager nodes + 8 data nodes ) + assertEquals(clusterState.nodes().getNodes().size(), 10); + assertEquals(clusterState.nodes().getDataNodes().size(), 8); + assertEquals(clusterState.nodes().getClusterManagerNodes().size(), 2); + + Iterator discoveryNodeIterator = clusterState.nodes().getNodes().valuesIt(); + while (discoveryNodeIterator.hasNext()) { + // assert no node has decommissioned attribute + DiscoveryNode node = discoveryNodeIterator.next(); + assertNotEquals(node.getAttributes().get("zone"), zoneToDecommission); + + // assert no node is decommissioned from Coordinator#localNodeCommissioned + Coordinator coordinator = (Coordinator) internalCluster().getInstance(Discovery.class, node.getName()); + assertTrue(coordinator.localNodeCommissioned()); + } + + // assert that decommission status is successful + GetDecommissionStateResponse response = client().execute( + GetDecommissionStateAction.INSTANCE, + new GetDecommissionStateRequest(decommissionAttribute.attributeName()) + ).get(); + assertEquals(response.getAttributeValue(), decommissionAttribute.attributeValue()); + assertEquals(response.getDecommissionStatus(), DecommissionStatus.SUCCESSFUL); + + // assert that no node present in Voting Config Exclusion + assertEquals(clusterState.metadata().coordinationMetadata().getVotingConfigExclusions().size(), 0); + + String currentClusterManager = internalCluster().getClusterManagerName(); + assertNotNull(currentClusterManager); + if (originalClusterManagerDecommission) { + // assert that cluster manager switched during the test + assertNotEquals(originalClusterManager, currentClusterManager); + } else { + // assert that cluster manager didn't switch during test + assertEquals(originalClusterManager, currentClusterManager); + } + + // Will wait for all events to complete + client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get(); + + // Recommissioning the zone back to gracefully succeed the test once above tests succeeds + DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(currentClusterManager).execute( + DeleteDecommissionStateAction.INSTANCE, + new DeleteDecommissionStateRequest() + ).get(); + assertTrue(deleteDecommissionStateResponse.isAcknowledged()); + + // will wait for cluster to stabilise with a timeout of 2 min as by then all nodes should have joined the cluster + ensureStableCluster(15, TimeValue.timeValueMinutes(2)); + } + + public void testDecommissionFailedWhenDifferentAttributeAlreadyDecommissioned() throws Exception { + Settings commonSettings = Settings.builder() + .put("cluster.routing.allocation.awareness.attributes", "zone") + .put("cluster.routing.allocation.awareness.force.zone.values", "a,b,c") + .build(); + + logger.info("--> start 3 cluster manager nodes on zones 'a' & 'b' & 'c'"); + internalCluster().startNodes( + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "a") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "b") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "c") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build() + ); + logger.info("--> starting 1 nodes each on zones 'a' & 'b' & 'c'"); + internalCluster().startDataOnlyNode(Settings.builder().put(commonSettings).put("node.attr.zone", "a").build()); + internalCluster().startDataOnlyNode(Settings.builder().put(commonSettings).put("node.attr.zone", "b").build()); + String node_in_c = internalCluster().startDataOnlyNode(Settings.builder().put(commonSettings).put("node.attr.zone", "c").build()); + ensureStableCluster(6); + ClusterHealthResponse health = client().admin() + .cluster() + .prepareHealth() + .setWaitForEvents(Priority.LANGUID) + .setWaitForGreenStatus() + .setWaitForNodes(Integer.toString(6)) + .execute() + .actionGet(); + assertFalse(health.isTimedOut()); + + logger.info("--> setting shard routing weights for weighted round robin"); + Map weights = Map.of("a", 0.0, "b", 1.0, "c", 1.0); + WeightedRouting weightedRouting = new WeightedRouting("zone", weights); + + ClusterPutWeightedRoutingResponse weightedRoutingResponse = client().admin() + .cluster() + .prepareWeightedRouting() + .setWeightedRouting(weightedRouting) + .get(); + assertTrue(weightedRoutingResponse.isAcknowledged()); + + logger.info("--> starting decommissioning nodes in zone {}", 'a'); + DecommissionRequest decommissionRequest = new DecommissionRequest(new DecommissionAttribute("zone", "a")); + DecommissionResponse decommissionResponse = client().execute(DecommissionAction.INSTANCE, decommissionRequest).get(); + assertTrue(decommissionResponse.isAcknowledged()); + + DecommissionRequest newDecommissionRequest = new DecommissionRequest(new DecommissionAttribute("zone", "b")); + assertBusy( + () -> expectThrows( + DecommissioningFailedException.class, + () -> client(node_in_c).execute(DecommissionAction.INSTANCE, newDecommissionRequest).actionGet() + ) + ); + + // Will wait for all events to complete + client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get(); + + // Recommissioning the zone back to gracefully succeed the test once above tests succeeds + DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(node_in_c).execute( + DeleteDecommissionStateAction.INSTANCE, + new DeleteDecommissionStateRequest() + ).get(); + assertTrue(deleteDecommissionStateResponse.isAcknowledged()); + + // will wait for cluster to stabilise with a timeout of 2 min as by then all nodes should have joined the cluster + ensureStableCluster(6, TimeValue.timeValueMinutes(2)); + } + + public void testDecommissionStatusUpdatePublishedToAllNodes() throws ExecutionException, InterruptedException { + Settings commonSettings = Settings.builder() + .put("cluster.routing.allocation.awareness.attributes", "zone") + .put("cluster.routing.allocation.awareness.force.zone.values", "a,b,c") + .build(); + + logger.info("--> start 3 cluster manager nodes on zones 'a' & 'b' & 'c'"); + List clusterManagerNodes = internalCluster().startNodes( + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "a") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "b") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "c") + .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + .build() + ); + + logger.info("--> start 3 data nodes on zones 'a' & 'b' & 'c'"); + List dataNodes = internalCluster().startNodes( + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "a") + .put(onlyRole(commonSettings, DiscoveryNodeRole.DATA_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "b") + .put(onlyRole(commonSettings, DiscoveryNodeRole.DATA_ROLE)) + .build(), + Settings.builder() + .put(commonSettings) + .put("node.attr.zone", "c") + .put(onlyRole(commonSettings, DiscoveryNodeRole.DATA_ROLE)) + .build() + ); + + ensureStableCluster(6); + + logger.info("--> setting shard routing weights for weighted round robin"); + Map weights = Map.of("a", 1.0, "b", 1.0, "c", 0.0); + WeightedRouting weightedRouting = new WeightedRouting("zone", weights); + + ClusterPutWeightedRoutingResponse weightedRoutingResponse = client().admin() + .cluster() + .prepareWeightedRouting() + .setWeightedRouting(weightedRouting) + .get(); + assertTrue(weightedRoutingResponse.isAcknowledged()); + + logger.info("--> starting decommissioning nodes in zone {}", 'c'); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "c"); + // Set the timeout to 0 to do immediate Decommission + DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute); + decommissionRequest.setNoDelay(true); + DecommissionResponse decommissionResponse = client().execute(DecommissionAction.INSTANCE, decommissionRequest).get(); + assertTrue(decommissionResponse.isAcknowledged()); + + // Will wait for all events to complete + client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get(); + + logger.info("--> Received LANGUID event"); + + // assert that decommission status is successful + GetDecommissionStateResponse response = client(clusterManagerNodes.get(0)).execute( + GetDecommissionStateAction.INSTANCE, + new GetDecommissionStateRequest(decommissionAttribute.attributeName()) + ).get(); + assertEquals(response.getAttributeValue(), decommissionAttribute.attributeValue()); + assertEquals(DecommissionStatus.SUCCESSFUL, response.getDecommissionStatus()); + + logger.info("--> Decommission status is successful"); + ClusterState clusterState = client(clusterManagerNodes.get(0)).admin().cluster().prepareState().execute().actionGet().getState(); + assertEquals(4, clusterState.nodes().getSize()); + + logger.info("--> Got cluster state with 4 nodes."); + // assert status on nodes that are part of cluster currently + Iterator discoveryNodeIterator = clusterState.nodes().getNodes().valuesIt(); + DiscoveryNode clusterManagerNodeAfterDecommission = null; + while (discoveryNodeIterator.hasNext()) { + // assert no node has decommissioned attribute + DiscoveryNode node = discoveryNodeIterator.next(); + assertNotEquals(node.getAttributes().get("zone"), "c"); + if (node.isClusterManagerNode()) { + clusterManagerNodeAfterDecommission = node; + } + // assert all the nodes has status as SUCCESSFUL + ClusterService localNodeClusterService = internalCluster().getInstance(ClusterService.class, node.getName()); + assertEquals( + localNodeClusterService.state().metadata().decommissionAttributeMetadata().status(), + DecommissionStatus.SUCCESSFUL + ); + } + assertNotNull("Cluster Manager not found after decommission", clusterManagerNodeAfterDecommission); + logger.info("--> Cluster Manager node found after decommission"); + + // assert status on decommissioned node + // Here we will verify that until it got kicked out, it received appropriate status updates + // decommissioned nodes hence will have status as IN_PROGRESS as it will be kicked out later after this + // and won't receive status update to SUCCESSFUL + String randomDecommissionedNode = randomFrom(clusterManagerNodes.get(2), dataNodes.get(2)); + ClusterService decommissionedNodeClusterService = internalCluster().getInstance(ClusterService.class, randomDecommissionedNode); + assertEquals( + decommissionedNodeClusterService.state().metadata().decommissionAttributeMetadata().status(), + DecommissionStatus.IN_PROGRESS + ); + logger.info("--> Verified the decommissioned node has in_progress state."); + + // Will wait for all events to complete + client(clusterManagerNodeAfterDecommission.getName()).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get(); + logger.info("--> Got LANGUID event"); + // Recommissioning the zone back to gracefully succeed the test once above tests succeeds + DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(clusterManagerNodeAfterDecommission.getName()).execute( + DeleteDecommissionStateAction.INSTANCE, + new DeleteDecommissionStateRequest() + ).get(); + assertTrue(deleteDecommissionStateResponse.isAcknowledged()); + logger.info("--> Deleting decommission done."); + + // will wait for cluster to stabilise with a timeout of 2 min (findPeerInterval for decommissioned nodes) + // as by then all nodes should have joined the cluster + ensureStableCluster(6, TimeValue.timeValueSeconds(121)); + } + + public void testDecommissionFailedWhenAttributeNotWeighedAway() throws Exception { + Settings commonSettings = Settings.builder() + .put("cluster.routing.allocation.awareness.attributes", "zone") + .put("cluster.routing.allocation.awareness.force.zone.values", "a,b,c") + .build(); + // Start 3 cluster manager eligible nodes + internalCluster().startClusterManagerOnlyNodes(3, Settings.builder().put(commonSettings).build()); + // start 3 data nodes + internalCluster().startDataOnlyNodes(3, Settings.builder().put(commonSettings).build()); + ensureStableCluster(6); + ClusterHealthResponse health = client().admin() + .cluster() + .prepareHealth() + .setWaitForEvents(Priority.LANGUID) + .setWaitForGreenStatus() + .setWaitForNodes(Integer.toString(6)) + .execute() + .actionGet(); + assertFalse(health.isTimedOut()); + + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "c"); + DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute); + decommissionRequest.setNoDelay(true); + assertBusy(() -> { + DecommissioningFailedException ex = expectThrows( + DecommissioningFailedException.class, + () -> client().execute(DecommissionAction.INSTANCE, decommissionRequest).actionGet() + ); + assertTrue( + ex.getMessage() + .contains("no weights are set to the attribute. Please set appropriate weights before triggering decommission action") + ); + }); + + logger.info("--> setting shard routing weights for weighted round robin"); + Map weights = Map.of("a", 1.0, "b", 1.0, "c", 1.0); + WeightedRouting weightedRouting = new WeightedRouting("zone", weights); + + ClusterPutWeightedRoutingResponse weightedRoutingResponse = client().admin() + .cluster() + .prepareWeightedRouting() + .setWeightedRouting(weightedRouting) + .get(); + assertTrue(weightedRoutingResponse.isAcknowledged()); + + assertBusy(() -> { + DecommissioningFailedException ex = expectThrows( + DecommissioningFailedException.class, + () -> client().execute(DecommissionAction.INSTANCE, decommissionRequest).actionGet() + ); + assertTrue(ex.getMessage().contains("weight for decommissioned attribute is expected to be [0.0] but found [1.0]")); + }); + } + + public void testDecommissionFailedWithOnlyOneAttributeValue() throws Exception { + Settings commonSettings = Settings.builder() + .put("cluster.routing.allocation.awareness.attributes", "zone") + .put("cluster.routing.allocation.awareness.force.zone.values", "a") + .build(); + // Start 3 cluster manager eligible nodes + internalCluster().startClusterManagerOnlyNodes(3, Settings.builder().put(commonSettings).put("node.attr.zone", "a").build()); + // start 3 data nodes + internalCluster().startDataOnlyNodes(3, Settings.builder().put(commonSettings).put("node.attr.zone", "a").build()); + ensureStableCluster(6); + ClusterHealthResponse health = client().admin() + .cluster() + .prepareHealth() + .setWaitForEvents(Priority.LANGUID) + .setWaitForGreenStatus() + .setWaitForNodes(Integer.toString(6)) + .execute() + .actionGet(); + assertFalse(health.isTimedOut()); + + logger.info("--> setting shard routing weights"); + Map weights = Map.of("a", 0.0); + WeightedRouting weightedRouting = new WeightedRouting("zone", weights); + + ClusterPutWeightedRoutingResponse weightedRoutingResponse = client().admin() + .cluster() + .prepareWeightedRouting() + .setWeightedRouting(weightedRouting) + .get(); + assertTrue(weightedRoutingResponse.isAcknowledged()); + + // prepare request to attempt to decommission zone 'a' + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "a"); + DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute); + decommissionRequest.setNoDelay(true); + + // since there is just one zone present in the cluster, and on initiating decommission for that zone, + // although all the nodes would be added to voting config exclusion list, but those nodes won't be able to + // abdicate themselves as we wouldn't have any other leader eligible node which would be declare itself cluster manager + // and hence due to which the leader won't get abdicated and decommission request should eventually fail. + // And in this case, to ensure decommission request doesn't leave mutating change in the cluster, we ensure + // that no exclusion is set to the cluster and state for decommission is marked as FAILED + Logger clusterLogger = LogManager.getLogger(DecommissionService.class); + MockLogAppender mockLogAppender = MockLogAppender.createForLoggers(clusterLogger); + mockLogAppender.addExpectation( + new MockLogAppender.SeenEventExpectation( + "test", + DecommissionService.class.getCanonicalName(), + Level.ERROR, + "failure in removing to-be-decommissioned cluster manager eligible nodes" + ) + ); + + assertBusy(() -> { + OpenSearchTimeoutException ex = expectThrows( + OpenSearchTimeoutException.class, + () -> client().execute(DecommissionAction.INSTANCE, decommissionRequest).actionGet() + ); + assertTrue(ex.getMessage().contains("timed out waiting for voting config exclusions")); + }); + + ClusterService leaderClusterService = internalCluster().getInstance( + ClusterService.class, + internalCluster().getClusterManagerName() + ); + ClusterStateObserver clusterStateObserver = new ClusterStateObserver( + leaderClusterService, + null, + logger, + client(internalCluster().getClusterManagerName()).threadPool().getThreadContext() + ); + CountDownLatch expectedStateLatch = new CountDownLatch(1); + + ClusterState currentState = internalCluster().clusterService().state(); + if (currentState.getVotingConfigExclusions().isEmpty()) { + logger.info("exclusion already cleared"); + expectedStateLatch.countDown(); + } else { + clusterStateObserver.waitForNextChange(new WaitForClearVotingConfigExclusion(expectedStateLatch)); + } + // if the below condition is passed, then we are sure exclusion is cleared + assertTrue(expectedStateLatch.await(30, TimeUnit.SECONDS)); + + expectedStateLatch = new CountDownLatch(1); + currentState = internalCluster().clusterService().state(); + DecommissionAttributeMetadata decommissionAttributeMetadata = currentState.metadata().decommissionAttributeMetadata(); + if (decommissionAttributeMetadata != null && decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED)) { + logger.info("decommission status has already turned false"); + expectedStateLatch.countDown(); + } else { + clusterStateObserver.waitForNextChange(new WaitForFailedDecommissionState(expectedStateLatch)); + } + + // if the below condition is passed, then we are sure current decommission status is marked FAILED + assertTrue(expectedStateLatch.await(30, TimeUnit.SECONDS)); + mockLogAppender.assertAllExpectationsMatched(); + + // ensure all nodes are part of cluster + ensureStableCluster(6, TimeValue.timeValueMinutes(2)); + } + + private static class WaitForFailedDecommissionState implements ClusterStateObserver.Listener { + + final CountDownLatch doneLatch; + + WaitForFailedDecommissionState(CountDownLatch latch) { + this.doneLatch = latch; + } + + @Override + public void onNewClusterState(ClusterState state) { + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().decommissionAttributeMetadata(); + if (decommissionAttributeMetadata != null && decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED)) { + doneLatch.countDown(); + } + } + + @Override + public void onClusterServiceClose() { + throw new AssertionError("unexpected close"); + } + + @Override + public void onTimeout(TimeValue timeout) { + throw new AssertionError("unexpected timeout"); + } + } + + private static class WaitForClearVotingConfigExclusion implements ClusterStateObserver.Listener { + + final CountDownLatch doneLatch; + + WaitForClearVotingConfigExclusion(CountDownLatch latch) { + this.doneLatch = latch; + } + + @Override + public void onNewClusterState(ClusterState state) { + if (state.getVotingConfigExclusions().isEmpty()) { + doneLatch.countDown(); + } + } + + @Override + public void onClusterServiceClose() { + throw new AssertionError("unexpected close"); + } + + @Override + public void onTimeout(TimeValue timeout) { + throw new AssertionError("unexpected timeout"); + } + } +} diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java b/server/src/main/java/org/opensearch/OpenSearchException.java index 1641f00a82f5c..76a24cf88970c 100644 --- a/server/src/main/java/org/opensearch/OpenSearchException.java +++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -69,6 +69,7 @@ import static java.util.Collections.unmodifiableMap; import static org.opensearch.Version.V_2_1_0; import static org.opensearch.Version.V_2_3_0; +import static org.opensearch.Version.V_2_4_0; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_UUID_NA_VALUE; import static org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.opensearch.common.xcontent.XContentParserUtils.ensureFieldName; @@ -1608,6 +1609,18 @@ private enum OpenSearchExceptionHandle { org.opensearch.index.shard.PrimaryShardClosedException::new, 162, V_2_3_0 + ), + DECOMMISSIONING_FAILED_EXCEPTION( + org.opensearch.cluster.decommission.DecommissioningFailedException.class, + org.opensearch.cluster.decommission.DecommissioningFailedException::new, + 163, + V_2_4_0 + ), + NODE_DECOMMISSIONED_EXCEPTION( + org.opensearch.cluster.decommission.NodeDecommissionedException.class, + org.opensearch.cluster.decommission.NodeDecommissionedException::new, + 164, + V_2_4_0 ); final Class exceptionClass; diff --git a/server/src/main/java/org/opensearch/action/ActionModule.java b/server/src/main/java/org/opensearch/action/ActionModule.java index 89eb82de97da8..841085946bc13 100644 --- a/server/src/main/java/org/opensearch/action/ActionModule.java +++ b/server/src/main/java/org/opensearch/action/ActionModule.java @@ -40,6 +40,12 @@ import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction; import org.opensearch.action.admin.cluster.configuration.TransportClearVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateAction; +import org.opensearch.action.admin.cluster.decommission.awareness.get.TransportGetDecommissionStateAction; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateAction; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.TransportDeleteDecommissionStateAction; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionAction; +import org.opensearch.action.admin.cluster.decommission.awareness.put.TransportDecommissionAction; import org.opensearch.action.admin.cluster.health.ClusterHealthAction; import org.opensearch.action.admin.cluster.health.TransportClusterHealthAction; import org.opensearch.action.admin.cluster.node.hotthreads.NodesHotThreadsAction; @@ -309,9 +315,11 @@ import org.opensearch.rest.action.admin.cluster.RestClusterStatsAction; import org.opensearch.rest.action.admin.cluster.RestClusterUpdateSettingsAction; import org.opensearch.rest.action.admin.cluster.RestCreateSnapshotAction; +import org.opensearch.rest.action.admin.cluster.RestDeleteDecommissionStateAction; import org.opensearch.rest.action.admin.cluster.RestDeleteRepositoryAction; import org.opensearch.rest.action.admin.cluster.RestDeleteSnapshotAction; import org.opensearch.rest.action.admin.cluster.RestDeleteStoredScriptAction; +import org.opensearch.rest.action.admin.cluster.RestGetDecommissionStateAction; import org.opensearch.rest.action.admin.cluster.RestGetRepositoriesAction; import org.opensearch.rest.action.admin.cluster.RestGetScriptContextAction; import org.opensearch.rest.action.admin.cluster.RestGetScriptLanguageAction; @@ -324,6 +332,7 @@ import org.opensearch.rest.action.admin.cluster.RestNodesStatsAction; import org.opensearch.rest.action.admin.cluster.RestNodesUsageAction; import org.opensearch.rest.action.admin.cluster.RestPendingClusterTasksAction; +import org.opensearch.rest.action.admin.cluster.RestDecommissionAction; import org.opensearch.rest.action.admin.cluster.RestPutRepositoryAction; import org.opensearch.rest.action.admin.cluster.RestPutStoredScriptAction; import org.opensearch.rest.action.admin.cluster.RestReloadSecureSettingsAction; @@ -694,6 +703,10 @@ public void reg actions.register(PitSegmentsAction.INSTANCE, TransportPitSegmentsAction.class); actions.register(GetAllPitsAction.INSTANCE, TransportGetAllPitsAction.class); + // Decommission actions + actions.register(DecommissionAction.INSTANCE, TransportDecommissionAction.class); + actions.register(GetDecommissionStateAction.INSTANCE, TransportGetDecommissionStateAction.class); + actions.register(DeleteDecommissionStateAction.INSTANCE, TransportDeleteDecommissionStateAction.class); return unmodifiableMap(actions.getRegistry()); } @@ -875,6 +888,7 @@ public void initRestHandlers(Supplier nodesInCluster) { registerHandler.accept(new RestDeletePitAction()); registerHandler.accept(new RestGetAllPitsAction(nodesInCluster)); registerHandler.accept(new RestPitSegmentsAction(nodesInCluster)); + registerHandler.accept(new RestDeleteDecommissionStateAction()); for (ActionPlugin plugin : actionPlugins) { for (RestHandler handler : plugin.getRestHandlers( @@ -890,6 +904,8 @@ public void initRestHandlers(Supplier nodesInCluster) { } } registerHandler.accept(new RestCatAction(catActions)); + registerHandler.accept(new RestDecommissionAction()); + registerHandler.accept(new RestGetDecommissionStateAction()); // Remote Store APIs if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE)) { diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateAction.java new file mode 100644 index 0000000000000..3aff666d388be --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateAction.java @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.delete; + +import org.opensearch.action.ActionType; + +/** + * Delete decommission state action. + * + * @opensearch.internal + */ +public class DeleteDecommissionStateAction extends ActionType { + public static final DeleteDecommissionStateAction INSTANCE = new DeleteDecommissionStateAction(); + public static final String NAME = "cluster:admin/decommission/awareness/delete"; + + private DeleteDecommissionStateAction() { + super(NAME, DeleteDecommissionStateResponse::new); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequest.java new file mode 100644 index 0000000000000..205be54a36c33 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequest.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.delete; + +import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.action.support.clustermanager.ClusterManagerNodeRequest; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * Request for deleting decommission request. + * + * @opensearch.internal + */ +public class DeleteDecommissionStateRequest extends ClusterManagerNodeRequest { + + public DeleteDecommissionStateRequest() {} + + public DeleteDecommissionStateRequest(StreamInput in) throws IOException { + super(in); + } + + @Override + public ActionRequestValidationException validate() { + return null; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequestBuilder.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequestBuilder.java new file mode 100644 index 0000000000000..08f194c53f18e --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequestBuilder.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.delete; + +import org.opensearch.action.support.clustermanager.ClusterManagerNodeOperationRequestBuilder; +import org.opensearch.client.OpenSearchClient; + +/** + * Builder for Delete decommission request. + * + * @opensearch.internal + */ +public class DeleteDecommissionStateRequestBuilder extends ClusterManagerNodeOperationRequestBuilder< + DeleteDecommissionStateRequest, + DeleteDecommissionStateResponse, + DeleteDecommissionStateRequestBuilder> { + + public DeleteDecommissionStateRequestBuilder(OpenSearchClient client, DeleteDecommissionStateAction action) { + super(client, action, new DeleteDecommissionStateRequest()); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateResponse.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateResponse.java new file mode 100644 index 0000000000000..2ff634966586a --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateResponse.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.delete; + +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * Response returned after deletion of decommission request. + * + * @opensearch.internal + */ +public class DeleteDecommissionStateResponse extends AcknowledgedResponse { + + public DeleteDecommissionStateResponse(StreamInput in) throws IOException { + super(in); + } + + public DeleteDecommissionStateResponse(boolean acknowledged) { + super(acknowledged); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/TransportDeleteDecommissionStateAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/TransportDeleteDecommissionStateAction.java new file mode 100644 index 0000000000000..7d8f4bdd8304c --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/TransportDeleteDecommissionStateAction.java @@ -0,0 +1,86 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.delete; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.action.ActionListener; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.clustermanager.TransportClusterManagerNodeAction; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlockException; +import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.cluster.decommission.DecommissionService; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.io.IOException; + +/** + * Transport action for delete decommission. + * + * @opensearch.internal + */ +public class TransportDeleteDecommissionStateAction extends TransportClusterManagerNodeAction< + DeleteDecommissionStateRequest, + DeleteDecommissionStateResponse> { + + private static final Logger logger = LogManager.getLogger(TransportDeleteDecommissionStateAction.class); + private final DecommissionService decommissionService; + + @Inject + public TransportDeleteDecommissionStateAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver, + DecommissionService decommissionService + ) { + super( + DeleteDecommissionStateAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + DeleteDecommissionStateRequest::new, + indexNameExpressionResolver + ); + this.decommissionService = decommissionService; + } + + @Override + protected String executor() { + return ThreadPool.Names.SAME; + } + + @Override + protected DeleteDecommissionStateResponse read(StreamInput in) throws IOException { + return new DeleteDecommissionStateResponse(in); + } + + @Override + protected ClusterBlockException checkBlock(DeleteDecommissionStateRequest request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + } + + @Override + protected void clusterManagerOperation( + DeleteDecommissionStateRequest request, + ClusterState state, + ActionListener listener + ) { + logger.info("Received delete decommission Request [{}]", request); + this.decommissionService.startRecommissionAction(listener); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/package-info.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/package-info.java new file mode 100644 index 0000000000000..c2cfc03baa45e --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Delete decommission transport handlers. */ +package org.opensearch.action.admin.cluster.decommission.awareness.delete; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateAction.java new file mode 100644 index 0000000000000..72fd1a26cb860 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateAction.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.action.ActionType; + +/** + * Get decommission action + * + * @opensearch.internal + */ +public class GetDecommissionStateAction extends ActionType { + + public static final GetDecommissionStateAction INSTANCE = new GetDecommissionStateAction(); + public static final String NAME = "cluster:admin/decommission/awareness/get"; + + private GetDecommissionStateAction() { + super(NAME, GetDecommissionStateResponse::new); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequest.java new file mode 100644 index 0000000000000..1f301aa2b5273 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequest.java @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.action.support.clustermanager.ClusterManagerNodeReadRequest; +import org.opensearch.common.Strings; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +import static org.opensearch.action.ValidateActions.addValidationError; + +/** + * Get Decommissioned attribute request + * + * @opensearch.internal + */ +public class GetDecommissionStateRequest extends ClusterManagerNodeReadRequest { + + private String attributeName; + + public GetDecommissionStateRequest() {} + + /** + * Constructs a new get decommission state request with given attribute name + * + * @param attributeName name of the attribute + */ + public GetDecommissionStateRequest(String attributeName) { + this.attributeName = attributeName; + } + + public GetDecommissionStateRequest(StreamInput in) throws IOException { + super(in); + attributeName = in.readString(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(attributeName); + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = null; + if (attributeName == null || Strings.isEmpty(attributeName)) { + validationException = addValidationError("attribute name is missing", validationException); + } + return validationException; + } + + /** + * Sets attribute name + * + * @param attributeName attribute name + * @return this request + */ + public GetDecommissionStateRequest attributeName(String attributeName) { + this.attributeName = attributeName; + return this; + } + + /** + * Returns attribute name + * + * @return attributeName name of attribute + */ + public String attributeName() { + return this.attributeName; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestBuilder.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestBuilder.java new file mode 100644 index 0000000000000..e766e9c674ff7 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestBuilder.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.action.support.clustermanager.ClusterManagerNodeReadOperationRequestBuilder; +import org.opensearch.client.OpenSearchClient; + +/** + * Get decommission request builder + * + * @opensearch.internal + */ +public class GetDecommissionStateRequestBuilder extends ClusterManagerNodeReadOperationRequestBuilder< + GetDecommissionStateRequest, + GetDecommissionStateResponse, + GetDecommissionStateRequestBuilder> { + + /** + * Creates new get decommissioned attributes request builder + */ + public GetDecommissionStateRequestBuilder(OpenSearchClient client, GetDecommissionStateAction action) { + super(client, action, new GetDecommissionStateRequest()); + } + + /** + * @param attributeName name of attribute + * @return current object + */ + public GetDecommissionStateRequestBuilder setAttributeName(String attributeName) { + request.attributeName(attributeName); + return this; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponse.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponse.java new file mode 100644 index 0000000000000..ec0bd7cf7e7eb --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponse.java @@ -0,0 +1,121 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.OpenSearchParseException; +import org.opensearch.action.ActionResponse; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.xcontent.ToXContentObject; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Locale; +import java.util.Objects; + +import static org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken; + +/** + * Response for decommission status + * + * @opensearch.internal + */ +public class GetDecommissionStateResponse extends ActionResponse implements ToXContentObject { + + private String attributeValue; + private DecommissionStatus status; + + GetDecommissionStateResponse() { + this(null, null); + } + + GetDecommissionStateResponse(String attributeValue, DecommissionStatus status) { + this.attributeValue = attributeValue; + this.status = status; + } + + GetDecommissionStateResponse(StreamInput in) throws IOException { + // read decommissioned attribute and status only if it is present + if (in.readBoolean()) { + this.attributeValue = in.readString(); + this.status = DecommissionStatus.fromString(in.readString()); + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + // if decommissioned attribute value is null or status is null then mark its absence + if (attributeValue == null || status == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeString(attributeValue); + out.writeString(status.status()); + } + } + + public String getAttributeValue() { + return attributeValue; + } + + public DecommissionStatus getDecommissionStatus() { + return status; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (attributeValue != null && status != null) { + builder.field(attributeValue, status); + } + builder.endObject(); + return builder; + } + + public static GetDecommissionStateResponse fromXContent(XContentParser parser) throws IOException { + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser); + XContentParser.Token token; + String attributeValue = null; + DecommissionStatus status = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + attributeValue = parser.currentName(); + if (parser.nextToken() != XContentParser.Token.VALUE_STRING) { + throw new OpenSearchParseException("failed to parse status of decommissioning, expected string but found unknown type"); + } + status = DecommissionStatus.fromString(parser.text().toLowerCase(Locale.ROOT)); + } else { + throw new OpenSearchParseException( + "failed to parse decommission state, expected [{}] but found [{}]", + XContentParser.Token.FIELD_NAME, + token + ); + } + } + return new GetDecommissionStateResponse(attributeValue, status); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + GetDecommissionStateResponse that = (GetDecommissionStateResponse) o; + if (!Objects.equals(attributeValue, that.attributeValue)) { + return false; + } + return Objects.equals(status, that.status); + } + + @Override + public int hashCode() { + return Objects.hash(attributeValue, status); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/TransportGetDecommissionStateAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/TransportGetDecommissionStateAction.java new file mode 100644 index 0000000000000..d811ab8cf6948 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/TransportGetDecommissionStateAction.java @@ -0,0 +1,89 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.action.ActionListener; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.clustermanager.TransportClusterManagerNodeReadAction; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlockException; +import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.io.IOException; + +/** + * Transport action for getting decommission status + * + * @opensearch.internal + */ +public class TransportGetDecommissionStateAction extends TransportClusterManagerNodeReadAction< + GetDecommissionStateRequest, + GetDecommissionStateResponse> { + + @Inject + public TransportGetDecommissionStateAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + GetDecommissionStateAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + GetDecommissionStateRequest::new, + indexNameExpressionResolver + ); + } + + @Override + protected String executor() { + return ThreadPool.Names.SAME; + } + + @Override + protected GetDecommissionStateResponse read(StreamInput in) throws IOException { + return new GetDecommissionStateResponse(in); + } + + @Override + protected void clusterManagerOperation( + GetDecommissionStateRequest request, + ClusterState state, + ActionListener listener + ) throws Exception { + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().decommissionAttributeMetadata(); + if (decommissionAttributeMetadata != null + && request.attributeName().equals(decommissionAttributeMetadata.decommissionAttribute().attributeName())) { + listener.onResponse( + new GetDecommissionStateResponse( + decommissionAttributeMetadata.decommissionAttribute().attributeValue(), + decommissionAttributeMetadata.status() + ) + ); + } else { + listener.onResponse(new GetDecommissionStateResponse()); + } + } + + @Override + protected ClusterBlockException checkBlock(GetDecommissionStateRequest request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_READ); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/package-info.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/package-info.java new file mode 100644 index 0000000000000..5b88e91cf4f9d --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Transport handlers for getting status of decommission request */ +package org.opensearch.action.admin.cluster.decommission.awareness.get; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/package-info.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/package-info.java new file mode 100644 index 0000000000000..e1260e638c91d --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Decommission transport handlers. */ +package org.opensearch.action.admin.cluster.decommission.awareness; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionAction.java new file mode 100644 index 0000000000000..56678650f6e35 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionAction.java @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.opensearch.action.ActionType; + +/** + * Register decommission action + * + * @opensearch.internal + */ +public final class DecommissionAction extends ActionType { + public static final DecommissionAction INSTANCE = new DecommissionAction(); + public static final String NAME = "cluster:admin/decommission/awareness/put"; + + private DecommissionAction() { + super(NAME, DecommissionResponse::new); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequest.java new file mode 100644 index 0000000000000..7ec2cea769069 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequest.java @@ -0,0 +1,127 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.action.support.clustermanager.ClusterManagerNodeRequest; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.common.Strings; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.unit.TimeValue; + +import java.io.IOException; + +import static org.opensearch.action.ValidateActions.addValidationError; + +/** + * Register decommission request. + *

+ * Registers a decommission request with decommission attribute and timeout + * + * @opensearch.internal + */ +public class DecommissionRequest extends ClusterManagerNodeRequest { + + public static final TimeValue DEFAULT_NODE_DRAINING_TIMEOUT = TimeValue.timeValueSeconds(120); + + private DecommissionAttribute decommissionAttribute; + + private TimeValue delayTimeout = DEFAULT_NODE_DRAINING_TIMEOUT; + + // holder for no_delay param. To avoid draining time timeout. + private boolean noDelay = false; + + public DecommissionRequest() {} + + public DecommissionRequest(DecommissionAttribute decommissionAttribute) { + this.decommissionAttribute = decommissionAttribute; + } + + public DecommissionRequest(StreamInput in) throws IOException { + super(in); + decommissionAttribute = new DecommissionAttribute(in); + this.delayTimeout = in.readTimeValue(); + this.noDelay = in.readBoolean(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + decommissionAttribute.writeTo(out); + out.writeTimeValue(delayTimeout); + out.writeBoolean(noDelay); + } + + /** + * Sets decommission attribute for decommission request + * + * @param decommissionAttribute attribute key-value that needs to be decommissioned + * @return this request + */ + public DecommissionRequest setDecommissionAttribute(DecommissionAttribute decommissionAttribute) { + this.decommissionAttribute = decommissionAttribute; + return this; + } + + /** + * @return Returns the decommission attribute key-value + */ + public DecommissionAttribute getDecommissionAttribute() { + return this.decommissionAttribute; + } + + public void setDelayTimeout(TimeValue delayTimeout) { + this.delayTimeout = delayTimeout; + } + + public TimeValue getDelayTimeout() { + return this.delayTimeout; + } + + public void setNoDelay(boolean noDelay) { + if (noDelay) { + this.delayTimeout = TimeValue.ZERO; + } + this.noDelay = noDelay; + } + + public boolean isNoDelay() { + return noDelay; + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = null; + if (decommissionAttribute == null) { + validationException = addValidationError("decommission attribute is missing", validationException); + return validationException; + } + if (decommissionAttribute.attributeName() == null || Strings.isEmpty(decommissionAttribute.attributeName())) { + validationException = addValidationError("attribute name is missing", validationException); + } + if (decommissionAttribute.attributeValue() == null || Strings.isEmpty(decommissionAttribute.attributeValue())) { + validationException = addValidationError("attribute value is missing", validationException); + } + // This validation should not fail since we are not allowing delay timeout to be set externally. + // Still keeping it for double check. + if (noDelay && delayTimeout.getSeconds() > 0) { + final String validationMessage = "Invalid decommission request. no_delay is true and delay_timeout is set to " + + delayTimeout.getSeconds() + + "] Seconds"; + validationException = addValidationError(validationMessage, validationException); + } + return validationException; + } + + @Override + public String toString() { + return "DecommissionRequest{" + "decommissionAttribute=" + decommissionAttribute + '}'; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestBuilder.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestBuilder.java new file mode 100644 index 0000000000000..1c7a03fa10e76 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestBuilder.java @@ -0,0 +1,49 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.opensearch.action.ActionType; +import org.opensearch.action.support.clustermanager.ClusterManagerNodeOperationRequestBuilder; +import org.opensearch.client.OpenSearchClient; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.common.unit.TimeValue; + +/** + * Register decommission request builder + * + * @opensearch.internal + */ +public class DecommissionRequestBuilder extends ClusterManagerNodeOperationRequestBuilder< + DecommissionRequest, + DecommissionResponse, + DecommissionRequestBuilder> { + + public DecommissionRequestBuilder(OpenSearchClient client, ActionType action, DecommissionRequest request) { + super(client, action, request); + } + + /** + * @param decommissionAttribute decommission attribute + * @return current object + */ + public DecommissionRequestBuilder setDecommissionedAttribute(DecommissionAttribute decommissionAttribute) { + request.setDecommissionAttribute(decommissionAttribute); + return this; + } + + public DecommissionRequestBuilder setDelayTimeOut(TimeValue delayTimeOut) { + request.setDelayTimeout(delayTimeOut); + return this; + } + + public DecommissionRequestBuilder setNoDelay(boolean noDelay) { + request.setNoDelay(noDelay); + return this; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponse.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponse.java new file mode 100644 index 0000000000000..499f403c8cd64 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponse.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.xcontent.ToXContentObject; + +import java.io.IOException; + +/** + * Response for decommission request + * + * @opensearch.internal + */ +public class DecommissionResponse extends AcknowledgedResponse implements ToXContentObject { + + public DecommissionResponse(StreamInput in) throws IOException { + super(in); + } + + public DecommissionResponse(boolean acknowledged) { + super(acknowledged); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/TransportDecommissionAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/TransportDecommissionAction.java new file mode 100644 index 0000000000000..6f4e3cf82d2ce --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/TransportDecommissionAction.java @@ -0,0 +1,81 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.action.ActionListener; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.clustermanager.TransportClusterManagerNodeAction; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlockException; +import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.cluster.decommission.DecommissionService; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.io.IOException; + +/** + * Transport action for registering decommission + * + * @opensearch.internal + */ +public class TransportDecommissionAction extends TransportClusterManagerNodeAction { + + private static final Logger logger = LogManager.getLogger(TransportDecommissionAction.class); + private final DecommissionService decommissionService; + + @Inject + public TransportDecommissionAction( + TransportService transportService, + ClusterService clusterService, + DecommissionService decommissionService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + DecommissionAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + DecommissionRequest::new, + indexNameExpressionResolver + ); + this.decommissionService = decommissionService; + } + + @Override + protected String executor() { + return ThreadPool.Names.SAME; + } + + @Override + protected DecommissionResponse read(StreamInput in) throws IOException { + return new DecommissionResponse(in); + } + + @Override + protected ClusterBlockException checkBlock(DecommissionRequest request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + } + + @Override + protected void clusterManagerOperation(DecommissionRequest request, ClusterState state, ActionListener listener) + throws Exception { + logger.info("starting awareness attribute [{}] decommissioning", request.getDecommissionAttribute().toString()); + decommissionService.startDecommissionAction(request, listener); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/package-info.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/package-info.java new file mode 100644 index 0000000000000..c361f4b95a484 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Transport handlers for putting a new decommission request */ +package org.opensearch.action.admin.cluster.decommission.awareness.put; diff --git a/server/src/main/java/org/opensearch/client/ClusterAdminClient.java b/server/src/main/java/org/opensearch/client/ClusterAdminClient.java index 47fb5bd8d4600..4ab438ec064f1 100644 --- a/server/src/main/java/org/opensearch/client/ClusterAdminClient.java +++ b/server/src/main/java/org/opensearch/client/ClusterAdminClient.java @@ -37,6 +37,15 @@ import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest; import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainRequestBuilder; import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse; import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.action.admin.cluster.health.ClusterHealthRequestBuilder; import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; @@ -846,4 +855,48 @@ public interface ClusterAdminClient extends OpenSearchClient { */ ClusterDeleteWeightedRoutingRequestBuilder prepareDeleteWeightedRouting(); + /** + * Decommission awareness attribute + */ + ActionFuture decommission(DecommissionRequest request); + + /** + * Decommission awareness attribute + */ + void decommission(DecommissionRequest request, ActionListener listener); + + /** + * Decommission awareness attribute + */ + DecommissionRequestBuilder prepareDecommission(DecommissionRequest request); + + /** + * Get Decommissioned attribute + */ + ActionFuture getDecommissionState(GetDecommissionStateRequest request); + + /** + * Get Decommissioned attribute + */ + void getDecommissionState(GetDecommissionStateRequest request, ActionListener listener); + + /** + * Get Decommissioned attribute + */ + GetDecommissionStateRequestBuilder prepareGetDecommissionState(); + + /** + * Deletes the decommission metadata. + */ + ActionFuture deleteDecommissionState(DeleteDecommissionStateRequest request); + + /** + * Deletes the decommission metadata. + */ + void deleteDecommissionState(DeleteDecommissionStateRequest request, ActionListener listener); + + /** + * Deletes the decommission metadata. + */ + DeleteDecommissionStateRequestBuilder prepareDeleteDecommissionRequest(); } diff --git a/server/src/main/java/org/opensearch/client/Requests.java b/server/src/main/java/org/opensearch/client/Requests.java index 107cfe5f04d06..21f2a2d906602 100644 --- a/server/src/main/java/org/opensearch/client/Requests.java +++ b/server/src/main/java/org/opensearch/client/Requests.java @@ -32,6 +32,9 @@ package org.opensearch.client; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.action.admin.cluster.node.info.NodesInfoRequest; import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; @@ -578,4 +581,29 @@ public static ClusterGetWeightedRoutingRequest getWeightedRoutingRequest(String public static ClusterDeleteWeightedRoutingRequest deleteWeightedRoutingRequest() { return new ClusterDeleteWeightedRoutingRequest(); } + + /** + * Creates a new decommission request. + * + * @return returns put decommission request + */ + public static DecommissionRequest decommissionRequest() { + return new DecommissionRequest(); + } + + /** + * Get decommissioned attribute from metadata + * + * @return returns get decommission request + */ + public static GetDecommissionStateRequest getDecommissionStateRequest() { + return new GetDecommissionStateRequest(); + } + + /** + * Creates a new delete decommission request. + */ + public static DeleteDecommissionStateRequest deleteDecommissionStateRequest() { + return new DeleteDecommissionStateRequest(); + } } diff --git a/server/src/main/java/org/opensearch/client/support/AbstractClient.java b/server/src/main/java/org/opensearch/client/support/AbstractClient.java index 80d04156f475b..828ca5f8083ee 100644 --- a/server/src/main/java/org/opensearch/client/support/AbstractClient.java +++ b/server/src/main/java/org/opensearch/client/support/AbstractClient.java @@ -43,6 +43,18 @@ import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest; import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainRequestBuilder; import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateAction; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateAction; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionAction; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse; import org.opensearch.action.admin.cluster.health.ClusterHealthAction; import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.action.admin.cluster.health.ClusterHealthRequestBuilder; @@ -1388,6 +1400,54 @@ public DeleteStoredScriptRequestBuilder prepareDeleteStoredScript() { public DeleteStoredScriptRequestBuilder prepareDeleteStoredScript(String id) { return prepareDeleteStoredScript().setId(id); } + + @Override + public ActionFuture decommission(DecommissionRequest request) { + return execute(DecommissionAction.INSTANCE, request); + } + + @Override + public void decommission(DecommissionRequest request, ActionListener listener) { + execute(DecommissionAction.INSTANCE, request, listener); + } + + @Override + public DecommissionRequestBuilder prepareDecommission(DecommissionRequest request) { + return new DecommissionRequestBuilder(this, DecommissionAction.INSTANCE, request); + } + + @Override + public ActionFuture getDecommissionState(GetDecommissionStateRequest request) { + return execute(GetDecommissionStateAction.INSTANCE, request); + } + + @Override + public void getDecommissionState(GetDecommissionStateRequest request, ActionListener listener) { + execute(GetDecommissionStateAction.INSTANCE, request, listener); + } + + @Override + public GetDecommissionStateRequestBuilder prepareGetDecommissionState() { + return new GetDecommissionStateRequestBuilder(this, GetDecommissionStateAction.INSTANCE); + } + + @Override + public ActionFuture deleteDecommissionState(DeleteDecommissionStateRequest request) { + return execute(DeleteDecommissionStateAction.INSTANCE, request); + } + + @Override + public void deleteDecommissionState( + DeleteDecommissionStateRequest request, + ActionListener listener + ) { + execute(DeleteDecommissionStateAction.INSTANCE, request, listener); + } + + @Override + public DeleteDecommissionStateRequestBuilder prepareDeleteDecommissionRequest() { + return new DeleteDecommissionStateRequestBuilder(this, DeleteDecommissionStateAction.INSTANCE); + } } static class IndicesAdmin implements IndicesAdminClient { diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index 85f49b19bc967..ee3ef0cbc4d26 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -35,6 +35,7 @@ import org.opensearch.cluster.action.index.MappingUpdatedAction; import org.opensearch.cluster.action.index.NodeMappingRefreshAction; import org.opensearch.cluster.action.shard.ShardStateAction; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.ComponentTemplateMetadata; import org.opensearch.cluster.metadata.ComposableIndexTemplateMetadata; import org.opensearch.cluster.metadata.DataStreamMetadata; @@ -194,6 +195,12 @@ public static List getNamedWriteables() { ); registerMetadataCustom(entries, DataStreamMetadata.TYPE, DataStreamMetadata::new, DataStreamMetadata::readDiffFrom); registerMetadataCustom(entries, WeightedRoutingMetadata.TYPE, WeightedRoutingMetadata::new, WeightedRoutingMetadata::readDiffFrom); + registerMetadataCustom( + entries, + DecommissionAttributeMetadata.TYPE, + DecommissionAttributeMetadata::new, + DecommissionAttributeMetadata::readDiffFrom + ); // Task Status (not Diffable) entries.add(new Entry(Task.Status.class, PersistentTasksNodeService.Status.NAME, PersistentTasksNodeService.Status::new)); return entries; @@ -284,6 +291,13 @@ public static List getNamedXWriteables() { WeightedRoutingMetadata::fromXContent ) ); + entries.add( + new NamedXContentRegistry.Entry( + Metadata.Custom.class, + new ParseField(DecommissionAttributeMetadata.TYPE), + DecommissionAttributeMetadata::fromXContent + ) + ); return entries; } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index ca1950df81d68..23bc9c37899e3 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -106,6 +106,7 @@ import java.util.stream.StreamSupport; import static org.opensearch.cluster.coordination.NoClusterManagerBlockService.NO_CLUSTER_MANAGER_BLOCK_ID; +import static org.opensearch.cluster.decommission.DecommissionService.nodeCommissioned; import static org.opensearch.gateway.ClusterStateUpdaters.hideStateIfNotRecovered; import static org.opensearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK; import static org.opensearch.monitor.StatusInfo.Status.UNHEALTHY; @@ -139,6 +140,7 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery private final Settings settings; private final boolean singleNodeDiscovery; + private volatile boolean localNodeCommissioned; private final ElectionStrategy electionStrategy; private final TransportService transportService; private final ClusterManagerService clusterManagerService; @@ -219,7 +221,8 @@ public Coordinator( this::joinLeaderInTerm, this.onJoinValidators, rerouteService, - nodeHealthService + nodeHealthService, + this::onNodeCommissionStatusChange ); this.persistedStateSupplier = persistedStateSupplier; this.noClusterManagerBlockService = new NoClusterManagerBlockService(settings, clusterSettings); @@ -282,6 +285,7 @@ public Coordinator( joinHelper::logLastFailedJoinAttempt ); this.nodeHealthService = nodeHealthService; + this.localNodeCommissioned = true; } private ClusterFormationState getClusterFormationState() { @@ -597,6 +601,9 @@ private void handleJoinRequest(JoinRequest joinRequest, JoinHelper.JoinCallback joinRequest.getSourceNode().getVersion(), stateForJoinValidation.getNodes().getMinNodeVersion() ); + // we are checking source node commission status here to reject any join request coming from a decommissioned node + // even before executing the join task to fail fast + JoinTaskExecutor.ensureNodeCommissioned(joinRequest.getSourceNode(), stateForJoinValidation.metadata()); } sendValidateJoinRequest(stateForJoinValidation, joinRequest, joinCallback); } else { @@ -1425,6 +1432,17 @@ protected void onFoundPeersUpdated() { } } + // package-visible for testing + synchronized void onNodeCommissionStatusChange(boolean localNodeCommissioned) { + this.localNodeCommissioned = localNodeCommissioned; + peerFinder.onNodeCommissionStatusChange(localNodeCommissioned); + } + + // package-visible for testing + boolean localNodeCommissioned() { + return localNodeCommissioned; + } + private void startElectionScheduler() { assert electionScheduler == null : electionScheduler; @@ -1451,6 +1469,14 @@ public void run() { return; } + // if either the localNodeCommissioned flag or the last accepted state thinks it should skip pre voting, we will + // acknowledge it + if (nodeCommissioned(lastAcceptedState.nodes().getLocalNode(), lastAcceptedState.metadata()) == false + || localNodeCommissioned == false) { + logger.debug("skip prevoting as local node is decommissioned"); + return; + } + if (prevotingRound != null) { prevotingRound.close(); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java index 656e6d220720f..a66152b8016ee 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java @@ -42,6 +42,7 @@ import org.opensearch.cluster.ClusterStateTaskListener; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.coordination.Coordinator.Mode; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.RerouteService; @@ -57,6 +58,7 @@ import org.opensearch.monitor.StatusInfo; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; +import org.opensearch.transport.RemoteTransportException; import org.opensearch.transport.TransportChannel; import org.opensearch.transport.TransportException; import org.opensearch.transport.TransportRequest; @@ -78,6 +80,7 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; +import java.util.function.Consumer; import java.util.function.Function; import java.util.function.LongSupplier; import java.util.function.Supplier; @@ -118,6 +121,7 @@ public class JoinHelper { private final AtomicReference lastFailedJoinAttempt = new AtomicReference<>(); private final Supplier joinTaskExecutorGenerator; + private final Consumer nodeCommissioned; JoinHelper( Settings settings, @@ -130,12 +134,14 @@ public class JoinHelper { Function joinLeaderInTerm, Collection> joinValidators, RerouteService rerouteService, - NodeHealthService nodeHealthService + NodeHealthService nodeHealthService, + Consumer nodeCommissioned ) { this.clusterManagerService = clusterManagerService; this.transportService = transportService; this.nodeHealthService = nodeHealthService; this.joinTimeout = JOIN_TIMEOUT_SETTING.get(settings); + this.nodeCommissioned = nodeCommissioned; this.joinTaskExecutorGenerator = () -> new JoinTaskExecutor(settings, allocationService, logger, rerouteService, transportService) { private final long term = currentTermSupplier.getAsLong(); @@ -342,6 +348,7 @@ public void handleResponse(Empty response) { pendingOutgoingJoins.remove(dedupKey); logger.debug("successfully joined {} with {}", destination, joinRequest); lastFailedJoinAttempt.set(null); + nodeCommissioned.accept(true); onCompletion.run(); } @@ -352,6 +359,13 @@ public void handleException(TransportException exp) { FailedJoinAttempt attempt = new FailedJoinAttempt(destination, joinRequest, exp); attempt.logNow(); lastFailedJoinAttempt.set(attempt); + if (exp instanceof RemoteTransportException && (exp.getCause() instanceof NodeDecommissionedException)) { + logger.info( + "local node is decommissioned [{}]. Will not be able to join the cluster", + exp.getCause().getMessage() + ); + nodeCommissioned.accept(false); + } onCompletion.run(); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 5afdb5b12db23..ac237db85ee5b 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -39,6 +39,7 @@ import org.opensearch.cluster.ClusterStateTaskExecutor; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.block.ClusterBlocks; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -60,6 +61,7 @@ import java.util.function.BiConsumer; import java.util.stream.Collectors; +import static org.opensearch.cluster.decommission.DecommissionService.nodeCommissioned; import static org.opensearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK; /** @@ -192,6 +194,9 @@ public ClusterTasksResult execute(ClusterState currentState, List jo // we do this validation quite late to prevent race conditions between nodes joining and importing dangling indices // we have to reject nodes that don't support all indices we have in this cluster ensureIndexCompatibility(node.getVersion(), currentState.getMetadata()); + // we have added the same check in handleJoinRequest method and adding it here as this method + // would guarantee that a decommissioned node would never be able to join the cluster and ensures correctness + ensureNodeCommissioned(node, currentState.metadata()); nodesBuilder.add(node); nodesChanged = true; minClusterNodeVersion = Version.min(minClusterNodeVersion, node.getVersion()); @@ -199,7 +204,7 @@ public ClusterTasksResult execute(ClusterState currentState, List jo if (node.isClusterManagerNode()) { joiniedNodeNameIds.put(node.getName(), node.getId()); } - } catch (IllegalArgumentException | IllegalStateException e) { + } catch (IllegalArgumentException | IllegalStateException | NodeDecommissionedException e) { results.failure(joinTask, e); continue; } @@ -358,6 +363,7 @@ public boolean runOnlyOnClusterManager() { /** * a task indicates that the current node should become master + * * @deprecated As of 2.0, because supporting inclusive language, replaced by {@link #newBecomeClusterManagerTask()} */ @Deprecated @@ -384,8 +390,9 @@ public static Task newFinishElectionTask() { * Ensures that all indices are compatible with the given node version. This will ensure that all indices in the given metadata * will not be created with a newer version of opensearch as well as that all indices are newer or equal to the minimum index * compatibility version. - * @see Version#minimumIndexCompatibilityVersion() + * * @throws IllegalStateException if any index is incompatible with the given version + * @see Version#minimumIndexCompatibilityVersion() */ public static void ensureIndexCompatibility(final Version nodeVersion, Metadata metadata) { Version supportedIndexVersion = nodeVersion.minimumIndexCompatibilityVersion(); @@ -415,14 +422,18 @@ public static void ensureIndexCompatibility(final Version nodeVersion, Metadata } } - /** ensures that the joining node has a version that's compatible with all current nodes*/ + /** + * ensures that the joining node has a version that's compatible with all current nodes + */ public static void ensureNodesCompatibility(final Version joiningNodeVersion, DiscoveryNodes currentNodes) { final Version minNodeVersion = currentNodes.getMinNodeVersion(); final Version maxNodeVersion = currentNodes.getMaxNodeVersion(); ensureNodesCompatibility(joiningNodeVersion, minNodeVersion, maxNodeVersion); } - /** ensures that the joining node has a version that's compatible with a given version range */ + /** + * ensures that the joining node has a version that's compatible with a given version range + */ public static void ensureNodesCompatibility(Version joiningNodeVersion, Version minClusterNodeVersion, Version maxClusterNodeVersion) { assert minClusterNodeVersion.onOrBefore(maxClusterNodeVersion) : minClusterNodeVersion + " > " + maxClusterNodeVersion; if (joiningNodeVersion.isCompatible(maxClusterNodeVersion) == false) { @@ -466,6 +477,17 @@ public static void ensureMajorVersionBarrier(Version joiningNodeVersion, Version } } + public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) { + if (nodeCommissioned(node, metadata) == false) { + throw new NodeDecommissionedException( + "node [{}] has decommissioned attribute [{}] with current status of decommissioning [{}]", + node.toString(), + metadata.decommissionAttributeMetadata().decommissionAttribute().toString(), + metadata.decommissionAttributeMetadata().status().status() + ); + } + } + public static Collection> addBuiltInJoinValidators( Collection> onJoinValidators ) { @@ -473,6 +495,7 @@ public static Collection> addBuiltInJoin validators.add((node, state) -> { ensureNodesCompatibility(node.getVersion(), state.getNodes()); ensureIndexCompatibility(node.getVersion(), state.getMetadata()); + ensureNodeCommissioned(node, state.getMetadata()); }); validators.addAll(onJoinValidators); return Collections.unmodifiableCollection(validators); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java new file mode 100644 index 0000000000000..bf2487a1a0e18 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java @@ -0,0 +1,92 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; + +import java.io.IOException; +import java.util.Objects; + +/** + * {@link DecommissionAttribute} encapsulates information about decommissioned node attribute like attribute name, attribute value. + * + * @opensearch.internal + */ +public final class DecommissionAttribute implements Writeable { + private final String attributeName; + private final String attributeValue; + + /** + * Constructs new decommission attribute name value pair + * + * @param attributeName attribute name + * @param attributeValue attribute value + */ + public DecommissionAttribute(String attributeName, String attributeValue) { + this.attributeName = attributeName; + this.attributeValue = attributeValue; + } + + /** + * Returns attribute name + * + * @return attributeName + */ + public String attributeName() { + return this.attributeName; + } + + /** + * Returns attribute value + * + * @return attributeValue + */ + public String attributeValue() { + return this.attributeValue; + } + + public DecommissionAttribute(StreamInput in) throws IOException { + attributeName = in.readString(); + attributeValue = in.readString(); + } + + /** + * Writes decommission attribute name value to stream output + * + * @param out stream output + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(attributeName); + out.writeString(attributeValue); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + DecommissionAttribute that = (DecommissionAttribute) o; + + if (!attributeName.equals(that.attributeName)) return false; + return attributeValue.equals(that.attributeValue); + } + + @Override + public int hashCode() { + return Objects.hash(attributeName, attributeValue); + } + + @Override + public String toString() { + return "DecommissionAttribute{" + "attributeName='" + attributeName + '\'' + ", attributeValue='" + attributeValue + '\'' + '}'; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java new file mode 100644 index 0000000000000..395d733b8f1b1 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -0,0 +1,262 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.OpenSearchParseException; +import org.opensearch.Version; +import org.opensearch.cluster.AbstractNamedDiffable; +import org.opensearch.cluster.NamedDiff; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.metadata.Metadata.Custom; +import org.opensearch.common.Strings; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.Objects; +import java.util.Set; + +/** + * Contains metadata about decommission attribute + * + * @opensearch.internal + */ +public class DecommissionAttributeMetadata extends AbstractNamedDiffable implements Custom { + + public static final String TYPE = "decommissionedAttribute"; + + private final DecommissionAttribute decommissionAttribute; + private DecommissionStatus status; + public static final String attributeType = "awareness"; + + /** + * Constructs new decommission attribute metadata with given status + * + * @param decommissionAttribute attribute details + * @param status current status of the attribute decommission + */ + public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute, DecommissionStatus status) { + this.decommissionAttribute = decommissionAttribute; + this.status = status; + } + + /** + * Constructs new decommission attribute metadata with status as {@link DecommissionStatus#INIT} + * + * @param decommissionAttribute attribute details + */ + public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute) { + this(decommissionAttribute, DecommissionStatus.INIT); + } + + /** + * Returns the current decommissioned attribute + * + * @return decommissioned attributes + */ + public DecommissionAttribute decommissionAttribute() { + return this.decommissionAttribute; + } + + /** + * Returns the current status of the attribute decommission + * + * @return attribute type + */ + public DecommissionStatus status() { + return this.status; + } + + /** + * Returns instance of the metadata with updated status + * @param newStatus status to be updated with + */ + // synchronized is strictly speaking not needed (this is called by a single thread), but just to be safe + public synchronized void validateNewStatus(DecommissionStatus newStatus) { + // if the current status is the expected status already or new status is FAILED, we let the check pass + if (newStatus.equals(status) || newStatus.equals(DecommissionStatus.FAILED)) { + return; + } + // We don't expect that INIT will be new status, as it is registered only when starting the decommission action + switch (newStatus) { + case DRAINING: + validateStatus(Set.of(DecommissionStatus.INIT), newStatus); + break; + case IN_PROGRESS: + validateStatus(Set.of(DecommissionStatus.DRAINING, DecommissionStatus.INIT), newStatus); + break; + case SUCCESSFUL: + validateStatus(Set.of(DecommissionStatus.IN_PROGRESS), newStatus); + break; + default: + throw new IllegalArgumentException( + "illegal decommission status [" + newStatus.status() + "] requested for updating metadata" + ); + } + } + + private void validateStatus(Set expectedStatuses, DecommissionStatus next) { + if (expectedStatuses.contains(status) == false) { + assert false : "can't move decommission status to [" + + next + + "]. current status: [" + + status + + "] (allowed statuses [" + + expectedStatuses + + "])"; + throw new IllegalStateException( + "can't move decommission status to [" + next + "]. current status: [" + status + "] (expected [" + expectedStatuses + "])" + ); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + DecommissionAttributeMetadata that = (DecommissionAttributeMetadata) o; + + if (!status.equals(that.status)) return false; + return decommissionAttribute.equals(that.decommissionAttribute); + } + + @Override + public int hashCode() { + return Objects.hash(attributeType, decommissionAttribute, status); + } + + /** + * {@inheritDoc} + */ + @Override + public String getWriteableName() { + return TYPE; + } + + @Override + public Version getMinimalSupportedVersion() { + return Version.V_2_4_0; + } + + public DecommissionAttributeMetadata(StreamInput in) throws IOException { + this.decommissionAttribute = new DecommissionAttribute(in); + this.status = DecommissionStatus.fromString(in.readString()); + } + + public static NamedDiff readDiffFrom(StreamInput in) throws IOException { + return readDiffFrom(Custom.class, TYPE, in); + } + + /** + * {@inheritDoc} + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + decommissionAttribute.writeTo(out); + out.writeString(status.status()); + } + + public static DecommissionAttributeMetadata fromXContent(XContentParser parser) throws IOException { + XContentParser.Token token; + DecommissionAttribute decommissionAttribute = null; + DecommissionStatus status = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + String currentFieldName = parser.currentName(); + if (attributeType.equals(currentFieldName)) { + if (parser.nextToken() != XContentParser.Token.START_OBJECT) { + throw new OpenSearchParseException( + "failed to parse decommission attribute type [{}], expected object", + attributeType + ); + } + token = parser.nextToken(); + if (token != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + String fieldName = parser.currentName(); + String value; + token = parser.nextToken(); + if (token == XContentParser.Token.VALUE_STRING) { + value = parser.text(); + } else { + throw new OpenSearchParseException( + "failed to parse attribute [{}], expected string for attribute value", + fieldName + ); + } + decommissionAttribute = new DecommissionAttribute(fieldName, value); + parser.nextToken(); + } else { + throw new OpenSearchParseException("failed to parse attribute type [{}], unexpected type", attributeType); + } + } else { + throw new OpenSearchParseException("failed to parse attribute type [{}]", attributeType); + } + } else if ("status".equals(currentFieldName)) { + if (parser.nextToken() != XContentParser.Token.VALUE_STRING) { + throw new OpenSearchParseException( + "failed to parse status of decommissioning, expected string but found unknown type" + ); + } + status = DecommissionStatus.fromString(parser.text()); + } else { + throw new OpenSearchParseException( + "unknown field found [{}], failed to parse the decommission attribute", + currentFieldName + ); + } + } + } + return new DecommissionAttributeMetadata(decommissionAttribute, status); + } + + /** + * {@inheritDoc} + */ + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + toXContent(decommissionAttribute, status, attributeType, builder, params); + return builder; + } + + @Override + public EnumSet context() { + return Metadata.API_AND_GATEWAY; + } + + /** + * @param decommissionAttribute decommission attribute + * @param status decommission status + * @param attributeType attribute type + * @param builder XContent builder + * @param params serialization parameters + */ + public static void toXContent( + DecommissionAttribute decommissionAttribute, + DecommissionStatus status, + String attributeType, + XContentBuilder builder, + ToXContent.Params params + ) throws IOException { + builder.startObject(attributeType); + builder.field(decommissionAttribute.attributeName(), decommissionAttribute.attributeValue()); + builder.endObject(); + builder.field("status", status.status()); + } + + @Override + public String toString() { + return Strings.toString(this); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java new file mode 100644 index 0000000000000..ffb20a05b3ef7 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -0,0 +1,338 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsResponse; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsResponse; +import org.opensearch.action.admin.cluster.node.stats.NodeStats; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsAction; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.ClusterStateTaskConfig; +import org.opensearch.cluster.ClusterStateTaskListener; +import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Priority; +import org.opensearch.common.Strings; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.http.HttpStats; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportException; +import org.opensearch.transport.TransportResponseHandler; +import org.opensearch.transport.TransportService; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +/** + * Helper controller class to remove list of nodes from the cluster and update status + * + * @opensearch.internal + */ + +public class DecommissionController { + + private static final Logger logger = LogManager.getLogger(DecommissionController.class); + + private final NodeRemovalClusterStateTaskExecutor nodeRemovalExecutor; + private final ClusterService clusterService; + private final TransportService transportService; + private final ThreadPool threadPool; + + DecommissionController( + ClusterService clusterService, + TransportService transportService, + AllocationService allocationService, + ThreadPool threadPool + ) { + this.clusterService = clusterService; + this.transportService = transportService; + this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger); + this.threadPool = threadPool; + } + + /** + * Transport call to add nodes to voting config exclusion + * + * @param nodes set of nodes Ids to be added to voting config exclusion list + * @param listener callback for response or failure + */ + public void excludeDecommissionedNodesFromVotingConfig(Set nodes, ActionListener listener) { + transportService.sendRequest( + transportService.getLocalNode(), + AddVotingConfigExclusionsAction.NAME, + new AddVotingConfigExclusionsRequest( + Strings.EMPTY_ARRAY, + nodes.toArray(String[]::new), + Strings.EMPTY_ARRAY, + TimeValue.timeValueSeconds(120) // giving a larger timeout of 120 sec as cluster might already be in stress when + // decommission is triggered + ), + new TransportResponseHandler() { + @Override + public void handleResponse(AddVotingConfigExclusionsResponse response) { + listener.onResponse(null); + } + + @Override + public void handleException(TransportException exp) { + listener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public AddVotingConfigExclusionsResponse read(StreamInput in) throws IOException { + return new AddVotingConfigExclusionsResponse(in); + } + } + ); + } + + /** + * Transport call to clear voting config exclusion + * + * @param listener callback for response or failure + */ + public void clearVotingConfigExclusion(ActionListener listener, boolean waitForRemoval) { + final ClearVotingConfigExclusionsRequest clearVotingConfigExclusionsRequest = new ClearVotingConfigExclusionsRequest(); + clearVotingConfigExclusionsRequest.setWaitForRemoval(waitForRemoval); + transportService.sendRequest( + transportService.getLocalNode(), + ClearVotingConfigExclusionsAction.NAME, + clearVotingConfigExclusionsRequest, + new TransportResponseHandler() { + @Override + public void handleResponse(ClearVotingConfigExclusionsResponse response) { + listener.onResponse(null); + } + + @Override + public void handleException(TransportException exp) { + listener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public ClearVotingConfigExclusionsResponse read(StreamInput in) throws IOException { + return new ClearVotingConfigExclusionsResponse(in); + } + } + ); + } + + /** + * This method triggers batch of tasks for nodes to be decommissioned using executor {@link NodeRemovalClusterStateTaskExecutor} + * Once the tasks are submitted, it waits for an expected cluster state to guarantee + * that the expected decommissioned nodes are removed from the cluster + * + * @param nodesToBeDecommissioned set of the node to be decommissioned + * @param reason reason of removal + * @param timeout timeout for the request + * @param nodesRemovedListener callback for the success or failure + */ + public synchronized void removeDecommissionedNodes( + Set nodesToBeDecommissioned, + String reason, + TimeValue timeout, + ActionListener nodesRemovedListener + ) { + final Map nodesDecommissionTasks = new LinkedHashMap<>( + nodesToBeDecommissioned.size() + ); + nodesToBeDecommissioned.forEach(discoveryNode -> { + final NodeRemovalClusterStateTaskExecutor.Task task = new NodeRemovalClusterStateTaskExecutor.Task(discoveryNode, reason); + nodesDecommissionTasks.put(task, nodeRemovalExecutor); + }); + + logger.info("submitting state update task to remove [{}] nodes due to decommissioning", nodesToBeDecommissioned.toString()); + clusterService.submitStateUpdateTasks( + "node-decommissioned", + nodesDecommissionTasks, + ClusterStateTaskConfig.build(Priority.URGENT), + nodeRemovalExecutor + ); + + Predicate allDecommissionedNodesRemovedPredicate = clusterState -> { + Set intersection = Arrays.stream(clusterState.nodes().getNodes().values().toArray(DiscoveryNode.class)) + .collect(Collectors.toSet()); + intersection.retainAll(nodesToBeDecommissioned); + return intersection.size() == 0; + }; + + final ClusterStateObserver observer = new ClusterStateObserver(clusterService, timeout, logger, threadPool.getThreadContext()); + + final ClusterStateObserver.Listener removalListener = new ClusterStateObserver.Listener() { + @Override + public void onNewClusterState(ClusterState state) { + logger.info("successfully removed all decommissioned nodes [{}] from the cluster", nodesToBeDecommissioned.toString()); + nodesRemovedListener.onResponse(null); + } + + @Override + public void onClusterServiceClose() { + logger.warn( + "cluster service closed while waiting for removal of decommissioned nodes [{}]", + nodesToBeDecommissioned.toString() + ); + } + + @Override + public void onTimeout(TimeValue timeout) { + logger.info( + "timed out [{}] while waiting for removal of decommissioned nodes [{}]", + timeout.toString(), + nodesToBeDecommissioned.toString() + ); + nodesRemovedListener.onFailure( + new OpenSearchTimeoutException( + "timed out [{}] while waiting for removal of decommissioned nodes [{}]", + timeout.toString(), + nodesToBeDecommissioned.toString() + ) + ); + } + }; + + if (allDecommissionedNodesRemovedPredicate.test(clusterService.getClusterApplierService().state())) { + removalListener.onNewClusterState(clusterService.getClusterApplierService().state()); + } else { + observer.waitForNextChange(removalListener, allDecommissionedNodesRemovedPredicate); + } + } + + /** + * This method updates the status in the currently registered metadata. + * + * @param decommissionStatus status to update decommission metadata with + * @param listener listener for response and failure + */ + public void updateMetadataWithDecommissionStatus(DecommissionStatus decommissionStatus, ActionListener listener) { + clusterService.submitStateUpdateTask("update-decommission-status", new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) { + DecommissionAttributeMetadata decommissionAttributeMetadata = currentState.metadata().decommissionAttributeMetadata(); + assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null; + logger.info( + "attempting to update current decommission status [{}] with expected status [{}]", + decommissionAttributeMetadata.status(), + decommissionStatus + ); + // validateNewStatus can throw IllegalStateException if the sequence of update is not valid + decommissionAttributeMetadata.validateNewStatus(decommissionStatus); + decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttributeMetadata.decommissionAttribute(), + decommissionStatus + ); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentState.metadata()).decommissionAttributeMetadata(decommissionAttributeMetadata)) + .build(); + } + + @Override + public void onFailure(String source, Exception e) { + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().decommissionAttributeMetadata(); + assert decommissionAttributeMetadata != null; + assert decommissionAttributeMetadata.status().equals(decommissionStatus); + listener.onResponse(decommissionAttributeMetadata.status()); + } + }); + } + + private void logActiveConnections(NodesStatsResponse nodesStatsResponse) { + if (nodesStatsResponse == null || nodesStatsResponse.getNodes() == null) { + logger.info("Node stats response received is null/empty."); + return; + } + + Map nodeActiveConnectionMap = new HashMap<>(); + List responseNodes = nodesStatsResponse.getNodes(); + for (int i = 0; i < responseNodes.size(); i++) { + HttpStats httpStats = responseNodes.get(i).getHttp(); + DiscoveryNode node = responseNodes.get(i).getNode(); + nodeActiveConnectionMap.put(node.getId(), httpStats.getServerOpen()); + } + logger.info("Decommissioning node with connections : [{}]", nodeActiveConnectionMap); + } + + void getActiveRequestCountOnDecommissionedNodes(Set decommissionedNodes) { + if (decommissionedNodes == null || decommissionedNodes.isEmpty()) { + return; + } + String[] nodes = decommissionedNodes.stream().map(DiscoveryNode::getId).toArray(String[]::new); + if (nodes.length == 0) { + return; + } + + final NodesStatsRequest nodesStatsRequest = new NodesStatsRequest(nodes); + nodesStatsRequest.clear(); + nodesStatsRequest.addMetric(NodesStatsRequest.Metric.HTTP.metricName()); + + transportService.sendRequest( + transportService.getLocalNode(), + NodesStatsAction.NAME, + nodesStatsRequest, + new TransportResponseHandler() { + @Override + public void handleResponse(NodesStatsResponse response) { + logActiveConnections(response); + } + + @Override + public void handleException(TransportException exp) { + logger.error("Failure occurred while dumping connection for decommission nodes - ", exp.unwrapCause()); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public NodesStatsResponse read(StreamInput in) throws IOException { + return new NodesStatsResponse(in); + } + } + ); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java new file mode 100644 index 0000000000000..85030a1e902db --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -0,0 +1,656 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.NotClusterManagerException; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.metadata.WeightedRoutingMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.WeightedRouting; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Priority; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +import static org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING; +import static org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING; + +/** + * Service responsible for entire lifecycle of decommissioning and recommissioning an awareness attribute. + *

+ * Whenever a cluster manager initiates operation to decommission an awareness attribute, + * the service makes the best attempt to perform the following task - + *

    + *
  • Initiates nodes decommissioning by adding custom metadata with the attribute and state as {@link DecommissionStatus#INIT}
  • + *
  • Remove to-be-decommissioned cluster-manager eligible nodes from voting config and wait for its abdication if it is active leader
  • + *
  • Triggers weigh away for nodes having given awareness attribute to drain.
  • + *
  • Once weighed away, the service triggers nodes decommission. This marks the decommission status as {@link DecommissionStatus#IN_PROGRESS}
  • + *
  • Once the decommission is successful, the service clears the voting config and marks the status as {@link DecommissionStatus#SUCCESSFUL}
  • + *
  • If service fails at any step, it makes best attempt to mark the status as {@link DecommissionStatus#FAILED} and to clear voting config exclusion
  • + *
+ * + * @opensearch.internal + */ +public class DecommissionService { + + private static final Logger logger = LogManager.getLogger(DecommissionService.class); + + private final ClusterService clusterService; + private final TransportService transportService; + private final ThreadPool threadPool; + private final DecommissionController decommissionController; + private volatile List awarenessAttributes; + private volatile Map> forcedAwarenessAttributes; + + @Inject + public DecommissionService( + Settings settings, + ClusterSettings clusterSettings, + ClusterService clusterService, + TransportService transportService, + ThreadPool threadPool, + AllocationService allocationService + ) { + this.clusterService = clusterService; + this.transportService = transportService; + this.threadPool = threadPool; + this.decommissionController = new DecommissionController(clusterService, transportService, allocationService, threadPool); + this.awarenessAttributes = CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings); + clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, this::setAwarenessAttributes); + + setForcedAwarenessAttributes(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.get(settings)); + clusterSettings.addSettingsUpdateConsumer( + CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING, + this::setForcedAwarenessAttributes + ); + } + + private void setAwarenessAttributes(List awarenessAttributes) { + this.awarenessAttributes = awarenessAttributes; + } + + private void setForcedAwarenessAttributes(Settings forceSettings) { + Map> forcedAwarenessAttributes = new HashMap<>(); + Map forceGroups = forceSettings.getAsGroups(); + for (Map.Entry entry : forceGroups.entrySet()) { + List aValues = entry.getValue().getAsList("values"); + if (aValues.size() > 0) { + forcedAwarenessAttributes.put(entry.getKey(), aValues); + } + } + this.forcedAwarenessAttributes = forcedAwarenessAttributes; + } + + /** + * Starts the new decommission request and registers the metadata with status as {@link DecommissionStatus#INIT} + * Once the status is updated, it tries to exclude to-be-decommissioned cluster manager eligible nodes from Voting Configuration + * + * @param decommissionRequest decommission request Object + * @param listener register decommission listener + */ + public void startDecommissionAction( + final DecommissionRequest decommissionRequest, + final ActionListener listener + ) { + final DecommissionAttribute decommissionAttribute = decommissionRequest.getDecommissionAttribute(); + // register the metadata with status as INIT as first step + clusterService.submitStateUpdateTask("decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) { + // validates if correct awareness attributes and forced awareness attribute set to the cluster before starting action + validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); + DecommissionAttributeMetadata decommissionAttributeMetadata = currentState.metadata().decommissionAttributeMetadata(); + // check that request is eligible to proceed + ensureEligibleRequest(decommissionAttributeMetadata, decommissionAttribute); + // ensure attribute is weighed away + ensureToBeDecommissionedAttributeWeighedAway(currentState, decommissionAttribute); + decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); + logger.info("registering decommission metadata [{}] to execute action", decommissionAttributeMetadata.toString()); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentState.metadata()).decommissionAttributeMetadata(decommissionAttributeMetadata)) + .build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to start decommission action for attribute [{}]", + decommissionAttribute.toString() + ), + e + ); + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().decommissionAttributeMetadata(); + assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); + logger.info( + "registered decommission metadata for attribute [{}] with status [{}]", + decommissionAttributeMetadata.decommissionAttribute(), + decommissionAttributeMetadata.status() + ); + decommissionClusterManagerNodes(decommissionRequest, listener); + } + }); + } + + private synchronized void decommissionClusterManagerNodes( + final DecommissionRequest decommissionRequest, + ActionListener listener + ) { + final DecommissionAttribute decommissionAttribute = decommissionRequest.getDecommissionAttribute(); + ClusterState state = clusterService.getClusterApplierService().state(); + // since here metadata is already registered with INIT, we can guarantee that no new node with decommission attribute can further + // join the cluster + // and hence in further request lifecycle we are sure that no new to-be-decommission leader will join the cluster + Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute(state, decommissionAttribute, true); + logger.info( + "resolved cluster manager eligible nodes [{}] that should be removed from Voting Configuration", + clusterManagerNodesToBeDecommissioned.toString() + ); + + // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config + Set nodeIdsToBeExcluded = clusterManagerNodesToBeDecommissioned.stream() + .map(DiscoveryNode::getId) + .collect(Collectors.toSet()); + + final Predicate allNodesRemovedAndAbdicated = clusterState -> { + final Set votingConfigNodeIds = clusterState.getLastCommittedConfiguration().getNodeIds(); + return nodeIdsToBeExcluded.stream().noneMatch(votingConfigNodeIds::contains) + && nodeIdsToBeExcluded.contains(clusterState.nodes().getClusterManagerNodeId()) == false + && clusterState.nodes().getClusterManagerNodeId() != null; + }; + + ActionListener exclusionListener = new ActionListener() { + @Override + public void onResponse(Void unused) { + if (clusterService.getClusterApplierService().state().nodes().isLocalNodeElectedClusterManager()) { + if (nodeHasDecommissionedAttribute(clusterService.localNode(), decommissionAttribute)) { + // this is an unexpected state, as after exclusion of nodes having decommission attribute, + // this local node shouldn't have had the decommission attribute. Will send the failure response to the user + String errorMsg = + "unexpected state encountered [local node is to-be-decommissioned leader] while executing decommission request"; + logger.error(errorMsg); + // will go ahead and clear the voting config and mark the status as false + clearVotingConfigExclusionAndUpdateStatus(false, false); + // we can send the failure response to the user here + listener.onFailure(new IllegalStateException(errorMsg)); + } else { + logger.info("will attempt to fail decommissioned nodes as local node is eligible to process the request"); + // we are good here to send the response now as the request is processed by an eligible active leader + // and to-be-decommissioned cluster manager is no more part of Voting Configuration and no more to-be-decommission + // nodes can be part of Voting Config + listener.onResponse(new DecommissionResponse(true)); + drainNodesWithDecommissionedAttribute(decommissionRequest); + } + } else { + // explicitly calling listener.onFailure with NotClusterManagerException as the local node is not the cluster manager + // this will ensures that request is retried until cluster manager times out + logger.info( + "local node is not eligible to process the request, " + + "throwing NotClusterManagerException to attempt a retry on an eligible node" + ); + listener.onFailure( + new NotClusterManagerException( + "node [" + + transportService.getLocalNode().toString() + + "] not eligible to execute decommission request. Will retry until timeout." + ) + ); + } + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + // attempting to mark the status as FAILED + clearVotingConfigExclusionAndUpdateStatus(false, false); + } + }; + + if (allNodesRemovedAndAbdicated.test(state)) { + exclusionListener.onResponse(null); + } else { + logger.debug("sending transport request to remove nodes [{}] from voting config", nodeIdsToBeExcluded.toString()); + // send a transport request to exclude to-be-decommissioned cluster manager eligible nodes from voting config + decommissionController.excludeDecommissionedNodesFromVotingConfig(nodeIdsToBeExcluded, new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info( + "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config ", + clusterManagerNodesToBeDecommissioned.toString() + ); + final ClusterStateObserver abdicationObserver = new ClusterStateObserver( + clusterService, + TimeValue.timeValueSeconds(60L), + logger, + threadPool.getThreadContext() + ); + final ClusterStateObserver.Listener abdicationListener = new ClusterStateObserver.Listener() { + @Override + public void onNewClusterState(ClusterState state) { + logger.debug("to-be-decommissioned node is no more the active leader"); + exclusionListener.onResponse(null); + } + + @Override + public void onClusterServiceClose() { + String errorMsg = "cluster service closed while waiting for abdication of to-be-decommissioned leader"; + logger.warn(errorMsg); + listener.onFailure(new DecommissioningFailedException(decommissionAttribute, errorMsg)); + } + + @Override + public void onTimeout(TimeValue timeout) { + logger.info("timed out while waiting for abdication of to-be-decommissioned leader"); + clearVotingConfigExclusionAndUpdateStatus(false, false); + listener.onFailure( + new OpenSearchTimeoutException( + "timed out [{}] while waiting for abdication of to-be-decommissioned leader", + timeout.toString() + ) + ); + } + }; + // In case the cluster state is already processed even before this code is executed + // therefore testing first before attaching the listener + ClusterState currentState = clusterService.getClusterApplierService().state(); + if (allNodesRemovedAndAbdicated.test(currentState)) { + abdicationListener.onNewClusterState(currentState); + } else { + logger.debug("waiting to abdicate to-be-decommissioned leader"); + abdicationObserver.waitForNextChange(abdicationListener, allNodesRemovedAndAbdicated); + } + } + + @Override + public void onFailure(Exception e) { + logger.error( + new ParameterizedMessage( + "failure in removing to-be-decommissioned cluster manager eligible nodes [{}] from voting config", + nodeIdsToBeExcluded.toString() + ), + e + ); + exclusionListener.onFailure(e); + } + }); + } + } + + void drainNodesWithDecommissionedAttribute(DecommissionRequest decommissionRequest) { + ClusterState state = clusterService.getClusterApplierService().state(); + Set decommissionedNodes = filterNodesWithDecommissionAttribute( + state, + decommissionRequest.getDecommissionAttribute(), + false + ); + + if (decommissionRequest.isNoDelay()) { + // Call to fail the decommission nodes + failDecommissionedNodes(decommissionedNodes, decommissionRequest.getDecommissionAttribute()); + } else { + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.DRAINING, new ActionListener<>() { + @Override + public void onResponse(DecommissionStatus status) { + logger.info("updated the decommission status to [{}]", status); + // set the weights + scheduleNodesDecommissionOnTimeout(decommissionedNodes, decommissionRequest.getDelayTimeout()); + } + + @Override + public void onFailure(Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to update decommission status for attribute [{}] to [{}]", + decommissionRequest.getDecommissionAttribute().toString(), + DecommissionStatus.DRAINING + ), + e + ); + // since we are not able to update the status, we will clear the voting config exclusion we have set earlier + clearVotingConfigExclusionAndUpdateStatus(false, false); + } + }); + } + } + + void scheduleNodesDecommissionOnTimeout(Set decommissionedNodes, TimeValue timeoutForNodeDraining) { + ClusterState state = clusterService.getClusterApplierService().state(); + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().decommissionAttributeMetadata(); + if (decommissionAttributeMetadata == null) { + return; + } + assert decommissionAttributeMetadata.status().equals(DecommissionStatus.DRAINING) + : "Unexpected status encountered while decommissioning nodes."; + + // This method ensures no matter what, we always exit from this function after clearing the voting config exclusion + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + + // Wait for timeout to happen. Log the active connection before decommissioning of nodes. + transportService.getThreadPool().schedule(() -> { + // Log active connections. + decommissionController.getActiveRequestCountOnDecommissionedNodes(decommissionedNodes); + // Call to fail the decommission nodes + failDecommissionedNodes(decommissionedNodes, decommissionAttribute); + }, timeoutForNodeDraining, ThreadPool.Names.GENERIC); + } + + private void failDecommissionedNodes(Set decommissionedNodes, DecommissionAttribute decommissionAttribute) { + + // Weighing away is complete. We have allowed the nodes to be drained. Let's move decommission status to IN_PROGRESS. + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.IN_PROGRESS, new ActionListener<>() { + @Override + public void onResponse(DecommissionStatus status) { + logger.info("updated the decommission status to [{}]", status); + // execute nodes decommissioning + decommissionController.removeDecommissionedNodes( + decommissionedNodes, + "nodes-decommissioned", + TimeValue.timeValueSeconds(120L), + new ActionListener() { + @Override + public void onResponse(Void unused) { + clearVotingConfigExclusionAndUpdateStatus(true, true); + } + + @Override + public void onFailure(Exception e) { + clearVotingConfigExclusionAndUpdateStatus(false, false); + } + } + ); + } + + @Override + public void onFailure(Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to update decommission status for attribute [{}] to [{}]", + decommissionAttribute.toString(), + DecommissionStatus.IN_PROGRESS + ), + e + ); + // since we are not able to update the status, we will clear the voting config exclusion we have set earlier + clearVotingConfigExclusionAndUpdateStatus(false, false); + } + }); + } + + private void clearVotingConfigExclusionAndUpdateStatus(boolean decommissionSuccessful, boolean waitForRemoval) { + decommissionController.clearVotingConfigExclusion(new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info( + "successfully cleared voting config exclusion after completing decommission action, proceeding to update metadata" + ); + DecommissionStatus updateStatusWith = decommissionSuccessful ? DecommissionStatus.SUCCESSFUL : DecommissionStatus.FAILED; + decommissionController.updateMetadataWithDecommissionStatus(updateStatusWith, statusUpdateListener()); + } + + @Override + public void onFailure(Exception e) { + logger.debug( + new ParameterizedMessage("failure in clearing voting config exclusion after processing decommission request"), + e + ); + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); + } + }, waitForRemoval); + } + + private Set filterNodesWithDecommissionAttribute( + ClusterState clusterState, + DecommissionAttribute decommissionAttribute, + boolean onlyClusterManagerNodes + ) { + Set nodesWithDecommissionAttribute = new HashSet<>(); + Iterator nodesIter = onlyClusterManagerNodes + ? clusterState.nodes().getClusterManagerNodes().valuesIt() + : clusterState.nodes().getNodes().valuesIt(); + + while (nodesIter.hasNext()) { + final DiscoveryNode node = nodesIter.next(); + if (nodeHasDecommissionedAttribute(node, decommissionAttribute)) { + nodesWithDecommissionAttribute.add(node); + } + } + return nodesWithDecommissionAttribute; + } + + private static void validateAwarenessAttribute( + final DecommissionAttribute decommissionAttribute, + List awarenessAttributes, + Map> forcedAwarenessAttributes + ) { + String msg = null; + if (awarenessAttributes == null) { + msg = "awareness attribute not set to the cluster."; + } else if (forcedAwarenessAttributes == null) { + msg = "forced awareness attribute not set to the cluster."; + } else if (awarenessAttributes.contains(decommissionAttribute.attributeName()) == false) { + msg = "invalid awareness attribute requested for decommissioning"; + } else if (forcedAwarenessAttributes.containsKey(decommissionAttribute.attributeName()) == false) { + msg = "forced awareness attribute [" + forcedAwarenessAttributes.toString() + "] doesn't have the decommissioning attribute"; + } else if (forcedAwarenessAttributes.get(decommissionAttribute.attributeName()) + .contains(decommissionAttribute.attributeValue()) == false) { + msg = "invalid awareness attribute value requested for decommissioning. Set forced awareness values before to decommission"; + } + + if (msg != null) { + throw new DecommissioningFailedException(decommissionAttribute, msg); + } + } + + private static void ensureToBeDecommissionedAttributeWeighedAway(ClusterState state, DecommissionAttribute decommissionAttribute) { + WeightedRoutingMetadata weightedRoutingMetadata = state.metadata().weightedRoutingMetadata(); + if (weightedRoutingMetadata == null) { + throw new DecommissioningFailedException( + decommissionAttribute, + "no weights are set to the attribute. Please set appropriate weights before triggering decommission action" + ); + } + WeightedRouting weightedRouting = weightedRoutingMetadata.getWeightedRouting(); + if (weightedRouting.attributeName().equals(decommissionAttribute.attributeName()) == false) { + throw new DecommissioningFailedException( + decommissionAttribute, + "no weights are specified to attribute [" + decommissionAttribute.attributeName() + "]" + ); + } + Double attributeValueWeight = weightedRouting.weights().get(decommissionAttribute.attributeValue()); + if (attributeValueWeight == null || attributeValueWeight.equals(0.0) == false) { + throw new DecommissioningFailedException( + decommissionAttribute, + "weight for decommissioned attribute is expected to be [0.0] but found [" + attributeValueWeight + "]" + ); + } + } + + private static void ensureEligibleRequest( + DecommissionAttributeMetadata decommissionAttributeMetadata, + DecommissionAttribute requestedDecommissionAttribute + ) { + String msg = null; + if (decommissionAttributeMetadata != null) { + // check if the same attribute is registered and handle it accordingly + if (decommissionAttributeMetadata.decommissionAttribute().equals(requestedDecommissionAttribute)) { + switch (decommissionAttributeMetadata.status()) { + // for INIT and FAILED - we are good to process it again + case INIT: + case FAILED: + break; + case DRAINING: + case IN_PROGRESS: + case SUCCESSFUL: + msg = "same request is already in status [" + decommissionAttributeMetadata.status() + "]"; + break; + default: + throw new IllegalStateException( + "unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata" + ); + } + } else { + switch (decommissionAttributeMetadata.status()) { + case SUCCESSFUL: + // one awareness attribute is already decommissioned. We will reject the new request + msg = "one awareness attribute [" + + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] already successfully decommissioned, recommission before triggering another decommission"; + break; + case DRAINING: + case IN_PROGRESS: + case INIT: + // it means the decommission has been initiated or is inflight. In that case, will fail new request + msg = "there's an inflight decommission request for attribute [" + + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] is in progress, cannot process this request"; + break; + case FAILED: + break; + default: + throw new IllegalStateException( + "unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata" + ); + } + } + } + + if (msg != null) { + throw new DecommissioningFailedException(requestedDecommissionAttribute, msg); + } + } + + private ActionListener statusUpdateListener() { + return new ActionListener<>() { + @Override + public void onResponse(DecommissionStatus status) { + logger.info("updated the decommission status to [{}]", status); + } + + @Override + public void onFailure(Exception e) { + logger.error("unexpected failure occurred during decommission status update", e); + } + }; + } + + public void startRecommissionAction(final ActionListener listener) { + /* + * For abandoned requests, we might not really know if it actually restored the exclusion list. + * And can land up in cases where even after recommission, exclusions are set(which is unexpected). + * And by definition of OpenSearch - Clusters should have no voting configuration exclusions in normal operation. + * Once the excluded nodes have stopped, clear the voting configuration exclusions with DELETE /_cluster/voting_config_exclusions. + * And hence it is safe to remove the exclusion if any. User should make conscious choice before decommissioning awareness attribute. + */ + decommissionController.clearVotingConfigExclusion(new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info("successfully cleared voting config exclusion for deleting the decommission."); + deleteDecommissionState(listener); + } + + @Override + public void onFailure(Exception e) { + logger.error("Failure in clearing voting config during delete_decommission request.", e); + listener.onFailure(e); + } + }, false); + } + + void deleteDecommissionState(ActionListener listener) { + clusterService.submitStateUpdateTask("delete_decommission_state", new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) { + logger.info("Deleting the decommission attribute from the cluster state"); + Metadata metadata = currentState.metadata(); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + mdBuilder.removeCustom(DecommissionAttributeMetadata.TYPE); + return ClusterState.builder(currentState).metadata(mdBuilder).build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.error(() -> new ParameterizedMessage("Failed to clear decommission attribute. [{}]", source), e); + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + // Cluster state processed for deleting the decommission attribute. + assert newState.metadata().decommissionAttributeMetadata() == null; + listener.onResponse(new DeleteDecommissionStateResponse(true)); + } + }); + } + + /** + * Utility method to check if the node has decommissioned attribute + * + * @param discoveryNode node to check on + * @param decommissionAttribute attribute to be checked with + * @return true or false based on whether node has decommissioned attribute + */ + public static boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNode, DecommissionAttribute decommissionAttribute) { + String nodeAttributeValue = discoveryNode.getAttributes().get(decommissionAttribute.attributeName()); + return nodeAttributeValue != null && nodeAttributeValue.equals(decommissionAttribute.attributeValue()); + } + + /** + * Utility method to check if the node is commissioned or not + * + * @param discoveryNode node to check on + * @param metadata metadata present current which will be used to check the commissioning status of the node + * @return if the node is commissioned or not + */ + public static boolean nodeCommissioned(DiscoveryNode discoveryNode, Metadata metadata) { + DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.decommissionAttributeMetadata(); + if (decommissionAttributeMetadata != null) { + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + DecommissionStatus status = decommissionAttributeMetadata.status(); + if (decommissionAttribute != null && status != null) { + if (nodeHasDecommissionedAttribute(discoveryNode, decommissionAttribute) + && (status.equals(DecommissionStatus.IN_PROGRESS) + || status.equals(DecommissionStatus.SUCCESSFUL) + || status.equals(DecommissionStatus.DRAINING))) { + return false; + } + } + } + return true; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java new file mode 100644 index 0000000000000..4ca8c3cc4286e --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java @@ -0,0 +1,75 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +/** + * An enumeration of the states during decommissioning + */ +public enum DecommissionStatus { + /** + * Decommission process is initiated, and to-be-decommissioned leader is excluded from voting config + */ + INIT("init"), + /** + * Decommission process is initiated, and the zone is being drained. + */ + DRAINING("draining"), + + /** + * Decommission process has started, decommissioned nodes should be removed + */ + IN_PROGRESS("in_progress"), + /** + * Decommission action completed + */ + SUCCESSFUL("successful"), + /** + * Decommission request failed + */ + FAILED("failed"); + + private final String status; + + DecommissionStatus(String status) { + this.status = status; + } + + /** + * Returns status that represents the decommission state + * + * @return status + */ + public String status() { + return status; + } + + /** + * Generate decommission status from given string + * + * @param status status in string + * @return status + */ + public static DecommissionStatus fromString(String status) { + if (status == null) { + throw new IllegalArgumentException("decommission status cannot be null"); + } + if (status.equals(INIT.status())) { + return INIT; + } else if (status.equals(DRAINING.status())) { + return DRAINING; + } else if (status.equals(IN_PROGRESS.status())) { + return IN_PROGRESS; + } else if (status.equals(SUCCESSFUL.status())) { + return SUCCESSFUL; + } else if (status.equals(FAILED.status())) { + return FAILED; + } + throw new IllegalStateException("Decommission status [" + status + "] not recognized."); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java new file mode 100644 index 0000000000000..fe1b9368ac712 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.OpenSearchException; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * This exception is thrown whenever a failure occurs in decommission request @{@link DecommissionService} + * + * @opensearch.internal + */ + +public class DecommissioningFailedException extends OpenSearchException { + + private final DecommissionAttribute decommissionAttribute; + + public DecommissioningFailedException(DecommissionAttribute decommissionAttribute, String msg) { + this(decommissionAttribute, msg, null); + } + + public DecommissioningFailedException(DecommissionAttribute decommissionAttribute, String msg, Throwable cause) { + super("[" + (decommissionAttribute == null ? "_na" : decommissionAttribute.toString()) + "] " + msg, cause); + this.decommissionAttribute = decommissionAttribute; + } + + public DecommissioningFailedException(StreamInput in) throws IOException { + super(in); + decommissionAttribute = new DecommissionAttribute(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + decommissionAttribute.writeTo(out); + } + + /** + * Returns decommission attribute + * + * @return decommission attribute + */ + public DecommissionAttribute decommissionAttribute() { + return decommissionAttribute; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java b/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java new file mode 100644 index 0000000000000..847d5a527b017 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.OpenSearchException; +import org.opensearch.common.io.stream.StreamInput; + +import java.io.IOException; + +/** + * This exception is thrown if the node is decommissioned by @{@link DecommissionService} + * and this nodes needs to be removed from the cluster + * + * @opensearch.internal + */ +public class NodeDecommissionedException extends OpenSearchException { + + public NodeDecommissionedException(String msg, Object... args) { + super(msg, args); + } + + public NodeDecommissionedException(StreamInput in) throws IOException { + super(in); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/package-info.java b/server/src/main/java/org/opensearch/cluster/decommission/package-info.java new file mode 100644 index 0000000000000..256c2f22253cc --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Decommission lifecycle classes + */ +package org.opensearch.cluster.decommission; diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java index 086865d2170c3..eb5e8bbc2d49b 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java @@ -51,6 +51,7 @@ import org.opensearch.cluster.block.ClusterBlock; import org.opensearch.cluster.block.ClusterBlockLevel; import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.common.Nullable; import org.opensearch.common.Strings; import org.opensearch.common.UUIDs; @@ -795,6 +796,10 @@ public Map dataStreams() { .orElse(Collections.emptyMap()); } + public DecommissionAttributeMetadata decommissionAttributeMetadata() { + return custom(DecommissionAttributeMetadata.TYPE); + } + public ImmutableOpenMap customs() { return this.customs; } @@ -1336,6 +1341,15 @@ public IndexGraveyard indexGraveyard() { return graveyard; } + public Builder decommissionAttributeMetadata(final DecommissionAttributeMetadata decommissionAttributeMetadata) { + putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + return this; + } + + public DecommissionAttributeMetadata decommissionAttributeMetadata() { + return (DecommissionAttributeMetadata) getCustom(DecommissionAttributeMetadata.TYPE); + } + public Builder updateSettings(Settings settings, String... indices) { if (indices == null || indices.length == 0) { indices = this.indices.keys().toArray(String.class); diff --git a/server/src/main/java/org/opensearch/cluster/routing/WeightedRoutingService.java b/server/src/main/java/org/opensearch/cluster/routing/WeightedRoutingService.java index 54f2c81aea384..6acb4a1e832cb 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/WeightedRoutingService.java +++ b/server/src/main/java/org/opensearch/cluster/routing/WeightedRoutingService.java @@ -19,6 +19,8 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.WeightedRoutingMetadata; import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; @@ -68,6 +70,8 @@ public void registerWeightedRoutingMetadata( clusterService.submitStateUpdateTask("update_weighted_routing", new ClusterStateUpdateTask(Priority.URGENT) { @Override public ClusterState execute(ClusterState currentState) { + // verify currently no decommission action is ongoing + ensureNoOngoingDecommissionAction(currentState); Metadata metadata = currentState.metadata(); Metadata.Builder mdBuilder = Metadata.builder(currentState.metadata()); WeightedRoutingMetadata weightedRoutingMetadata = metadata.custom(WeightedRoutingMetadata.TYPE); @@ -154,4 +158,15 @@ public void verifyAwarenessAttribute(String attributeName) { throw validationException; } } + + public void ensureNoOngoingDecommissionAction(ClusterState state) { + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().decommissionAttributeMetadata(); + if (decommissionAttributeMetadata != null && decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED) == false) { + throw new IllegalStateException( + "a decommission action is ongoing with status [" + + decommissionAttributeMetadata.status().status() + + "], cannot update weight during this state" + ); + } + } } diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 9d07e306cfa09..70a17837839cf 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -542,6 +542,7 @@ public void apply(Settings value, Settings current, Settings previous) { PersistentTasksClusterService.CLUSTER_TASKS_ALLOCATION_RECHECK_INTERVAL_SETTING, EnableAssignmentDecider.CLUSTER_TASKS_ALLOCATION_ENABLE_SETTING, PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING, + PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING, PeerFinder.DISCOVERY_REQUEST_PEERS_TIMEOUT_SETTING, ClusterFormationFailureHelper.DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING, ElectionSchedulerFactory.ELECTION_INITIAL_TIMEOUT_SETTING, diff --git a/server/src/main/java/org/opensearch/discovery/PeerFinder.java b/server/src/main/java/org/opensearch/discovery/PeerFinder.java index a601a6fbe4d82..e8b6c72c512a2 100644 --- a/server/src/main/java/org/opensearch/discovery/PeerFinder.java +++ b/server/src/main/java/org/opensearch/discovery/PeerFinder.java @@ -84,6 +84,14 @@ public abstract class PeerFinder { Setting.Property.NodeScope ); + // the time between attempts to find all peers when node is in decommissioned state, default set to 2 minutes + public static final Setting DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING = Setting.timeSetting( + "discovery.find_peers_interval_during_decommission", + TimeValue.timeValueSeconds(120L), + TimeValue.timeValueMillis(1000), + Setting.Property.NodeScope + ); + public static final Setting DISCOVERY_REQUEST_PEERS_TIMEOUT_SETTING = Setting.timeSetting( "discovery.request_peers_timeout", TimeValue.timeValueMillis(3000), @@ -91,7 +99,8 @@ public abstract class PeerFinder { Setting.Property.NodeScope ); - private final TimeValue findPeersInterval; + private final Settings settings; + private TimeValue findPeersInterval; private final TimeValue requestPeersTimeout; private final Object mutex = new Object(); @@ -112,6 +121,7 @@ public PeerFinder( TransportAddressConnector transportAddressConnector, ConfiguredHostsResolver configuredHostsResolver ) { + this.settings = settings; findPeersInterval = DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings); requestPeersTimeout = DISCOVERY_REQUEST_PEERS_TIMEOUT_SETTING.get(settings); this.transportService = transportService; @@ -128,6 +138,23 @@ public PeerFinder( ); } + public synchronized void onNodeCommissionStatusChange(boolean localNodeCommissioned) { + findPeersInterval = localNodeCommissioned + ? DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings) + : DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING.get(settings); + logger.info( + "setting findPeersInterval to [{}] as node commission status = [{}] for local node [{}]", + findPeersInterval, + localNodeCommissioned, + transportService.getLocalNode() + ); + } + + // package private for tests + TimeValue getFindPeersInterval() { + return findPeersInterval; + } + public void activate(final DiscoveryNodes lastAcceptedNodes) { logger.trace("activating with {}", lastAcceptedNodes); diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDecommissionAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDecommissionAction.java new file mode 100644 index 0000000000000..c041974165eb6 --- /dev/null +++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDecommissionAction.java @@ -0,0 +1,63 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.rest.action.admin.cluster; + +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; +import org.opensearch.client.Requests; +import org.opensearch.client.node.NodeClient; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.rest.BaseRestHandler; +import org.opensearch.rest.RestRequest; +import org.opensearch.rest.action.RestToXContentListener; + +import java.io.IOException; +import java.util.List; + +import static java.util.Collections.singletonList; +import static org.opensearch.rest.RestRequest.Method.PUT; + +/** + * Registers decommission action + * + * @opensearch.api + */ +public class RestDecommissionAction extends BaseRestHandler { + + @Override + public List routes() { + return singletonList(new Route(PUT, "/_cluster/decommission/awareness/{awareness_attribute_name}/{awareness_attribute_value}")); + } + + @Override + public String getName() { + return "decommission_action"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { + DecommissionRequest decommissionRequest = createRequest(request); + return channel -> client.admin().cluster().decommission(decommissionRequest, new RestToXContentListener<>(channel)); + } + + DecommissionRequest createRequest(RestRequest request) throws IOException { + DecommissionRequest decommissionRequest = Requests.decommissionRequest(); + String attributeName = request.param("awareness_attribute_name"); + String attributeValue = request.param("awareness_attribute_value"); + // Check if we have no delay set. + boolean noDelay = request.paramAsBoolean("no_delay", false); + decommissionRequest.setNoDelay(noDelay); + + if (request.hasParam("delay_timeout")) { + TimeValue delayTimeout = request.paramAsTime("delay_timeout", DecommissionRequest.DEFAULT_NODE_DRAINING_TIMEOUT); + decommissionRequest.setDelayTimeout(delayTimeout); + } + return decommissionRequest.setDecommissionAttribute(new DecommissionAttribute(attributeName, attributeValue)); + } +} diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateAction.java new file mode 100644 index 0000000000000..9fd7ae2248c30 --- /dev/null +++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateAction.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.rest.action.admin.cluster; + +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest; +import org.opensearch.client.Requests; +import org.opensearch.client.node.NodeClient; +import org.opensearch.rest.BaseRestHandler; +import org.opensearch.rest.RestRequest; +import org.opensearch.rest.action.RestToXContentListener; + +import java.io.IOException; +import java.util.List; + +import static java.util.Collections.singletonList; +import static org.opensearch.rest.RestRequest.Method.DELETE; + +/** + * Clears the decommission metadata. + * + * @opensearch.api + */ +public class RestDeleteDecommissionStateAction extends BaseRestHandler { + + @Override + public List routes() { + return singletonList(new Route(DELETE, "/_cluster/decommission/awareness")); + } + + @Override + public String getName() { + return "delete_decommission_state_action"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { + DeleteDecommissionStateRequest deleteDecommissionStateRequest = createRequest(); + return channel -> client.admin() + .cluster() + .deleteDecommissionState(deleteDecommissionStateRequest, new RestToXContentListener<>(channel)); + } + + DeleteDecommissionStateRequest createRequest() { + return Requests.deleteDecommissionStateRequest(); + } +} diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestGetDecommissionStateAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestGetDecommissionStateAction.java new file mode 100644 index 0000000000000..5d72adbd6ae08 --- /dev/null +++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestGetDecommissionStateAction.java @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.rest.action.admin.cluster; + +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest; +import org.opensearch.client.Requests; +import org.opensearch.client.node.NodeClient; +import org.opensearch.rest.BaseRestHandler; +import org.opensearch.rest.RestRequest; +import org.opensearch.rest.action.RestToXContentListener; + +import java.io.IOException; +import java.util.List; + +import static java.util.Collections.singletonList; +import static org.opensearch.rest.RestRequest.Method.GET; + +/** + * Returns decommissioned attribute information + * + * @opensearch.api + */ +public class RestGetDecommissionStateAction extends BaseRestHandler { + + @Override + public List routes() { + return singletonList(new Route(GET, "/_cluster/decommission/awareness/{awareness_attribute_name}/_status")); + } + + @Override + public String getName() { + return "get_decommission_state_action"; + } + + @Override + public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { + GetDecommissionStateRequest getDecommissionStateRequest = Requests.getDecommissionStateRequest(); + String attributeName = request.param("awareness_attribute_name"); + getDecommissionStateRequest.attributeName(attributeName); + return channel -> client.admin().cluster().getDecommissionState(getDecommissionStateRequest, new RestToXContentListener<>(channel)); + } +} diff --git a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java index 26b0ce7e9e20c..ff2bb77531486 100644 --- a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java +++ b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java @@ -49,6 +49,8 @@ import org.opensearch.cluster.block.ClusterBlockException; import org.opensearch.cluster.coordination.CoordinationStateRejectedException; import org.opensearch.cluster.coordination.NoClusterManagerBlockService; +import org.opensearch.cluster.decommission.DecommissioningFailedException; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.IllegalShardRoutingStateException; import org.opensearch.cluster.routing.ShardRouting; @@ -860,6 +862,8 @@ public void testIds() { ids.put(160, NoSeedNodeLeftException.class); ids.put(161, ReplicationFailedException.class); ids.put(162, PrimaryShardClosedException.class); + ids.put(163, DecommissioningFailedException.class); + ids.put(164, NodeDecommissionedException.class); Map, Integer> reverse = new HashMap<>(); for (Map.Entry> entry : ids.entrySet()) { diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateRequestTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateRequestTests.java new file mode 100644 index 0000000000000..1a95b77cc1024 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateRequestTests.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness; + +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +public class DeleteDecommissionStateRequestTests extends OpenSearchTestCase { + + public void testSerialization() throws IOException { + final DeleteDecommissionStateRequest originalRequest = new DeleteDecommissionStateRequest(); + + final DeleteDecommissionStateRequest cloneRequest; + try (BytesStreamOutput out = new BytesStreamOutput()) { + originalRequest.writeTo(out); + try (StreamInput in = out.bytes().streamInput()) { + cloneRequest = new DeleteDecommissionStateRequest(in); + } + } + assertEquals(cloneRequest.clusterManagerNodeTimeout(), originalRequest.clusterManagerNodeTimeout()); + } +} diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateResponseTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateResponseTests.java new file mode 100644 index 0000000000000..085eda3e9d0e7 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateResponseTests.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness; + +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +public class DeleteDecommissionStateResponseTests extends OpenSearchTestCase { + + public void testSerialization() throws IOException { + final DeleteDecommissionStateResponse originalResponse = new DeleteDecommissionStateResponse(true); + + final DeleteDecommissionStateResponse deserialized = copyWriteable( + originalResponse, + writableRegistry(), + DeleteDecommissionStateResponse::new + ); + assertEquals(deserialized, originalResponse); + + } +} diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestTests.java new file mode 100644 index 0000000000000..973485e1917f7 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestTests.java @@ -0,0 +1,50 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +public class GetDecommissionStateRequestTests extends OpenSearchTestCase { + public void testSerialization() throws IOException { + String attributeName = "zone"; + final GetDecommissionStateRequest originalRequest = new GetDecommissionStateRequest(attributeName); + final GetDecommissionStateRequest deserialized = copyWriteable( + originalRequest, + writableRegistry(), + GetDecommissionStateRequest::new + ); + assertEquals(deserialized.attributeName(), originalRequest.attributeName()); + } + + public void testValidation() { + { + String attributeName = null; + final GetDecommissionStateRequest request = new GetDecommissionStateRequest(attributeName); + ActionRequestValidationException e = request.validate(); + assertNotNull(e); + assertTrue(e.getMessage().contains("attribute name is missing")); + } + { + String attributeName = ""; + final GetDecommissionStateRequest request = new GetDecommissionStateRequest(attributeName); + ActionRequestValidationException e = request.validate(); + assertNotNull(e); + assertTrue(e.getMessage().contains("attribute name is missing")); + } + { + String attributeName = "zone"; + final GetDecommissionStateRequest request = new GetDecommissionStateRequest(attributeName); + ActionRequestValidationException e = request.validate(); + assertNull(e); + } + } +} diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponseTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponseTests.java new file mode 100644 index 0000000000000..437faf2a75720 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponseTests.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.xcontent.XContentParser; +import org.opensearch.test.AbstractXContentTestCase; + +import java.io.IOException; + +public class GetDecommissionStateResponseTests extends AbstractXContentTestCase { + @Override + protected GetDecommissionStateResponse createTestInstance() { + DecommissionStatus status = null; + String attributeValue = null; + if (randomBoolean()) { + status = randomFrom(DecommissionStatus.values()); + attributeValue = randomAlphaOfLength(10); + } + return new GetDecommissionStateResponse(attributeValue, status); + } + + @Override + protected GetDecommissionStateResponse doParseInstance(XContentParser parser) throws IOException { + return GetDecommissionStateResponse.fromXContent(parser); + } + + @Override + protected boolean supportsUnknownFields() { + return false; + } +} diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestTests.java new file mode 100644 index 0000000000000..8cd407b3aecf2 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestTests.java @@ -0,0 +1,87 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +public class DecommissionRequestTests extends OpenSearchTestCase { + + public void testSerialization() throws IOException { + String attributeName = "zone"; + String attributeValue = "zone-1"; + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + final DecommissionRequest originalRequest = new DecommissionRequest(decommissionAttribute); + + final DecommissionRequest deserialized = copyWriteable(originalRequest, writableRegistry(), DecommissionRequest::new); + + assertEquals(deserialized.getDecommissionAttribute(), originalRequest.getDecommissionAttribute()); + assertEquals(deserialized.getDelayTimeout(), originalRequest.getDelayTimeout()); + } + + public void testValidation() { + { + String attributeName = null; + String attributeValue = "test"; + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + + final DecommissionRequest request = new DecommissionRequest(decommissionAttribute); + ActionRequestValidationException e = request.validate(); + assertNotNull(e); + assertTrue(e.getMessage().contains("attribute name is missing")); + } + { + String attributeName = "zone"; + String attributeValue = ""; + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + + final DecommissionRequest request = new DecommissionRequest(decommissionAttribute); + ActionRequestValidationException e = request.validate(); + assertNotNull(e); + assertTrue(e.getMessage().contains("attribute value is missing")); + } + { + String attributeName = "zone"; + String attributeValue = "test"; + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + + final DecommissionRequest request = new DecommissionRequest(decommissionAttribute); + request.setNoDelay(true); + ActionRequestValidationException e = request.validate(); + assertNull(e); + assertEquals(TimeValue.ZERO, request.getDelayTimeout()); + } + { + String attributeName = "zone"; + String attributeValue = "test"; + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + + final DecommissionRequest request = new DecommissionRequest(decommissionAttribute); + ActionRequestValidationException e = request.validate(); + assertNull(e); + assertEquals(DecommissionRequest.DEFAULT_NODE_DRAINING_TIMEOUT, request.getDelayTimeout()); + } + { + String attributeName = "zone"; + String attributeValue = "test"; + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + + final DecommissionRequest request = new DecommissionRequest(decommissionAttribute); + request.setNoDelay(true); + request.setDelayTimeout(TimeValue.timeValueSeconds(30)); + ActionRequestValidationException e = request.validate(); + assertNotNull(e); + assertTrue(e.getMessage().contains("Invalid decommission request")); + } + } +} diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponseTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponseTests.java new file mode 100644 index 0000000000000..5ee5a5f3cf016 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponseTests.java @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +public class DecommissionResponseTests extends OpenSearchTestCase { + public void testSerialization() throws IOException { + final DecommissionResponse originalRequest = new DecommissionResponse(true); + copyWriteable(originalRequest, writableRegistry(), DecommissionResponse::new); + // there are no fields so we're just checking that this doesn't throw anything + } +} diff --git a/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java index d96c972bc6021..74c5d0fcccbed 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java @@ -87,6 +87,7 @@ import static org.opensearch.cluster.coordination.NoClusterManagerBlockService.NO_CLUSTER_MANAGER_BLOCK_SETTING; import static org.opensearch.cluster.coordination.NoClusterManagerBlockService.NO_CLUSTER_MANAGER_BLOCK_WRITES; import static org.opensearch.cluster.coordination.Reconfigurator.CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION; +import static org.opensearch.discovery.PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING; import static org.opensearch.discovery.PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING; import static org.opensearch.monitor.StatusInfo.Status.HEALTHY; import static org.opensearch.monitor.StatusInfo.Status.UNHEALTHY; @@ -1780,6 +1781,48 @@ public void testImproveConfigurationPerformsVotingConfigExclusionStateCheck() { } } + public void testLocalNodeAlwaysCommissionedWithoutDecommissionedException() { + try (Cluster cluster = new Cluster(randomIntBetween(1, 5))) { + cluster.runRandomly(); + cluster.stabilise(); + for (ClusterNode node : cluster.clusterNodes) { + assertTrue(node.coordinator.localNodeCommissioned()); + } + } + } + + public void testClusterStabilisesForPreviouslyDecommissionedNode() { + try (Cluster cluster = new Cluster(randomIntBetween(1, 5))) { + cluster.runRandomly(); + cluster.stabilise(); + for (ClusterNode node : cluster.clusterNodes) { + assertTrue(node.coordinator.localNodeCommissioned()); + } + final ClusterNode leader = cluster.getAnyLeader(); + + ClusterNode decommissionedNode = cluster.new ClusterNode( + nextNodeIndex.getAndIncrement(), true, leader.nodeSettings, () -> new StatusInfo(HEALTHY, "healthy-info") + ); + decommissionedNode.coordinator.onNodeCommissionStatusChange(false); + cluster.clusterNodes.add(decommissionedNode); + + assertFalse(decommissionedNode.coordinator.localNodeCommissioned()); + + cluster.stabilise( + // Interval is updated to decommissioned find peer interval + defaultMillis(DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING) + // One message delay to send a join + + DEFAULT_DELAY_VARIABILITY + // Commit a new cluster state with the new node(s). Might be split into multiple commits, and each might need a + // followup reconfiguration + + 3 * 2 * DEFAULT_CLUSTER_STATE_UPDATE_DELAY + ); + + // once cluster stabilises the node joins and would be commissioned + assertTrue(decommissionedNode.coordinator.localNodeCommissioned()); + } + } + private ClusterState buildNewClusterStateWithVotingConfigExclusion( ClusterState currentState, Set newVotingConfigExclusion diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java index a3c945cdbac3a..7b21042b2ed4a 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java @@ -90,7 +90,8 @@ public void testJoinDeduplication() { startJoinRequest -> { throw new AssertionError(); }, Collections.emptyList(), (s, p, r) -> {}, - () -> new StatusInfo(HEALTHY, "info") + () -> new StatusInfo(HEALTHY, "info"), + nodeCommissioned -> {} ); transportService.start(); @@ -230,7 +231,8 @@ private void assertJoinValidationRejectsMismatchedClusterUUID(String actionName, startJoinRequest -> { throw new AssertionError(); }, Collections.emptyList(), (s, p, r) -> {}, - null + null, + nodeCommissioned -> {} ); // registers request handler transportService.start(); transportService.acceptIncomingRequests(); @@ -284,7 +286,8 @@ public void testJoinFailureOnUnhealthyNodes() { startJoinRequest -> { throw new AssertionError(); }, Collections.emptyList(), (s, p, r) -> {}, - () -> nodeHealthServiceStatus.get() + () -> nodeHealthServiceStatus.get(), + nodeCommissioned -> {} ); transportService.start(); diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 479dbc3835b2f..9bb0084b20817 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -36,9 +36,14 @@ import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateTaskExecutor; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.allocation.AllocationService; @@ -50,6 +55,7 @@ import org.opensearch.transport.TransportService; import java.util.HashMap; +import java.util.Collections; import java.util.HashSet; import java.util.Map; @@ -290,4 +296,116 @@ public void testIsBecomeClusterManagerTask() { JoinTaskExecutor.Task joinTaskOfClusterManager = JoinTaskExecutor.newBecomeClusterManagerTask(); assertThat(joinTaskOfClusterManager.isBecomeClusterManagerTask(), is(true)); } + + public void testJoinClusterWithNoDecommission() { + Settings.builder().build(); + Metadata.Builder metaBuilder = Metadata.builder(); + Metadata metadata = metaBuilder.build(); + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-2")); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); + } + + public void testPreventJoinClusterWithDecommission() { + Settings.builder().build(); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionStatus decommissionStatus = randomFrom( + DecommissionStatus.IN_PROGRESS, + DecommissionStatus.SUCCESSFUL, + DecommissionStatus.DRAINING + ); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + decommissionStatus + ); + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); + expectThrows(NodeDecommissionedException.class, () -> JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata)); + } + + public void testJoinClusterWithDifferentDecommission() { + Settings.builder().build(); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionStatus decommissionStatus = randomFrom( + DecommissionStatus.INIT, + DecommissionStatus.IN_PROGRESS, + DecommissionStatus.SUCCESSFUL + ); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + decommissionStatus + ); + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); + + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-2")); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); + } + + public void testJoinFailedForDecommissionedNode() throws Exception { + final AllocationService allocationService = mock(AllocationService.class); + when(allocationService.adaptAutoExpandReplicas(any())).then(invocationOnMock -> invocationOnMock.getArguments()[0]); + final RerouteService rerouteService = (reason, priority, listener) -> listener.onResponse(null); + + final JoinTaskExecutor joinTaskExecutor = new JoinTaskExecutor(Settings.EMPTY, allocationService, logger, rerouteService, null); + + final DiscoveryNode clusterManagerNode = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), Version.CURRENT); + + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone1"); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.SUCCESSFUL + ); + final ClusterState clusterManagerClusterState = ClusterState.builder(ClusterName.DEFAULT) + .nodes( + DiscoveryNodes.builder() + .add(clusterManagerNode) + .localNodeId(clusterManagerNode.getId()) + .clusterManagerNodeId(clusterManagerNode.getId()) + ) + .metadata(Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata)) + .build(); + + final DiscoveryNode decommissionedNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + Collections.singletonMap("zone", "zone1"), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + String decommissionedNodeID = decommissionedNode.getId(); + + final ClusterStateTaskExecutor.ClusterTasksResult result = joinTaskExecutor.execute( + clusterManagerClusterState, + List.of(new JoinTaskExecutor.Task(decommissionedNode, "test")) + ); + assertThat(result.executionResults.entrySet(), hasSize(1)); + final ClusterStateTaskExecutor.TaskResult taskResult = result.executionResults.values().iterator().next(); + assertFalse(taskResult.isSuccess()); + assertTrue(taskResult.getFailure() instanceof NodeDecommissionedException); + assertFalse(result.resultingState.getNodes().nodeExists(decommissionedNodeID)); + } + + public void testJoinClusterWithDecommissionFailed() { + Settings.builder().build(); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.FAILED + ); + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); + + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); + } + + private DiscoveryNode newDiscoveryNode(Map attributes) { + return new DiscoveryNode( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + buildNewFakeTransportAddress(), + attributes, + Collections.singleton(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), + Version.CURRENT + ); + } } diff --git a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java index c77baba5fe167..18a7b892a424c 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java @@ -39,6 +39,10 @@ import org.opensearch.cluster.OpenSearchAllocationTestCase; import org.opensearch.cluster.block.ClusterBlocks; import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; @@ -775,6 +779,60 @@ public void testJoinElectedLeaderWithDeprecatedMasterRole() { assertTrue(clusterStateHasNode(node1)); } + public void testJoinFailsWhenDecommissioned() { + DiscoveryNode node0 = newNode(0, true); + DiscoveryNode node1 = newNode(1, true); + long initialTerm = randomLongBetween(1, 10); + long initialVersion = randomLongBetween(1, 10); + setupFakeClusterManagerServiceAndCoordinator( + initialTerm, + initialStateWithDecommissionedAttribute( + initialState(node0, initialTerm, initialVersion, VotingConfiguration.of(node0)), + new DecommissionAttribute("zone", "zone1") + ), + () -> new StatusInfo(HEALTHY, "healthy-info") + ); + assertFalse(isLocalNodeElectedMaster()); + long newTerm = initialTerm + randomLongBetween(1, 10); + joinNodeAndRun(new JoinRequest(node0, newTerm, Optional.of(new Join(node0, node0, newTerm, initialTerm, initialVersion)))); + assertTrue(isLocalNodeElectedMaster()); + assertFalse(clusterStateHasNode(node1)); + joinNodeAndRun(new JoinRequest(node1, newTerm, Optional.of(new Join(node1, node0, newTerm, initialTerm, initialVersion)))); + assertTrue(isLocalNodeElectedMaster()); + assertTrue(clusterStateHasNode(node1)); + DiscoveryNode decommissionedNode = new DiscoveryNode( + "data_2", + 2 + "", + buildNewFakeTransportAddress(), + Collections.singletonMap("zone", "zone1"), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + long anotherTerm = newTerm + randomLongBetween(1, 10); + + assertThat( + expectThrows( + NodeDecommissionedException.class, + () -> joinNodeAndRun(new JoinRequest(decommissionedNode, anotherTerm, Optional.empty())) + ).getMessage(), + containsString("with current status of decommissioning") + ); + assertFalse(clusterStateHasNode(decommissionedNode)); + + DiscoveryNode node3 = new DiscoveryNode( + "data_3", + 3 + "", + buildNewFakeTransportAddress(), + Collections.singletonMap("zone", "zone2"), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + long termForNode3 = anotherTerm + randomLongBetween(1, 10); + + joinNodeAndRun(new JoinRequest(node3, termForNode3, Optional.empty())); + assertTrue(clusterStateHasNode(node3)); + } + private boolean isLocalNodeElectedMaster() { return MasterServiceTests.discoveryState(clusterManagerService).nodes().isLocalNodeElectedMaster(); } @@ -782,4 +840,17 @@ private boolean isLocalNodeElectedMaster() { private boolean clusterStateHasNode(DiscoveryNode node) { return node.equals(MasterServiceTests.discoveryState(clusterManagerService).nodes().get(node.getId())); } + + private static ClusterState initialStateWithDecommissionedAttribute( + ClusterState clusterState, + DecommissionAttribute decommissionAttribute + ) { + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.SUCCESSFUL + ); + return ClusterState.builder(clusterState) + .metadata(Metadata.builder(clusterState.metadata()).decommissionAttributeMetadata(decommissionAttributeMetadata)) + .build(); + } } diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java new file mode 100644 index 0000000000000..ba74b29081c26 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -0,0 +1,398 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.junit.After; +import org.junit.Before; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.Version; +import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.TransportClearVotingConfigExclusionsAction; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.transport.MockTransport; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import static java.util.Collections.emptySet; +import static java.util.Collections.singletonMap; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.sameInstance; +import static org.opensearch.cluster.ClusterState.builder; +import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; +import static org.opensearch.test.ClusterServiceUtils.createClusterService; +import static org.opensearch.test.ClusterServiceUtils.setState; + +public class DecommissionControllerTests extends OpenSearchTestCase { + + private static ThreadPool threadPool; + private static ClusterService clusterService; + private TransportService transportService; + private AllocationService allocationService; + private DecommissionController decommissionController; + private ClusterSettings clusterSettings; + + @Before + public void setTransportServiceAndDefaultClusterState() { + threadPool = new TestThreadPool("test", Settings.EMPTY); + allocationService = createAllocationService(); + ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); + logger.info("--> adding five nodes on same zone_1"); + clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); + logger.info("--> adding five nodes on same zone_2"); + clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); + logger.info("--> adding five nodes on same zone_3"); + clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); + clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); + clusterState = setThreeNodesInVotingConfig(clusterState); + final ClusterState.Builder builder = builder(clusterState); + clusterService = createClusterService(threadPool, clusterState.nodes().get("node1")); + setState(clusterService, builder); + final MockTransport transport = new MockTransport(); + transportService = transport.createTransportService( + Settings.EMPTY, + threadPool, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + boundTransportAddress -> clusterService.state().nodes().get("node1"), + null, + emptySet() + ); + + final Settings.Builder nodeSettingsBuilder = Settings.builder(); + final Settings nodeSettings = nodeSettingsBuilder.build(); + clusterSettings = new ClusterSettings(nodeSettings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + new TransportAddVotingConfigExclusionsAction( + nodeSettings, + clusterSettings, + transportService, + clusterService, + threadPool, + new ActionFilters(emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)) + ); // registers action + + new TransportClearVotingConfigExclusionsAction( + transportService, + clusterService, + threadPool, + new ActionFilters(emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)) + ); // registers action + + transportService.start(); + transportService.acceptIncomingRequests(); + decommissionController = new DecommissionController(clusterService, transportService, allocationService, threadPool); + } + + @After + public void shutdownThreadPoolAndClusterService() { + clusterService.stop(); + threadPool.shutdown(); + } + + public void testAddNodesToVotingConfigExclusion() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(2); + + ClusterStateObserver clusterStateObserver = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext()); + clusterStateObserver.waitForNextChange(new AdjustConfigurationForExclusions(countDownLatch)); + Set nodesToRemoveFromVotingConfig = Collections.singleton(randomFrom("node1", "node6", "node11")); + decommissionController.excludeDecommissionedNodesFromVotingConfig(nodesToRemoveFromVotingConfig, new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("unexpected failure occurred while removing node from voting config " + e); + } + }); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + clusterService.getClusterApplierService().state().getVotingConfigExclusions().forEach(vce -> { + assertTrue(nodesToRemoveFromVotingConfig.contains(vce.getNodeId())); + assertEquals(nodesToRemoveFromVotingConfig.size(), 1); + }); + } + + public void testClearVotingConfigExclusions() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + decommissionController.clearVotingConfigExclusion(new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("unexpected failure occurred while clearing voting config exclusion" + e); + } + }, false); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + assertThat(clusterService.getClusterApplierService().state().getVotingConfigExclusions(), empty()); + } + + public void testNodesRemovedForDecommissionRequestSuccessfulResponse() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + Set nodesToBeRemoved = new HashSet<>(); + nodesToBeRemoved.add(clusterService.state().nodes().get("node11")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node12")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node13")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); + + decommissionController.removeDecommissionedNodes( + nodesToBeRemoved, + "unit-test", + TimeValue.timeValueSeconds(30L), + new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("there shouldn't have been any failure"); + } + } + ); + + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + // test all 5 nodes removed and cluster has 10 nodes + Set nodes = StreamSupport.stream(clusterService.getClusterApplierService().state().nodes().spliterator(), false) + .collect(Collectors.toSet()); + assertEquals(nodes.size(), 10); + // test no nodes part of zone-3 + for (DiscoveryNode node : nodes) { + assertNotEquals(node.getAttributes().get("zone"), "zone-1"); + } + } + + public void testTimesOut() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + Set nodesToBeRemoved = new HashSet<>(); + nodesToBeRemoved.add(clusterService.state().nodes().get("node11")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node12")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node13")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); + decommissionController.removeDecommissionedNodes( + nodesToBeRemoved, + "unit-test-timeout", + TimeValue.timeValueMillis(2), + new ActionListener() { + @Override + public void onResponse(Void unused) { + fail("response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertThat(e, instanceOf(OpenSearchTimeoutException.class)); + assertThat(e.getMessage(), containsString("waiting for removal of decommissioned nodes")); + countDownLatch.countDown(); + } + } + ); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + public void testSuccessfulDecommissionStatusMetadataUpdate() throws InterruptedException { + Map> decommissionStateTransitionMap = Map.of( + DecommissionStatus.INIT, + Set.of(DecommissionStatus.DRAINING, DecommissionStatus.IN_PROGRESS), + DecommissionStatus.DRAINING, + Set.of(DecommissionStatus.IN_PROGRESS), + DecommissionStatus.IN_PROGRESS, + Set.of(DecommissionStatus.SUCCESSFUL) + ); + + for (Map.Entry> entry : decommissionStateTransitionMap.entrySet()) { + for (DecommissionStatus val : entry.getValue()) { + verifyDecommissionStatusTransition(entry.getKey(), val); + } + } + } + + public void testSuccessfulDecommissionStatusMetadataUpdateForFailedState() throws InterruptedException { + Map> decommissionStateTransitionMap = Map.of( + DecommissionStatus.INIT, + Set.of(DecommissionStatus.FAILED), + DecommissionStatus.DRAINING, + Set.of(DecommissionStatus.FAILED), + DecommissionStatus.IN_PROGRESS, + Set.of(DecommissionStatus.FAILED) + ); + + for (Map.Entry> entry : decommissionStateTransitionMap.entrySet()) { + for (DecommissionStatus val : entry.getValue()) { + verifyDecommissionStatusTransition(entry.getKey(), val); + } + } + } + + private void verifyDecommissionStatusTransition(DecommissionStatus currentStatus, DecommissionStatus newStatus) + throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( + new DecommissionAttribute("zone", "zone-1"), + currentStatus + ); + ClusterState state = clusterService.state(); + Metadata metadata = state.metadata(); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + mdBuilder.decommissionAttributeMetadata(oldMetadata); + state = ClusterState.builder(state).metadata(mdBuilder).build(); + setState(clusterService, state); + + decommissionController.updateMetadataWithDecommissionStatus(newStatus, new ActionListener() { + @Override + public void onResponse(DecommissionStatus status) { + assertEquals(newStatus, status); + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("decommission status update failed"); + countDownLatch.countDown(); + } + }); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + ClusterState newState = clusterService.getClusterApplierService().state(); + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().decommissionAttributeMetadata(); + assertEquals(decommissionAttributeMetadata.status(), newStatus); + } + + private static class AdjustConfigurationForExclusions implements ClusterStateObserver.Listener { + + final CountDownLatch doneLatch; + + AdjustConfigurationForExclusions(CountDownLatch latch) { + this.doneLatch = latch; + } + + @Override + public void onNewClusterState(ClusterState state) { + clusterService.getClusterManagerService().submitStateUpdateTask("reconfiguration", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + assertThat(currentState, sameInstance(state)); + final Set votingNodeIds = new HashSet<>(); + currentState.nodes().forEach(n -> votingNodeIds.add(n.getId())); + currentState.getVotingConfigExclusions().forEach(t -> votingNodeIds.remove(t.getNodeId())); + final CoordinationMetadata.VotingConfiguration votingConfiguration = new CoordinationMetadata.VotingConfiguration( + votingNodeIds + ); + return builder(currentState).metadata( + Metadata.builder(currentState.metadata()) + .coordinationMetadata( + CoordinationMetadata.builder(currentState.coordinationMetadata()) + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ) + ).build(); + } + + @Override + public void onFailure(String source, Exception e) { + throw new AssertionError("unexpected failure", e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + doneLatch.countDown(); + } + }); + } + + @Override + public void onClusterServiceClose() { + throw new AssertionError("unexpected close"); + } + + @Override + public void onTimeout(TimeValue timeout) { + throw new AssertionError("unexpected timeout"); + } + } + + private ClusterState addNodes(ClusterState clusterState, String zone, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, String nodeId) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + nodeBuilder.localNodeId(nodeId); + nodeBuilder.clusterManagerNodeId(nodeId); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setThreeNodesInVotingConfig(ClusterState clusterState) { + final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of( + clusterState.nodes().get("node1"), + clusterState.nodes().get("node6"), + clusterState.nodes().get("node11") + ); + + Metadata.Builder builder = Metadata.builder() + .coordinationMetadata( + CoordinationMetadata.builder() + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ); + clusterState = ClusterState.builder(clusterState).metadata(builder).build(); + return clusterState; + } + + private static DiscoveryNode newNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLE, Version.CURRENT); + } + + final private static Set CLUSTER_MANAGER_DATA_ROLE = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) + ); +} diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java new file mode 100644 index 0000000000000..3f39d67dee765 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -0,0 +1,465 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; +import org.opensearch.Version; +import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.metadata.WeightedRoutingMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.WeightedRouting; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.test.ClusterServiceUtils; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.transport.MockTransport; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportResponseHandler; +import org.opensearch.transport.TransportService; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import static java.util.Collections.emptySet; +import static java.util.Collections.singletonMap; +import static org.opensearch.cluster.ClusterState.builder; +import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; +import static org.opensearch.test.ClusterServiceUtils.createClusterService; +import static org.opensearch.test.ClusterServiceUtils.setState; + +public class DecommissionServiceTests extends OpenSearchTestCase { + + private ThreadPool threadPool; + private ClusterService clusterService; + private TransportService transportService; + private AllocationService allocationService; + private DecommissionService decommissionService; + private ClusterSettings clusterSettings; + + @Before + public void setUpService() { + threadPool = new TestThreadPool("test", Settings.EMPTY); + clusterService = createClusterService(threadPool); + allocationService = createAllocationService(); + ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); + logger.info("--> adding cluster manager node on zone_1"); + clusterState = addClusterManagerNodes(clusterState, "zone_1", "node1"); + logger.info("--> adding cluster manager node on zone_2"); + clusterState = addClusterManagerNodes(clusterState, "zone_2", "node6"); + logger.info("--> adding cluster manager node on zone_3"); + clusterState = addClusterManagerNodes(clusterState, "zone_3", "node11"); + logger.info("--> adding four data nodes on zone_1"); + clusterState = addDataNodes(clusterState, "zone_1", "node2", "node3", "node4", "node5"); + logger.info("--> adding four data nodes on zone_2"); + clusterState = addDataNodes(clusterState, "zone_2", "node7", "node8", "node9", "node10"); + logger.info("--> adding four data nodes on zone_3"); + clusterState = addDataNodes(clusterState, "zone_3", "node12", "node13", "node14", "node15"); + clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); + clusterState = setNodesInVotingConfig( + clusterState, + clusterState.nodes().get("node1"), + clusterState.nodes().get("node6"), + clusterState.nodes().get("node11") + ); + final ClusterState.Builder builder = builder(clusterState); + setState(clusterService, builder); + final MockTransport transport = new MockTransport(); + transportService = transport.createTransportService( + Settings.EMPTY, + threadPool, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + boundTransportAddress -> clusterService.state().nodes().get("node1"), + null, + emptySet() + ); + + final Settings.Builder nodeSettingsBuilder = Settings.builder() + .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), "zone") + .put("cluster.routing.allocation.awareness.force.zone.values", "zone_1,zone_2,zone_3"); + + clusterSettings = new ClusterSettings(nodeSettingsBuilder.build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + transportService.start(); + transportService.acceptIncomingRequests(); + + this.decommissionService = new DecommissionService( + nodeSettingsBuilder.build(), + clusterSettings, + clusterService, + transportService, + threadPool, + allocationService + ); + } + + @After + public void shutdownThreadPoolAndClusterService() { + clusterService.stop(); + threadPool.shutdown(); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningNotStartedForInvalidAttributeName() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("rack", "rack-a"); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(DecommissionResponse decommissionResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + assertThat(e.getMessage(), Matchers.endsWith("invalid awareness attribute requested for decommissioning")); + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(new DecommissionRequest(decommissionAttribute), listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningNotStartedForInvalidAttributeValue() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "rack-a"); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(DecommissionResponse decommissionResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + assertThat( + e.getMessage(), + Matchers.endsWith( + "invalid awareness attribute value requested for decommissioning. " + + "Set forced awareness values before to decommission" + ) + ); + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(new DecommissionRequest(decommissionAttribute), listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + public void testDecommissionNotStartedWithoutWeighingAwayAttribute_1() throws InterruptedException { + Map weights = Map.of("zone_1", 1.0, "zone_2", 1.0, "zone_3", 0.0); + setWeightedRoutingWeights(weights); + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone_1"); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(DecommissionResponse decommissionResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + assertThat( + e.getMessage(), + Matchers.containsString("weight for decommissioned attribute is expected to be [0.0] but found [1.0]") + ); + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(new DecommissionRequest(decommissionAttribute), listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + public void testDecommissionNotStartedWithoutWeighingAwayAttribute_2() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone_1"); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(DecommissionResponse decommissionResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + assertThat( + e.getMessage(), + Matchers.containsString( + "no weights are set to the attribute. Please set appropriate weights before triggering decommission action" + ) + ); + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(new DecommissionRequest(decommissionAttribute), listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningFailedWhenAnotherAttributeDecommissioningSuccessful() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionStatus oldStatus = randomFrom(DecommissionStatus.SUCCESSFUL, DecommissionStatus.IN_PROGRESS, DecommissionStatus.INIT); + DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( + new DecommissionAttribute("zone", "zone_1"), + oldStatus + ); + final ClusterState.Builder builder = builder(clusterService.state()); + setState( + clusterService, + builder.metadata(Metadata.builder(clusterService.state().metadata()).decommissionAttributeMetadata(oldMetadata).build()) + ); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(DecommissionResponse decommissionResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + if (oldStatus.equals(DecommissionStatus.SUCCESSFUL)) { + assertThat( + e.getMessage(), + Matchers.endsWith("already successfully decommissioned, recommission before triggering another decommission") + ); + } else { + assertThat(e.getMessage(), Matchers.endsWith("is in progress, cannot process this request")); + } + countDownLatch.countDown(); + } + }; + DecommissionRequest request = new DecommissionRequest(new DecommissionAttribute("zone", "zone_2")); + decommissionService.startDecommissionAction(request, listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + public void testScheduleNodesDecommissionOnTimeout() { + TransportService mockTransportService = Mockito.mock(TransportService.class); + ThreadPool mockThreadPool = Mockito.mock(ThreadPool.class); + Mockito.when(mockTransportService.getLocalNode()).thenReturn(Mockito.mock(DiscoveryNode.class)); + Mockito.when(mockTransportService.getThreadPool()).thenReturn(mockThreadPool); + DecommissionService decommissionService = new DecommissionService( + Settings.EMPTY, + clusterSettings, + clusterService, + mockTransportService, + threadPool, + allocationService + ); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-2"); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.DRAINING + ); + Metadata metadata = Metadata.builder().putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata).build(); + ClusterState state = ClusterState.builder(new ClusterName("test")).metadata(metadata).build(); + + DiscoveryNode decommissionedNode1 = Mockito.mock(DiscoveryNode.class); + DiscoveryNode decommissionedNode2 = Mockito.mock(DiscoveryNode.class); + + setState(clusterService, state); + decommissionService.scheduleNodesDecommissionOnTimeout( + Set.of(decommissionedNode1, decommissionedNode2), + DecommissionRequest.DEFAULT_NODE_DRAINING_TIMEOUT + ); + + Mockito.verify(mockThreadPool).schedule(Mockito.any(Runnable.class), Mockito.any(TimeValue.class), Mockito.anyString()); + } + + public void testDrainNodesWithDecommissionedAttributeWithNoDelay() { + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-2"); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.INIT + ); + + Metadata metadata = Metadata.builder().putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata).build(); + ClusterState state = ClusterState.builder(new ClusterName("test")).metadata(metadata).build(); + + DecommissionRequest request = new DecommissionRequest(decommissionAttribute); + request.setNoDelay(true); + + setState(clusterService, state); + decommissionService.drainNodesWithDecommissionedAttribute(request); + + } + + public void testClearClusterDecommissionState() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-2"); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.SUCCESSFUL + ); + ClusterState state = ClusterState.builder(new ClusterName("test")) + .metadata(Metadata.builder().putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata).build()) + .build(); + + ActionListener listener = new ActionListener<>() { + @Override + public void onResponse(DeleteDecommissionStateResponse decommissionResponse) { + DecommissionAttributeMetadata metadata = clusterService.state().metadata().custom(DecommissionAttributeMetadata.TYPE); + assertNull(metadata); + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("on failure shouldn't have been called"); + countDownLatch.countDown(); + } + }; + + this.decommissionService.deleteDecommissionState(listener); + + // Decommission Attribute should be removed. + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + public void testDeleteDecommissionAttributeClearVotingExclusion() { + TransportService mockTransportService = Mockito.mock(TransportService.class); + Mockito.when(mockTransportService.getLocalNode()).thenReturn(Mockito.mock(DiscoveryNode.class)); + DecommissionService decommissionService = new DecommissionService( + Settings.EMPTY, + clusterSettings, + clusterService, + mockTransportService, + threadPool, + allocationService + ); + decommissionService.startRecommissionAction(Mockito.mock(ActionListener.class)); + + ArgumentCaptor clearVotingConfigExclusionsRequestArgumentCaptor = ArgumentCaptor.forClass( + ClearVotingConfigExclusionsRequest.class + ); + Mockito.verify(mockTransportService) + .sendRequest( + Mockito.any(DiscoveryNode.class), + Mockito.anyString(), + clearVotingConfigExclusionsRequestArgumentCaptor.capture(), + Mockito.any(TransportResponseHandler.class) + ); + + ClearVotingConfigExclusionsRequest request = clearVotingConfigExclusionsRequestArgumentCaptor.getValue(); + assertFalse(request.getWaitForRemoval()); + } + + public void testClusterUpdateTaskForDeletingDecommission() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + ActionListener listener = new ActionListener<>() { + @Override + public void onResponse(DeleteDecommissionStateResponse response) { + assertTrue(response.isAcknowledged()); + assertNull(clusterService.state().metadata().decommissionAttributeMetadata()); + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("On Failure shouldn't have been called"); + countDownLatch.countDown(); + } + }; + decommissionService.deleteDecommissionState(listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + private void setWeightedRoutingWeights(Map weights) { + ClusterState clusterState = clusterService.state(); + WeightedRouting weightedRouting = new WeightedRouting("zone", weights); + WeightedRoutingMetadata weightedRoutingMetadata = new WeightedRoutingMetadata(weightedRouting); + Metadata.Builder metadataBuilder = Metadata.builder(clusterState.metadata()); + metadataBuilder.putCustom(WeightedRoutingMetadata.TYPE, weightedRoutingMetadata); + clusterState = ClusterState.builder(clusterState).metadata(metadataBuilder).build(); + ClusterState.Builder builder = ClusterState.builder(clusterState); + ClusterServiceUtils.setState(clusterService, builder); + } + + private ClusterState addDataNodes(ClusterState clusterState, String zone, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newDataNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState addClusterManagerNodes(ClusterState clusterState, String zone, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds) + .forEach(nodeId -> nodeBuilder.add(newClusterManagerNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, String nodeId) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + nodeBuilder.localNodeId(nodeId); + nodeBuilder.clusterManagerNodeId(nodeId); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setNodesInVotingConfig(ClusterState clusterState, DiscoveryNode... nodes) { + final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of(nodes); + + Metadata.Builder builder = Metadata.builder() + .coordinationMetadata( + CoordinationMetadata.builder() + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ); + clusterState = ClusterState.builder(clusterState).metadata(builder).build(); + return clusterState; + } + + private static DiscoveryNode newDataNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, DATA_ROLE, Version.CURRENT); + } + + private static DiscoveryNode newClusterManagerNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_ROLE, Version.CURRENT); + } + + final private static Set CLUSTER_MANAGER_ROLE = Collections.unmodifiableSet( + new HashSet<>(Collections.singletonList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + ); + + final private static Set DATA_ROLE = Collections.unmodifiableSet( + new HashSet<>(Collections.singletonList(DiscoveryNodeRole.DATA_ROLE)) + ); + + private ClusterState removeNodes(ClusterState clusterState, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.getNodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeBuilder::remove); + return allocationService.disassociateDeadNodes(ClusterState.builder(clusterState).nodes(nodeBuilder).build(), false, "test"); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java new file mode 100644 index 0000000000000..60b3a03848830 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java @@ -0,0 +1,83 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.ClusterModule; +import org.opensearch.cluster.Diff; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.io.stream.NamedWriteableRegistry; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.xcontent.XContentParser; +import org.opensearch.test.AbstractDiffableSerializationTestCase; + +import java.io.IOException; + +public class DecommissionAttributeMetadataSerializationTests extends AbstractDiffableSerializationTestCase { + + @Override + protected Writeable.Reader instanceReader() { + return DecommissionAttributeMetadata::new; + } + + @Override + protected Metadata.Custom createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected Metadata.Custom mutateInstance(Metadata.Custom instance) { + return randomValueOtherThan(instance, this::createTestInstance); + } + + @Override + protected Metadata.Custom makeTestChanges(Metadata.Custom testInstance) { + DecommissionAttributeMetadata decommissionAttributeMetadata = (DecommissionAttributeMetadata) testInstance; + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + String attributeName = decommissionAttribute.attributeName(); + String attributeValue = decommissionAttribute.attributeValue(); + DecommissionStatus decommissionStatus = decommissionAttributeMetadata.status(); + if (randomBoolean()) { + decommissionStatus = randomFrom(DecommissionStatus.values()); + } + if (randomBoolean()) { + attributeName = randomAlphaOfLength(6); + } + if (randomBoolean()) { + attributeValue = randomAlphaOfLength(6); + } + return new DecommissionAttributeMetadata(new DecommissionAttribute(attributeName, attributeValue), decommissionStatus); + } + + @Override + protected Writeable.Reader> diffReader() { + return DecommissionAttributeMetadata::readDiffFrom; + } + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + return new NamedWriteableRegistry(ClusterModule.getNamedWriteables()); + } + + @Override + protected Metadata.Custom doParseInstance(XContentParser parser) throws IOException { + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + DecommissionAttributeMetadata decommissionAttributeMetadata = DecommissionAttributeMetadata.fromXContent(parser); + assertEquals(XContentParser.Token.END_OBJECT, parser.currentToken()); + return new DecommissionAttributeMetadata( + decommissionAttributeMetadata.decommissionAttribute(), + decommissionAttributeMetadata.status() + ); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java new file mode 100644 index 0000000000000..746d4565b0db3 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.io.stream.NamedWriteableRegistry; +import org.opensearch.test.AbstractNamedWriteableTestCase; + +import java.io.IOException; +import java.util.Collections; + +public class DecommissionAttributeMetadataTests extends AbstractNamedWriteableTestCase { + @Override + protected DecommissionAttributeMetadata createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected DecommissionAttributeMetadata mutateInstance(DecommissionAttributeMetadata instance) throws IOException { + return randomValueOtherThan(instance, this::createTestInstance); + } + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + return new NamedWriteableRegistry( + Collections.singletonList( + new NamedWriteableRegistry.Entry( + DecommissionAttributeMetadata.class, + DecommissionAttributeMetadata.TYPE, + DecommissionAttributeMetadata::new + ) + ) + ); + } + + @Override + protected Class categoryClass() { + return DecommissionAttributeMetadata.class; + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java new file mode 100644 index 0000000000000..030946f4510a1 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.xcontent.XContentParser; +import org.opensearch.test.AbstractXContentTestCase; + +import java.io.IOException; + +public class DecommissionAttributeMetadataXContentTests extends AbstractXContentTestCase { + @Override + protected DecommissionAttributeMetadata createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected DecommissionAttributeMetadata doParseInstance(XContentParser parser) throws IOException { + return DecommissionAttributeMetadata.fromXContent(parser); + } + + @Override + protected boolean supportsUnknownFields() { + return false; + } +} diff --git a/server/src/test/java/org/opensearch/cluster/routing/WeightedRoutingServiceTests.java b/server/src/test/java/org/opensearch/cluster/routing/WeightedRoutingServiceTests.java index fc5d46ef84c79..91b8703cacf5c 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/WeightedRoutingServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/WeightedRoutingServiceTests.java @@ -8,6 +8,7 @@ package org.opensearch.cluster.routing; +import org.hamcrest.MatcherAssert; import org.junit.After; import org.junit.Before; import org.opensearch.Version; @@ -21,6 +22,9 @@ import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.WeightedRoutingMetadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -43,6 +47,11 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.notNullValue; public class WeightedRoutingServiceTests extends OpenSearchTestCase { private ThreadPool threadPool; @@ -173,6 +182,15 @@ private ClusterState setWeightedRoutingWeights(ClusterState clusterState, Map weights = Map.of("zone_A", 1.0, "zone_B", 1.0, "zone_C", 1.0); ClusterState state = clusterService.state(); @@ -276,4 +294,72 @@ public void testVerifyAwarenessAttribute_ValidAttributeName() { fail("verify awareness attribute should not fail"); } } + + public void testAddWeightedRoutingFailsWhenDecommissionOngoing() throws InterruptedException { + Map weights = Map.of("zone_A", 1.0, "zone_B", 1.0, "zone_C", 1.0); + DecommissionStatus status = randomFrom(DecommissionStatus.INIT, DecommissionStatus.IN_PROGRESS, DecommissionStatus.SUCCESSFUL); + ClusterState state = clusterService.state(); + state = setWeightedRoutingWeights(state, weights); + state = setDecommissionAttribute(state, status); + ClusterState.Builder builder = ClusterState.builder(state); + ClusterServiceUtils.setState(clusterService, builder); + + ClusterPutWeightedRoutingRequestBuilder request = new ClusterPutWeightedRoutingRequestBuilder( + client, + ClusterAddWeightedRoutingAction.INSTANCE + ); + WeightedRouting updatedWeightedRouting = new WeightedRouting("zone", weights); + request.setWeightedRouting(updatedWeightedRouting); + final CountDownLatch countDownLatch = new CountDownLatch(1); + final AtomicReference exceptionReference = new AtomicReference<>(); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + exceptionReference.set(e); + countDownLatch.countDown(); + } + }; + weightedRoutingService.registerWeightedRoutingMetadata(request.request(), listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + MatcherAssert.assertThat("Expected onFailure to be called", exceptionReference.get(), notNullValue()); + MatcherAssert.assertThat(exceptionReference.get(), instanceOf(IllegalStateException.class)); + MatcherAssert.assertThat(exceptionReference.get().getMessage(), containsString("a decommission action is ongoing with status")); + } + + public void testAddWeightedRoutingPassesWhenDecommissionFailed() throws InterruptedException { + Map weights = Map.of("zone_A", 1.0, "zone_B", 1.0, "zone_C", 1.0); + DecommissionStatus status = DecommissionStatus.FAILED; + ClusterState state = clusterService.state(); + state = setWeightedRoutingWeights(state, weights); + state = setDecommissionAttribute(state, status); + ClusterState.Builder builder = ClusterState.builder(state); + ClusterServiceUtils.setState(clusterService, builder); + + ClusterPutWeightedRoutingRequestBuilder request = new ClusterPutWeightedRoutingRequestBuilder( + client, + ClusterAddWeightedRoutingAction.INSTANCE + ); + WeightedRouting updatedWeightedRouting = new WeightedRouting("zone", weights); + request.setWeightedRouting(updatedWeightedRouting); + final CountDownLatch countDownLatch = new CountDownLatch(1); + final AtomicReference exceptionReference = new AtomicReference<>(); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + assertTrue(clusterStateUpdateResponse.isAcknowledged()); + assertEquals(updatedWeightedRouting, clusterService.state().metadata().weightedRoutingMetadata().getWeightedRouting()); + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) {} + }; + weightedRoutingService.registerWeightedRoutingMetadata(request.request(), listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } } diff --git a/server/src/test/java/org/opensearch/discovery/PeerFinderTests.java b/server/src/test/java/org/opensearch/discovery/PeerFinderTests.java index 5e7dede0309c6..7e7bb2f0a2911 100644 --- a/server/src/test/java/org/opensearch/discovery/PeerFinderTests.java +++ b/server/src/test/java/org/opensearch/discovery/PeerFinderTests.java @@ -807,6 +807,42 @@ public void testReconnectsToDisconnectedNodes() { assertFoundPeers(rebootedOtherNode); } + public void testConnectionAttemptDuringDecommissioning() { + boolean localNodeCommissioned = randomBoolean(); + peerFinder.onNodeCommissionStatusChange(localNodeCommissioned); + + long findPeersInterval = peerFinder.getFindPeersInterval().millis(); + + final DiscoveryNode otherNode = newDiscoveryNode("node-1"); + providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.addReachableNode(otherNode); + + peerFinder.activate(lastAcceptedNodes); + runAllRunnableTasks(); + assertFoundPeers(otherNode); + + transportAddressConnector.reachableNodes.clear(); + final DiscoveryNode newNode = new DiscoveryNode("new-node", otherNode.getAddress(), Version.CURRENT); + transportAddressConnector.addReachableNode(newNode); + + connectedNodes.remove(otherNode); + disconnectedNodes.add(otherNode); + + // peer discovery will be delayed now + if (localNodeCommissioned == false) { + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + assertPeersNotDiscovered(newNode); + } + + final long expectedTime = CONNECTION_TIMEOUT_MILLIS + findPeersInterval; + while (deterministicTaskQueue.getCurrentTimeMillis() < expectedTime) { + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + } + assertFoundPeers(newNode); + } + private void respondToRequests(Function responseFactory) { final CapturedRequest[] capturedRequests = capturingTransport.getCapturedRequestsAndClear(); for (final CapturedRequest capturedRequest : capturedRequests) { @@ -828,6 +864,16 @@ private void assertFoundPeers(DiscoveryNode... expectedNodesArray) { assertNotifiedOfAllUpdates(); } + private void assertPeersNotDiscovered(DiscoveryNode... undiscoveredNodesArray) { + final Set undiscoveredNodes = Arrays.stream(undiscoveredNodesArray).collect(Collectors.toSet()); + final List actualNodesList = StreamSupport.stream(peerFinder.getFoundPeers().spliterator(), false) + .collect(Collectors.toList()); + final HashSet actualNodesSet = new HashSet<>(actualNodesList); + Set intersection = new HashSet<>(actualNodesSet); + intersection.retainAll(undiscoveredNodes); + assertEquals(intersection.size(), 0); + } + private void assertNotifiedOfAllUpdates() { final Stream actualNodes = StreamSupport.stream(peerFinder.getFoundPeers().spliterator(), false); final Stream notifiedNodes = StreamSupport.stream(foundPeersFromNotification.spliterator(), false); diff --git a/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDecommissionActionTests.java b/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDecommissionActionTests.java new file mode 100644 index 0000000000000..b5f61f751b19f --- /dev/null +++ b/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDecommissionActionTests.java @@ -0,0 +1,96 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.rest.action.admin.cluster; + +import org.junit.Before; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; +import org.opensearch.rest.RestRequest; +import org.opensearch.test.rest.FakeRestRequest; +import org.opensearch.test.rest.RestActionTestCase; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class RestDecommissionActionTests extends RestActionTestCase { + + private RestDecommissionAction action; + + @Before + public void setupAction() { + action = new RestDecommissionAction(); + controller().registerHandler(action); + } + + public void testCreateRequest() throws IOException { + Map params = new HashMap<>(); + params.put("awareness_attribute_name", "zone"); + params.put("awareness_attribute_value", "zone-1"); + params.put("draining_timeout", "60s"); + + RestRequest deprecatedRequest = buildRestRequest(params); + + DecommissionRequest request = action.createRequest(deprecatedRequest); + assertEquals(request.getDecommissionAttribute().attributeName(), "zone"); + assertEquals(request.getDecommissionAttribute().attributeValue(), "zone-1"); + assertEquals(request.getDelayTimeout().getSeconds(), 120); + assertEquals(deprecatedRequest.getHttpRequest().method(), RestRequest.Method.PUT); + } + + public void testCreateRequestWithDefaultTimeout() throws IOException { + Map params = new HashMap<>(); + params.put("awareness_attribute_name", "zone"); + params.put("awareness_attribute_value", "zone-1"); + + RestRequest deprecatedRequest = buildRestRequest(params); + + DecommissionRequest request = action.createRequest(deprecatedRequest); + assertEquals(request.getDecommissionAttribute().attributeName(), "zone"); + assertEquals(request.getDecommissionAttribute().attributeValue(), "zone-1"); + assertEquals(request.getDelayTimeout().getSeconds(), DecommissionRequest.DEFAULT_NODE_DRAINING_TIMEOUT.getSeconds()); + assertEquals(deprecatedRequest.getHttpRequest().method(), RestRequest.Method.PUT); + } + + public void testCreateRequestWithNoDelay() throws IOException { + Map params = new HashMap<>(); + params.put("awareness_attribute_name", "zone"); + params.put("awareness_attribute_value", "zone-1"); + params.put("no_delay", "true"); + + RestRequest deprecatedRequest = buildRestRequest(params); + + DecommissionRequest request = action.createRequest(deprecatedRequest); + assertEquals(request.getDecommissionAttribute().attributeName(), "zone"); + assertEquals(request.getDecommissionAttribute().attributeValue(), "zone-1"); + assertEquals(request.getDelayTimeout().getSeconds(), 0); + assertEquals(deprecatedRequest.getHttpRequest().method(), RestRequest.Method.PUT); + } + + public void testCreateRequestWithDelayTimeout() throws IOException { + Map params = new HashMap<>(); + params.put("awareness_attribute_name", "zone"); + params.put("awareness_attribute_value", "zone-1"); + params.put("delay_timeout", "300s"); + + RestRequest deprecatedRequest = buildRestRequest(params); + + DecommissionRequest request = action.createRequest(deprecatedRequest); + assertEquals(request.getDecommissionAttribute().attributeName(), "zone"); + assertEquals(request.getDecommissionAttribute().attributeValue(), "zone-1"); + assertEquals(request.getDelayTimeout().getSeconds(), 300); + assertEquals(deprecatedRequest.getHttpRequest().method(), RestRequest.Method.PUT); + } + + private FakeRestRequest buildRestRequest(Map params) { + return new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.PUT) + .withPath("/_cluster/decommission/awareness/{awareness_attribute_name}/{awareness_attribute_value}") + .withParams(params) + .build(); + } +} diff --git a/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateActionTests.java b/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateActionTests.java new file mode 100644 index 0000000000000..01f988efdf6eb --- /dev/null +++ b/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateActionTests.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.rest.action.admin.cluster; + +import org.junit.Before; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest; +import org.opensearch.rest.RestHandler; +import org.opensearch.rest.RestRequest; +import org.opensearch.test.rest.RestActionTestCase; + +import java.util.List; + +public class RestDeleteDecommissionStateActionTests extends RestActionTestCase { + + private RestDeleteDecommissionStateAction action; + + @Before + public void setupAction() { + action = new RestDeleteDecommissionStateAction(); + controller().registerHandler(action); + } + + public void testRoutes() { + List routes = action.routes(); + RestHandler.Route route = routes.get(0); + assertEquals(route.getMethod(), RestRequest.Method.DELETE); + assertEquals("/_cluster/decommission/awareness", route.getPath()); + } + + public void testCreateRequest() { + DeleteDecommissionStateRequest request = action.createRequest(); + assertNotNull(request); + } +}