Skip to content

Commit

Permalink
Merge branch 'main' into fix-bwc-ClusterInfoRequest
Browse files Browse the repository at this point in the history
  • Loading branch information
andrross authored Aug 26, 2022
2 parents d1df779 + c62cecb commit d6d27dd
Show file tree
Hide file tree
Showing 52 changed files with 232 additions and 44 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Github workflow for changelog verification ([#4085](https://github.com/opensearch-project/OpenSearch/pull/4085))

### Changed
- Dependency updates (httpcore, mockito, slf4j, httpasyncclient, commons-codec) ([#4308](https://github.com/opensearch-project/OpenSearch/pull/4308))

### Deprecated

Expand All @@ -15,6 +16,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- `opensearch-service.bat start` and `opensearch-service.bat manager` failing to run ([#4289](https://github.com/opensearch-project/OpenSearch/pull/4289))
- PR reference to checkout code for changelog verifier ([#4296](https://github.com/opensearch-project/OpenSearch/pull/4296))
- Restore using the class ClusterInfoRequest and ClusterInfoRequestBuilder from package 'org.opensearch.action.support.master.info' for subclasses ([#4307](https://github.com/opensearch-project/OpenSearch/pull/4307))
- Do not fail replica shard due to primary closure ([#4133](https://github.com/opensearch-project/OpenSearch/pull/4133))

### Security

Expand Down
10 changes: 5 additions & 5 deletions buildSrc/version.properties
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ snakeyaml = 1.26
icu4j = 70.1
supercsv = 2.4.0
log4j = 2.17.1
slf4j = 1.6.2
slf4j = 1.7.36
asm = 9.3

# when updating the JNA version, also update the version in buildSrc/build.gradle
Expand All @@ -26,10 +26,10 @@ joda = 2.10.13

# client dependencies
httpclient = 4.5.13
httpcore = 4.4.12
httpasyncclient = 4.1.4
httpcore = 4.4.15
httpasyncclient = 4.1.5
commonslogging = 1.2
commonscodec = 1.13
commonscodec = 1.15

# plugin dependencies
aws = 1.12.270
Expand All @@ -42,7 +42,7 @@ bouncycastle=1.70
randomizedrunner = 2.7.1
junit = 4.13.2
hamcrest = 2.1
mockito = 4.6.1
mockito = 4.7.0
objenesis = 3.2
bytebuddy = 1.12.12

Expand Down
1 change: 0 additions & 1 deletion client/rest/licenses/commons-codec-1.13.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions client/rest/licenses/commons-codec-1.15.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
49d94806b6e3dc933dacbd8acb0fdbab8ebd1e5d
1 change: 0 additions & 1 deletion client/rest/licenses/httpasyncclient-4.1.4.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions client/rest/licenses/httpasyncclient-4.1.5.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
cd18227f1eb8e9a263286c1d7362ceb24f6f9b32
1 change: 0 additions & 1 deletion client/rest/licenses/httpcore-4.4.12.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions client/rest/licenses/httpcore-4.4.15.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
7f2e0c573eaa7a74bac2e89b359e1f73d92a0a1d
1 change: 0 additions & 1 deletion client/rest/licenses/httpcore-nio-4.4.12.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions client/rest/licenses/httpcore-nio-4.4.15.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
85d2b6825d42db909a1474f0ffbd6328429b7a32
1 change: 0 additions & 1 deletion client/sniffer/licenses/commons-codec-1.13.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions client/sniffer/licenses/commons-codec-1.15.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
49d94806b6e3dc933dacbd8acb0fdbab8ebd1e5d
1 change: 0 additions & 1 deletion client/sniffer/licenses/httpcore-4.4.12.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions client/sniffer/licenses/httpcore-4.4.15.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
7f2e0c573eaa7a74bac2e89b359e1f73d92a0a1d

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
49d94806b6e3dc933dacbd8acb0fdbab8ebd1e5d

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
49d94806b6e3dc933dacbd8acb0fdbab8ebd1e5d

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
7f2e0c573eaa7a74bac2e89b359e1f73d92a0a1d
1 change: 0 additions & 1 deletion plugins/discovery-ec2/licenses/commons-codec-1.13.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions plugins/discovery-ec2/licenses/commons-codec-1.15.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
49d94806b6e3dc933dacbd8acb0fdbab8ebd1e5d
1 change: 0 additions & 1 deletion plugins/discovery-ec2/licenses/httpcore-4.4.12.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions plugins/discovery-ec2/licenses/httpcore-4.4.15.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
7f2e0c573eaa7a74bac2e89b359e1f73d92a0a1d
1 change: 0 additions & 1 deletion plugins/discovery-gce/licenses/commons-codec-1.13.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions plugins/discovery-gce/licenses/commons-codec-1.15.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
49d94806b6e3dc933dacbd8acb0fdbab8ebd1e5d
1 change: 0 additions & 1 deletion plugins/discovery-gce/licenses/httpcore-4.4.12.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions plugins/discovery-gce/licenses/httpcore-4.4.15.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
7f2e0c573eaa7a74bac2e89b359e1f73d92a0a1d

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
49d94806b6e3dc933dacbd8acb0fdbab8ebd1e5d

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
6c62681a2f655b49963a5983b8b0950a6120ae14
1 change: 0 additions & 1 deletion plugins/repository-azure/licenses/slf4j-api-1.6.2.jar.sha1

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
6c62681a2f655b49963a5983b8b0950a6120ae14

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
49d94806b6e3dc933dacbd8acb0fdbab8ebd1e5d

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
49d94806b6e3dc933dacbd8acb0fdbab8ebd1e5d
1 change: 0 additions & 1 deletion plugins/repository-hdfs/licenses/slf4j-api-1.6.2.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions plugins/repository-hdfs/licenses/slf4j-api-1.7.36.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
6c62681a2f655b49963a5983b8b0950a6120ae14
1 change: 0 additions & 1 deletion plugins/repository-s3/licenses/commons-codec-1.13.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions plugins/repository-s3/licenses/commons-codec-1.15.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
49d94806b6e3dc933dacbd8acb0fdbab8ebd1e5d
1 change: 0 additions & 1 deletion plugins/repository-s3/licenses/httpcore-4.4.12.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions plugins/repository-s3/licenses/httpcore-4.4.15.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
7f2e0c573eaa7a74bac2e89b359e1f73d92a0a1d
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

import org.hamcrest.MatcherAssert;
import org.opensearch.action.admin.cluster.health.ClusterHealthResponse;
import org.opensearch.action.admin.cluster.node.stats.NodeStats;
import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse;
Expand All @@ -48,6 +49,7 @@
import org.opensearch.action.admin.indices.shards.IndicesShardStoresResponse;
import org.opensearch.action.index.IndexRequestBuilder;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.action.support.replication.TransportReplicationAction;
import org.opensearch.client.Requests;
import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.health.ClusterHealthStatus;
Expand Down Expand Up @@ -108,6 +110,7 @@
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;

import static org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest.Metric.FS;
import static org.opensearch.common.util.CollectionUtils.iterableAsArrayList;
Expand Down Expand Up @@ -698,6 +701,104 @@ public void testReplicaCorruption() throws Exception {
ensureGreen(TimeValue.timeValueSeconds(60));
}

public void testPrimaryCorruptionDuringReplicationDoesNotFailReplicaShard() throws Exception {
internalCluster().ensureAtLeastNumDataNodes(2);
final NodesStatsResponse nodeStats = client().admin().cluster().prepareNodesStats().get();
final List<NodeStats> dataNodeStats = nodeStats.getNodes()
.stream()
.filter(stat -> stat.getNode().isDataNode())
.collect(Collectors.toUnmodifiableList());
MatcherAssert.assertThat(dataNodeStats.size(), greaterThanOrEqualTo(2));

final NodeStats primaryNode = dataNodeStats.get(0);
final NodeStats replicaNode = dataNodeStats.get(1);
assertAcked(
prepareCreate("test").setSettings(
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, "0")
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put("index.routing.allocation.include._name", primaryNode.getNode().getName())
.put(EnableAllocationDecider.INDEX_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), EnableAllocationDecider.Rebalance.NONE)
.put("index.allocation.max_retries", Integer.MAX_VALUE) // keep on retrying

)
);
ensureGreen();

// Add custom send behavior between primary and replica that will
// count down a latch to indicate that a replication operation is
// currently in flight, and then block on a second latch that will
// be released once the primary shard has been corrupted.
final CountDownLatch indexingInFlight = new CountDownLatch(1);
final CountDownLatch corruptionHasHappened = new CountDownLatch(1);
final MockTransportService mockTransportService = ((MockTransportService) internalCluster().getInstance(
TransportService.class,
primaryNode.getNode().getName()
));
mockTransportService.addSendBehavior(
internalCluster().getInstance(TransportService.class, replicaNode.getNode().getName()),
(connection, requestId, action, request, options) -> {
if (request instanceof TransportReplicationAction.ConcreteShardRequest) {
indexingInFlight.countDown();
try {
corruptionHasHappened.await();
} catch (InterruptedException e) {
logger.info("Interrupted while waiting for corruption");
}
}
connection.sendRequest(requestId, action, request, options);
}
);

// Configure the modified data node as a replica
final Settings build = Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, "1")
.put("index.routing.allocation.include._name", primaryNode.getNode().getName() + "," + replicaNode.getNode().getName())
.build();
client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
client().admin().cluster().prepareReroute().get();
ensureGreen();

// Create a snapshot repository. This repo is used to take a snapshot after
// corrupting a file, which causes the node to notice the corrupt data and
// close the shard.
assertAcked(
client().admin()
.cluster()
.preparePutRepository("test-repo")
.setType("fs")
.setSettings(
Settings.builder()
.put("location", randomRepoPath().toAbsolutePath())
.put("compress", randomBoolean())
.put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)
)
);

client().prepareIndex("test").setSource("field", "value").execute();
indexingInFlight.await();

// Corrupt a file on the primary then take a snapshot. Snapshot should
// finish in the PARTIAL state since the corrupted file will cause a checksum
// validation failure.
final ShardRouting corruptedShardRouting = corruptRandomPrimaryFile();
logger.info("--> {} corrupted", corruptedShardRouting);
final CreateSnapshotResponse createSnapshotResponse = client().admin()
.cluster()
.prepareCreateSnapshot("test-repo", "test-snap")
.setWaitForCompletion(true)
.setIndices("test")
.get();
final SnapshotState snapshotState = createSnapshotResponse.getSnapshotInfo().state();
MatcherAssert.assertThat("Expect file corruption to cause PARTIAL snapshot state", snapshotState, equalTo(SnapshotState.PARTIAL));

// Unblock the blocked indexing thread now that corruption on the primary has been confirmed
corruptionHasHappened.countDown();

// Assert the cluster returns to green status because the replica will be promoted to primary
ensureGreen();
}

private int numShards(String... index) {
ClusterState state = client().admin().cluster().prepareState().get().getState();
GroupShardsIterator shardIterators = state.getRoutingTable().activePrimaryShardsGrouped(index, false);
Expand Down
7 changes: 7 additions & 0 deletions server/src/main/java/org/opensearch/OpenSearchException.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
import static java.util.Collections.singletonMap;
import static java.util.Collections.unmodifiableMap;
import static org.opensearch.Version.V_2_1_0;
import static org.opensearch.Version.V_3_0_0;
import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_UUID_NA_VALUE;
import static org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
import static org.opensearch.common.xcontent.XContentParserUtils.ensureFieldName;
Expand Down Expand Up @@ -1601,6 +1602,12 @@ private enum OpenSearchExceptionHandle {
org.opensearch.indices.replication.common.ReplicationFailedException::new,
161,
V_2_1_0
),
PRIMARY_SHARD_CLOSED_EXCEPTION(
org.opensearch.index.shard.PrimaryShardClosedException.class,
org.opensearch.index.shard.PrimaryShardClosedException::new,
162,
V_3_0_0
);

final Class<? extends OpenSearchException> exceptionClass;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.opensearch.action.support.RetryableAction;
import org.opensearch.common.lease.Releasable;
import org.opensearch.common.util.concurrent.ConcurrentCollections;
import org.opensearch.index.shard.PrimaryShardClosedException;
import org.opensearch.index.shard.IndexShardClosedException;
import org.opensearch.index.shard.ReplicationGroup;
import org.opensearch.index.shard.ShardId;
Expand All @@ -45,6 +46,7 @@
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Supplier;

/**
* Pending Replication Actions
Expand Down Expand Up @@ -121,23 +123,19 @@ synchronized void acceptNewTrackedAllocationIds(Set<String> trackedAllocationIds
}
}

cancelActions(toCancel, "Replica left ReplicationGroup");
cancelActions(toCancel, () -> new IndexShardClosedException(shardId, "Replica left ReplicationGroup"));
}

@Override
public synchronized void close() {
ArrayList<Set<RetryableAction<?>>> toCancel = new ArrayList<>(onGoingReplicationActions.values());
onGoingReplicationActions.clear();

cancelActions(toCancel, "Primary closed.");
cancelActions(toCancel, () -> new PrimaryShardClosedException(shardId));
}

private void cancelActions(ArrayList<Set<RetryableAction<?>>> toCancel, String message) {
private void cancelActions(ArrayList<Set<RetryableAction<?>>> toCancel, Supplier<IndexShardClosedException> exceptionSupplier) {
threadPool.executor(ThreadPool.Names.GENERIC)
.execute(
() -> toCancel.stream()
.flatMap(Collection::stream)
.forEach(action -> action.cancel(new IndexShardClosedException(shardId, message)))
);
.execute(() -> toCancel.stream().flatMap(Collection::stream).forEach(action -> action.cancel(exceptionSupplier.get())));
}
}
Loading

0 comments on commit d6d27dd

Please sign in to comment.