Skip to content

Commit

Permalink
Fix flaky DecommissionControllerTests.testTimesOut (#4683)
Browse files Browse the repository at this point in the history
This test fails pretty reliably if I run it on repeat. I believe the
problem is that the test assumes the function will take longer than 2ms,
which is likely not a valid assumption in all cases. Fortunately, I can
pass in a zero duration which is guaranteed to timeout even if the
system clock does not advance at all.

Also moved the assertions out of the callback into the main test method,
otherwise the assertion error messages would get buried and the test
report would just show a timeout error.

Signed-off-by: Andrew Ross <andrross@amazon.com>

Signed-off-by: Andrew Ross <andrross@amazon.com>
  • Loading branch information
andrross authored Oct 5, 2022
1 parent ada2467 commit e189179
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- [Bug]: Fixed invalid location of JDK dependency for arm64 architecture([#4613](https://github.com/opensearch-project/OpenSearch/pull/4613))
- [Bug]: Alias filter lost after rollover ([#4499](https://github.com/opensearch-project/OpenSearch/pull/4499))
- Attempt to fix Github workflow for Gradle Check job ([#4679](https://github.com/opensearch-project/OpenSearch/pull/4679))
- Fix flaky DecommissionControllerTests.testTimesOut ([4683](https://github.com/opensearch-project/OpenSearch/pull/4683))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.opensearch.cluster.decommission;

import org.hamcrest.MatcherAssert;
import org.junit.After;
import org.junit.Before;
import org.opensearch.OpenSearchTimeoutException;
Expand Down Expand Up @@ -45,6 +46,7 @@
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

Expand All @@ -53,6 +55,7 @@
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.sameInstance;
import static org.opensearch.cluster.ClusterState.builder;
import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService;
Expand Down Expand Up @@ -213,24 +216,25 @@ public void testTimesOut() throws InterruptedException {
nodesToBeRemoved.add(clusterService.state().nodes().get("node13"));
nodesToBeRemoved.add(clusterService.state().nodes().get("node14"));
nodesToBeRemoved.add(clusterService.state().nodes().get("node15"));
final AtomicReference<Exception> exceptionReference = new AtomicReference<>();
decommissionController.removeDecommissionedNodes(
nodesToBeRemoved,
"unit-test-timeout",
TimeValue.timeValueMillis(2),
new ActionListener<Void>() {
TimeValue.timeValueMillis(0),
new ActionListener<>() {
@Override
public void onResponse(Void unused) {
fail("response shouldn't have been called");
}
public void onResponse(Void unused) {}

@Override
public void onFailure(Exception e) {
assertThat(e, instanceOf(OpenSearchTimeoutException.class));
assertThat(e.getMessage(), containsString("waiting for removal of decommissioned nodes"));
exceptionReference.set(e);
countDownLatch.countDown();
}
}
);
MatcherAssert.assertThat("Expected onFailure to be called", exceptionReference.get(), notNullValue());
MatcherAssert.assertThat(exceptionReference.get(), instanceOf(OpenSearchTimeoutException.class));
MatcherAssert.assertThat(exceptionReference.get().getMessage(), containsString("waiting for removal of decommissioned nodes"));
assertTrue(countDownLatch.await(30, TimeUnit.SECONDS));
}

Expand Down

0 comments on commit e189179

Please sign in to comment.