Skip to content

Commit

Permalink
Fix race conditions in file settings service tests (#116309)
Browse files Browse the repository at this point in the history
This test resolves two race conditions in
`FileSettingsServiceTests#testProcessFileChanges`:

1. The test used `writeTestFile` to update the settings.json file. It did so in multiple steps: first it created a temp file in the operator directory, then moved that file to replace the existing settings.json file. The first step (creating the temp file) triggered the watcher thread in the file settings service to access the settings.json file to check for changes. When this access happened concurrently with the `move` call inside `writeTestFile` the test would throw on a Windows file-system (mocked or real), since you can't move a file while it's open. To fix this, the PR changes `writeTestFile` to creating a temp file elsewhere and simplifies the method. Instead of relying on this method (and multiple file operations) to update the file, the PR instead simply "touches" the settings file with a timestamp update to trigger file processing (more details also in this [comment](#115280 (comment))). 
2. The test awaited latches that would count down when `ReservedClusterStateService#process` was invoked. However, at this point in the file settings processing flow, the settings.json file is still open and would therefore likewise block subsequent writes that fall into the small window of the file still being open. This PR instead adds latches based on file-changed listeners which are reliably invoked _after_ the file is closed.  

Resolves: #115280
  • Loading branch information
n1v0lg authored and jozala committed Nov 13, 2024
1 parent afe67f3 commit 60cd7ae
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 61 deletions.
3 changes: 0 additions & 3 deletions muted-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,6 @@ tests:
- class: org.elasticsearch.search.basic.SearchWhileRelocatingIT
method: testSearchAndRelocateConcurrentlyRandomReplicas
issue: https://github.com/elastic/elasticsearch/issues/116145
- class: org.elasticsearch.reservedstate.service.FileSettingsServiceTests
method: testProcessFileChanges
issue: https://github.com/elastic/elasticsearch/issues/115280
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
method: test {p0=ml/filter_crud/Test update filter}
issue: https://github.com/elastic/elasticsearch/issues/116271
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.core.Strings;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.env.BuildVersion;
import org.elasticsearch.env.Environment;
Expand All @@ -46,10 +44,14 @@
import org.mockito.stubbing.Answer;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.AtomicMoveNotSupportedException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.FileTime;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand All @@ -59,7 +61,6 @@
import java.util.function.Consumer;

import static java.nio.file.StandardCopyOption.ATOMIC_MOVE;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
import static org.elasticsearch.node.Node.NODE_NAME_SETTING;
import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.hasEntry;
Expand Down Expand Up @@ -210,24 +211,17 @@ public void testInitialFileWorks() throws Exception {
return null;
}).when(controller).process(any(), any(XContentParser.class), any(), any());

CountDownLatch fileProcessingLatch = new CountDownLatch(1);
CountDownLatch processFileLatch = new CountDownLatch(1);
fileSettingsService.addFileChangedListener(processFileLatch::countDown);

Files.createDirectories(fileSettingsService.watchedFileDir());
// contents of the JSON don't matter, we just need a file to exist
writeTestFile(fileSettingsService.watchedFile(), "{}");

doAnswer((Answer<?>) invocation -> {
try {
return invocation.callRealMethod();
} finally {
fileProcessingLatch.countDown();
}
}).when(fileSettingsService).processFileOnServiceStart();

fileSettingsService.start();
fileSettingsService.clusterChanged(new ClusterChangedEvent("test", clusterService.state(), ClusterState.EMPTY_STATE));

longAwait(fileProcessingLatch);
longAwait(processFileLatch);

verify(fileSettingsService, times(1)).processFileOnServiceStart();
verify(controller, times(1)).process(any(), any(XContentParser.class), eq(ReservedStateVersionCheck.HIGHER_OR_SAME_VERSION), any());
Expand All @@ -240,23 +234,8 @@ public void testProcessFileChanges() throws Exception {
return null;
}).when(controller).process(any(), any(XContentParser.class), any(), any());

CountDownLatch changesOnStartLatch = new CountDownLatch(1);
doAnswer((Answer<?>) invocation -> {
try {
return invocation.callRealMethod();
} finally {
changesOnStartLatch.countDown();
}
}).when(fileSettingsService).processFileOnServiceStart();

CountDownLatch changesLatch = new CountDownLatch(1);
doAnswer((Answer<?>) invocation -> {
try {
return invocation.callRealMethod();
} finally {
changesLatch.countDown();
}
}).when(fileSettingsService).processFileChanges();
CountDownLatch processFileCreationLatch = new CountDownLatch(1);
fileSettingsService.addFileChangedListener(processFileCreationLatch::countDown);

Files.createDirectories(fileSettingsService.watchedFileDir());
// contents of the JSON don't matter, we just need a file to exist
Expand All @@ -265,14 +244,19 @@ public void testProcessFileChanges() throws Exception {
fileSettingsService.start();
fileSettingsService.clusterChanged(new ClusterChangedEvent("test", clusterService.state(), ClusterState.EMPTY_STATE));

longAwait(changesOnStartLatch);
longAwait(processFileCreationLatch);

CountDownLatch processFileChangeLatch = new CountDownLatch(1);
fileSettingsService.addFileChangedListener(processFileChangeLatch::countDown);

verify(fileSettingsService, times(1)).processFileOnServiceStart();
verify(controller, times(1)).process(any(), any(XContentParser.class), eq(ReservedStateVersionCheck.HIGHER_OR_SAME_VERSION), any());

// second file change; contents still don't matter
writeTestFile(fileSettingsService.watchedFile(), "[]");
longAwait(changesLatch);
// Touch the file to get an update
Instant now = LocalDateTime.now(ZoneId.systemDefault()).toInstant(ZoneOffset.ofHours(0));
Files.setLastModifiedTime(fileSettingsService.watchedFile(), FileTime.from(now));

longAwait(processFileChangeLatch);

verify(fileSettingsService, times(1)).processFileChanges();
verify(controller, times(1)).process(any(), any(XContentParser.class), eq(ReservedStateVersionCheck.HIGHER_VERSION_ONLY), any());
Expand Down Expand Up @@ -367,30 +351,15 @@ public void testHandleSnapshotRestoreResetsMetadata() throws Exception {
}

// helpers
private static void writeTestFile(Path path, String contents) {
Path tempFile = null;
private static void writeTestFile(Path path, String contents) throws IOException {
logger.info("Writing settings file under [{}]", path.toAbsolutePath());
Path tempFilePath = createTempFile();
Files.writeString(tempFilePath, contents);
try {
logger.info("Creating settings temp file under [{}]", path.toAbsolutePath());
tempFile = Files.createTempFile(path.getParent(), path.getFileName().toString(), "tmp");
logger.info("Created settings temp file [{}]", tempFile.getFileName());
Files.writeString(tempFile, contents);

try {
logger.info("Moving settings temp file to replace [{}]", tempFile.getFileName());
Files.move(tempFile, path, REPLACE_EXISTING, ATOMIC_MOVE);
} catch (AtomicMoveNotSupportedException e) {
logger.info(
"Atomic move was not available. Falling back on non-atomic move for settings temp file to replace [{}]",
tempFile.getFileName()
);
Files.move(tempFile, path, REPLACE_EXISTING);
}
} catch (final IOException e) {
throw new UncheckedIOException(Strings.format("could not write file [%s]", path.toAbsolutePath()), e);
} finally {
logger.info("Deleting settings temp file [{}]", tempFile != null ? tempFile.getFileName() : null);
// we are ignoring exceptions here, so we do not need handle whether or not tempFile was initialized nor if the file exists
IOUtils.deleteFilesIgnoringExceptions(tempFile);
Files.move(tempFilePath, path, ATOMIC_MOVE);
} catch (AtomicMoveNotSupportedException e) {
logger.info("Atomic move not available. Falling back on non-atomic move to write [{}]", path.toAbsolutePath());
Files.move(tempFilePath, path);
}
}

Expand Down

0 comments on commit 60cd7ae

Please sign in to comment.