From b963267f4dbb3c78134c0b821c00fddec4ab69ff Mon Sep 17 00:00:00 2001 From: Nikolaj Volgushev Date: Thu, 7 Nov 2024 13:37:07 +0100 Subject: [PATCH] Fix race conditions in file settings service tests (#116309) This test resolves two race conditions in `FileSettingsServiceTests#testProcessFileChanges`: 1. The test used `writeTestFile` to update the settings.json file. It did so in multiple steps: first it created a temp file in the operator directory, then moved that file to replace the existing settings.json file. The first step (creating the temp file) triggered the watcher thread in the file settings service to access the settings.json file to check for changes. When this access happened concurrently with the `move` call inside `writeTestFile` the test would throw on a Windows file-system (mocked or real), since you can't move a file while it's open. To fix this, the PR changes `writeTestFile` to creating a temp file elsewhere and simplifies the method. Instead of relying on this method (and multiple file operations) to update the file, the PR instead simply "touches" the settings file with a timestamp update to trigger file processing (more details also in this [comment](https://github.com/elastic/elasticsearch/issues/115280#issuecomment-2457402953)). 2. The test awaited latches that would count down when `ReservedClusterStateService#process` was invoked. However, at this point in the file settings processing flow, the settings.json file is still open and would therefore likewise block subsequent writes that fall into the small window of the file still being open. This PR instead adds latches based on file-changed listeners which are reliably invoked _after_ the file is closed. Resolves: https://github.com/elastic/elasticsearch/issues/115280 --- muted-tests.yml | 3 - .../service/FileSettingsServiceTests.java | 85 ++++++------------- 2 files changed, 27 insertions(+), 61 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 5194b05eac258..7f5c49233e5ad 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -246,9 +246,6 @@ tests: - class: org.elasticsearch.search.basic.SearchWhileRelocatingIT method: testSearchAndRelocateConcurrentlyRandomReplicas issue: https://github.com/elastic/elasticsearch/issues/116145 -- class: org.elasticsearch.reservedstate.service.FileSettingsServiceTests - method: testProcessFileChanges - issue: https://github.com/elastic/elasticsearch/issues/115280 - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=ml/filter_crud/Test update filter} issue: https://github.com/elastic/elasticsearch/issues/116271 diff --git a/server/src/test/java/org/elasticsearch/reservedstate/service/FileSettingsServiceTests.java b/server/src/test/java/org/elasticsearch/reservedstate/service/FileSettingsServiceTests.java index 3622285478df2..aa76245c20679 100644 --- a/server/src/test/java/org/elasticsearch/reservedstate/service/FileSettingsServiceTests.java +++ b/server/src/test/java/org/elasticsearch/reservedstate/service/FileSettingsServiceTests.java @@ -29,8 +29,6 @@ import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.IOUtils; -import org.elasticsearch.core.Strings; import org.elasticsearch.core.TimeValue; import org.elasticsearch.env.BuildVersion; import org.elasticsearch.env.Environment; @@ -46,10 +44,14 @@ import org.mockito.stubbing.Answer; import java.io.IOException; -import java.io.UncheckedIOException; import java.nio.file.AtomicMoveNotSupportedException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.attribute.FileTime; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.ZoneOffset; import java.util.List; import java.util.Map; import java.util.Set; @@ -59,7 +61,6 @@ import java.util.function.Consumer; import static java.nio.file.StandardCopyOption.ATOMIC_MOVE; -import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; import static org.elasticsearch.node.Node.NODE_NAME_SETTING; import static org.hamcrest.Matchers.anEmptyMap; import static org.hamcrest.Matchers.hasEntry; @@ -210,24 +211,17 @@ public void testInitialFileWorks() throws Exception { return null; }).when(controller).process(any(), any(XContentParser.class), any(), any()); - CountDownLatch fileProcessingLatch = new CountDownLatch(1); + CountDownLatch processFileLatch = new CountDownLatch(1); + fileSettingsService.addFileChangedListener(processFileLatch::countDown); Files.createDirectories(fileSettingsService.watchedFileDir()); // contents of the JSON don't matter, we just need a file to exist writeTestFile(fileSettingsService.watchedFile(), "{}"); - doAnswer((Answer) invocation -> { - try { - return invocation.callRealMethod(); - } finally { - fileProcessingLatch.countDown(); - } - }).when(fileSettingsService).processFileOnServiceStart(); - fileSettingsService.start(); fileSettingsService.clusterChanged(new ClusterChangedEvent("test", clusterService.state(), ClusterState.EMPTY_STATE)); - longAwait(fileProcessingLatch); + longAwait(processFileLatch); verify(fileSettingsService, times(1)).processFileOnServiceStart(); verify(controller, times(1)).process(any(), any(XContentParser.class), eq(ReservedStateVersionCheck.HIGHER_OR_SAME_VERSION), any()); @@ -240,23 +234,8 @@ public void testProcessFileChanges() throws Exception { return null; }).when(controller).process(any(), any(XContentParser.class), any(), any()); - CountDownLatch changesOnStartLatch = new CountDownLatch(1); - doAnswer((Answer) invocation -> { - try { - return invocation.callRealMethod(); - } finally { - changesOnStartLatch.countDown(); - } - }).when(fileSettingsService).processFileOnServiceStart(); - - CountDownLatch changesLatch = new CountDownLatch(1); - doAnswer((Answer) invocation -> { - try { - return invocation.callRealMethod(); - } finally { - changesLatch.countDown(); - } - }).when(fileSettingsService).processFileChanges(); + CountDownLatch processFileCreationLatch = new CountDownLatch(1); + fileSettingsService.addFileChangedListener(processFileCreationLatch::countDown); Files.createDirectories(fileSettingsService.watchedFileDir()); // contents of the JSON don't matter, we just need a file to exist @@ -265,14 +244,19 @@ public void testProcessFileChanges() throws Exception { fileSettingsService.start(); fileSettingsService.clusterChanged(new ClusterChangedEvent("test", clusterService.state(), ClusterState.EMPTY_STATE)); - longAwait(changesOnStartLatch); + longAwait(processFileCreationLatch); + + CountDownLatch processFileChangeLatch = new CountDownLatch(1); + fileSettingsService.addFileChangedListener(processFileChangeLatch::countDown); verify(fileSettingsService, times(1)).processFileOnServiceStart(); verify(controller, times(1)).process(any(), any(XContentParser.class), eq(ReservedStateVersionCheck.HIGHER_OR_SAME_VERSION), any()); - // second file change; contents still don't matter - writeTestFile(fileSettingsService.watchedFile(), "[]"); - longAwait(changesLatch); + // Touch the file to get an update + Instant now = LocalDateTime.now(ZoneId.systemDefault()).toInstant(ZoneOffset.ofHours(0)); + Files.setLastModifiedTime(fileSettingsService.watchedFile(), FileTime.from(now)); + + longAwait(processFileChangeLatch); verify(fileSettingsService, times(1)).processFileChanges(); verify(controller, times(1)).process(any(), any(XContentParser.class), eq(ReservedStateVersionCheck.HIGHER_VERSION_ONLY), any()); @@ -367,30 +351,15 @@ public void testHandleSnapshotRestoreResetsMetadata() throws Exception { } // helpers - private static void writeTestFile(Path path, String contents) { - Path tempFile = null; + private static void writeTestFile(Path path, String contents) throws IOException { + logger.info("Writing settings file under [{}]", path.toAbsolutePath()); + Path tempFilePath = createTempFile(); + Files.writeString(tempFilePath, contents); try { - logger.info("Creating settings temp file under [{}]", path.toAbsolutePath()); - tempFile = Files.createTempFile(path.getParent(), path.getFileName().toString(), "tmp"); - logger.info("Created settings temp file [{}]", tempFile.getFileName()); - Files.writeString(tempFile, contents); - - try { - logger.info("Moving settings temp file to replace [{}]", tempFile.getFileName()); - Files.move(tempFile, path, REPLACE_EXISTING, ATOMIC_MOVE); - } catch (AtomicMoveNotSupportedException e) { - logger.info( - "Atomic move was not available. Falling back on non-atomic move for settings temp file to replace [{}]", - tempFile.getFileName() - ); - Files.move(tempFile, path, REPLACE_EXISTING); - } - } catch (final IOException e) { - throw new UncheckedIOException(Strings.format("could not write file [%s]", path.toAbsolutePath()), e); - } finally { - logger.info("Deleting settings temp file [{}]", tempFile != null ? tempFile.getFileName() : null); - // we are ignoring exceptions here, so we do not need handle whether or not tempFile was initialized nor if the file exists - IOUtils.deleteFilesIgnoringExceptions(tempFile); + Files.move(tempFilePath, path, ATOMIC_MOVE); + } catch (AtomicMoveNotSupportedException e) { + logger.info("Atomic move not available. Falling back on non-atomic move to write [{}]", path.toAbsolutePath()); + Files.move(tempFilePath, path); } }