From c37fc34b2c61f985fdf132eaaf0a4289822f00d8 Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Thu, 17 May 2018 10:57:25 +0200 Subject: [PATCH] [Tests] Add debug information to CorruptedFileIT This test failed but the cause is not obvious. This commit adds more debug logging traces so that if it reproduces we could gather more information. Related #30577 --- .../blobstore/BlobStoreRepository.java | 4 +--- .../index/store/CorruptedFileIT.java | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index fcbc54efbf780..ba4b68b20b246 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -1147,6 +1147,7 @@ public void snapshot(final IndexCommit snapshotIndexCommit) { // TODO apparently we don't use the MetadataSnapshot#.recoveryDiff(...) here but we should final Collection fileNames; try { + logger.trace("[{}] [{}] Loading store metadata using index commit [{}]", shardId, snapshotId, snapshotIndexCommit); metadata = store.getMetadata(snapshotIndexCommit); fileNames = snapshotIndexCommit.getFileNames(); } catch (IOException e) { @@ -1242,9 +1243,6 @@ public void snapshot(final IndexCommit snapshotIndexCommit) { /** * Snapshot individual file - *

- * This is asynchronous method. Upon completion of the operation latch is getting counted down and any failures are - * added to the {@code failures} list * * @param fileInfo file to be snapshotted */ diff --git a/server/src/test/java/org/elasticsearch/index/store/CorruptedFileIT.java b/server/src/test/java/org/elasticsearch/index/store/CorruptedFileIT.java index bbfa56a0e55fe..59156c91558c7 100644 --- a/server/src/test/java/org/elasticsearch/index/store/CorruptedFileIT.java +++ b/server/src/test/java/org/elasticsearch/index/store/CorruptedFileIT.java @@ -470,7 +470,7 @@ protected void sendRequest(Connection connection, long requestId, String action, * TODO once checksum verification on snapshotting is implemented this test needs to be fixed or split into several * parts... We should also corrupt files on the actual snapshot and check that we don't restore the corrupted shard. */ - @TestLogging("org.elasticsearch.monitor.fs:DEBUG") + @TestLogging("org.elasticsearch.repositories:TRACE,org.elasticsearch.snapshots:TRACE") public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, InterruptedException, IOException { int numDocs = scaledRandomIntBetween(100, 1000); internalCluster().ensureAtLeastNumDataNodes(2); @@ -494,6 +494,7 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I assertHitCount(countResponse, numDocs); ShardRouting shardRouting = corruptRandomPrimaryFile(false); + logger.info("--> shard {} has a corrupted file", shardRouting); // we don't corrupt segments.gen since S/R doesn't snapshot this file // the other problem here why we can't corrupt segments.X files is that the snapshot flushes again before // it snapshots and that will write a new segments.X+1 file @@ -504,9 +505,12 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I .put("compress", randomBoolean()) .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES))); logger.info("--> snapshot"); - CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(true).setIndices("test").get(); - assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.PARTIAL)); - logger.info("failed during snapshot -- maybe SI file got corrupted"); + final CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap") + .setWaitForCompletion(true) + .setIndices("test") + .get(); + final SnapshotState snapshotState = createSnapshotResponse.getSnapshotInfo().state(); + logger.info("--> snapshot terminated with state " + snapshotState); final List files = listShardFiles(shardRouting); Path corruptedFile = null; for (Path file : files) { @@ -515,6 +519,11 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I break; } } + if (snapshotState != SnapshotState.PARTIAL) { + logger.info("--> listing shard files for investigation"); + files.forEach(f -> logger.info("path: {}", f.toAbsolutePath())); + } + assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.PARTIAL)); assertThat(corruptedFile, notNullValue()); }