Skip to content

Commit

Permalink
[Tests] Add debug information to CorruptedFileIT
Browse files Browse the repository at this point in the history
This test failed but the cause is not obvious. This commit adds more
debug logging traces so that if it reproduces we could gather more
information.

Related elastic#30577
  • Loading branch information
tlrx authored and ywelsch committed May 23, 2018
1 parent 3a6f73b commit c37fc34
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1147,6 +1147,7 @@ public void snapshot(final IndexCommit snapshotIndexCommit) {
// TODO apparently we don't use the MetadataSnapshot#.recoveryDiff(...) here but we should
final Collection<String> fileNames;
try {
logger.trace("[{}] [{}] Loading store metadata using index commit [{}]", shardId, snapshotId, snapshotIndexCommit);
metadata = store.getMetadata(snapshotIndexCommit);
fileNames = snapshotIndexCommit.getFileNames();
} catch (IOException e) {
Expand Down Expand Up @@ -1242,9 +1243,6 @@ public void snapshot(final IndexCommit snapshotIndexCommit) {

/**
* Snapshot individual file
* <p>
* This is asynchronous method. Upon completion of the operation latch is getting counted down and any failures are
* added to the {@code failures} list
*
* @param fileInfo file to be snapshotted
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ protected void sendRequest(Connection connection, long requestId, String action,
* TODO once checksum verification on snapshotting is implemented this test needs to be fixed or split into several
* parts... We should also corrupt files on the actual snapshot and check that we don't restore the corrupted shard.
*/
@TestLogging("org.elasticsearch.monitor.fs:DEBUG")
@TestLogging("org.elasticsearch.repositories:TRACE,org.elasticsearch.snapshots:TRACE")
public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, InterruptedException, IOException {
int numDocs = scaledRandomIntBetween(100, 1000);
internalCluster().ensureAtLeastNumDataNodes(2);
Expand All @@ -494,6 +494,7 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I
assertHitCount(countResponse, numDocs);

ShardRouting shardRouting = corruptRandomPrimaryFile(false);
logger.info("--> shard {} has a corrupted file", shardRouting);
// we don't corrupt segments.gen since S/R doesn't snapshot this file
// the other problem here why we can't corrupt segments.X files is that the snapshot flushes again before
// it snapshots and that will write a new segments.X+1 file
Expand All @@ -504,9 +505,12 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I
.put("compress", randomBoolean())
.put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
logger.info("--> snapshot");
CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(true).setIndices("test").get();
assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.PARTIAL));
logger.info("failed during snapshot -- maybe SI file got corrupted");
final CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap")
.setWaitForCompletion(true)
.setIndices("test")
.get();
final SnapshotState snapshotState = createSnapshotResponse.getSnapshotInfo().state();
logger.info("--> snapshot terminated with state " + snapshotState);
final List<Path> files = listShardFiles(shardRouting);
Path corruptedFile = null;
for (Path file : files) {
Expand All @@ -515,6 +519,11 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I
break;
}
}
if (snapshotState != SnapshotState.PARTIAL) {
logger.info("--> listing shard files for investigation");
files.forEach(f -> logger.info("path: {}", f.toAbsolutePath()));
}
assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.PARTIAL));
assertThat(corruptedFile, notNullValue());
}

Expand Down

0 comments on commit c37fc34

Please sign in to comment.