Revert "Write splitting-bai files when writing (non-sharded) BAM file…
Browse files Browse the repository at this point in the history
…s from Spark. (#2169)"

This reverts commit a30af5a.
tomwhite committed Dec 9, 2016
1 parent 27d1379 commit 3381f1c
Showing 2 changed files with 2 additions and 16 deletions.
@@ -154,7 +154,7 @@ public static void writeReads(
         String absoluteReferenceFile = referenceFile != null ?
                 BucketUtils.makeFilePathAbsolute(referenceFile) :
                 referenceFile;
-        setHadoopBAMConfigurationProperties(ctx, absoluteOutputFile, absoluteReferenceFile, format);
+        setHadoopBAMConfigurationProperties(ctx, absoluteOutputFile, absoluteReferenceFile);
 
         // The underlying reads are required to be in SAMRecord format in order to be
         // written out, so we convert them to SAMRecord explicitly here. If they're already
@@ -380,17 +380,11 @@ static FileStatus[] getBamFragments( final Path directory, final FileSystem fs )
      * from passing a stale value through to htsjdk when multiple calls are made serially
      * with different outputs but the same Spark context
      */
-    private static void setHadoopBAMConfigurationProperties(final JavaSparkContext ctx, final String outputName,
-                                                            final String referenceName, final ReadsWriteFormat format) {
+    private static void setHadoopBAMConfigurationProperties(final JavaSparkContext ctx, final String outputName, final String referenceName) {
         final Configuration conf = ctx.hadoopConfiguration();
 
         if (!IOUtils.isCramFileName(outputName)) { // only set the reference for CRAM output
             conf.unset(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY);
-            if (format == ReadsWriteFormat.SINGLE && IOUtils.isBamFileName(outputName)) {
-                conf.setBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, true);
-            } else {
-                conf.setBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, false);
-            }
         }
         else {
             if (null == referenceName) {
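
Note: with this revert, setHadoopBAMConfigurationProperties no longer touches BAMOutputFormat.WRITE_SPLITTING_BAI at all. As a hypothetical workaround (not part of this commit), a caller could set the same Hadoop-BAM property on the Spark context's Hadoop configuration before invoking ReadsSparkSink.writeReads. This is a minimal sketch under the assumption that the Hadoop-BAM output format on the classpath still honors the flag; enableSplittingBai is an illustrative helper name, not an existing API.

import org.apache.spark.api.java.JavaSparkContext;
import org.seqdoop.hadoop_bam.BAMOutputFormat;

public final class SplittingBaiWorkaround {
    // Hypothetical caller-side workaround: set the same Hadoop-BAM property
    // the reverted code used to set for ReadsWriteFormat.SINGLE BAM outputs,
    // so a splitting-bai is written alongside the merged BAM (assuming
    // Hadoop-BAM still reads this flag when writing BAM output).
    public static void enableSplittingBai(final JavaSparkContext ctx) {
        ctx.hadoopConfiguration().setBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, true);
    }
}

For the same stale-configuration reason the javadoc above mentions, a caller reusing one Spark context for several outputs would also need to set the flag back to false before writing a non-BAM or sharded output. The second changed file, a unit test exercising ReadsSparkSink.writeReads, follows.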
@@ -14,13 +14,11 @@
 import org.broadinstitute.hellbender.engine.spark.SparkContextFactory;
 import org.broadinstitute.hellbender.exceptions.GATKException;
 import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
-import org.broadinstitute.hellbender.utils.io.IOUtils;
 import org.broadinstitute.hellbender.utils.read.GATKRead;
 import org.broadinstitute.hellbender.utils.read.ReadCoordinateComparator;
 import org.broadinstitute.hellbender.utils.read.ReadsWriteFormat;
 import org.broadinstitute.hellbender.utils.test.BaseTest;
 import org.broadinstitute.hellbender.utils.test.MiniClusterUtils;
-import org.seqdoop.hadoop_bam.SplittingBAMIndexer;
 import org.testng.Assert;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
@@ -29,7 +27,6 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
@@ -125,11 +122,6 @@ private void assertSingleShardedWritingWorks(String inputBam, String referenceFile
 
         ReadsSparkSink.writeReads(ctx, outputPath, referenceFile, rddParallelReads, header, ReadsWriteFormat.SINGLE);
 
-        // check that a splitting bai file is created
-        if (IOUtils.isBamFileName(outputPath)) {
-            Assert.assertTrue(Files.exists(IOUtils.getPath(outputPath + SplittingBAMIndexer.OUTPUT_FILE_EXTENSION)));
-        }
-
         JavaRDD<GATKRead> rddParallelReads2 = readSource.getParallelReads(outputPath, referenceFile);
         final List<GATKRead> writtenReads = rddParallelReads2.collect();
 
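
If the property workaround sketched after the first file were applied, the deleted check could be reinstated in test code. A minimal sketch using only identifiers from the removed lines; SplittingBaiChecks and assertSplittingBaiExists are illustrative names, and outputPath is whatever path was passed to writeReads.

import java.nio.file.Files;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.seqdoop.hadoop_bam.SplittingBAMIndexer;
import org.testng.Assert;

final class SplittingBaiChecks {
    // Mirror of the deleted assertion: for a BAM output, a companion index
    // file named by appending SplittingBAMIndexer.OUTPUT_FILE_EXTENSION to
    // the output path should exist next to the written file.
    static void assertSplittingBaiExists(final String outputPath) {
        if (IOUtils.isBamFileName(outputPath)) {
            Assert.assertTrue(Files.exists(IOUtils.getPath(outputPath + SplittingBAMIndexer.OUTPUT_FILE_EXTENSION)));
        }
    }
}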
