Skip to content

Commit

Permalink
Support loading of CSI from URLs/streams. #1507 (#1595)
Browse files Browse the repository at this point in the history
* Support loading of CSI from URLs/streams.
* fixes #1507
  • Loading branch information
cmnbroad authored Apr 26, 2022
1 parent 22aec67 commit 1449dec
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 17 deletions.
40 changes: 27 additions & 13 deletions src/main/java/htsjdk/samtools/BAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -407,20 +407,29 @@ public boolean hasIndex() {
*/
@Override
public BAMIndex getIndex() {
if(!hasIndex())
if(!hasIndex()) {
throw new SAMException("No index is available for this BAM file.");
}
if(mIndex == null) {
SamIndexes samIndex = getIndexType();
if (samIndex == null) {
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary())
: new DiskBasedBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary());
} else if (samIndex.equals(SamIndexes.BAI)) {
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping)
: new DiskBasedBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping);
} else if (samIndex.equals(SamIndexes.CSI)) {
mIndex = new CSIIndex(mIndexFile, mEnableIndexMemoryMapping, getFileHeader().getSequenceDictionary());
} else {
throw new SAMFormatException("Unsupported BAM index file: " + mIndexFile.getName());
final SamIndexes samIndexType = getIndexType();
final SAMSequenceDictionary sequenceDictionary = getFileHeader().getSequenceDictionary();
if(mIndexFile != null) {
if (samIndexType.equals(SamIndexes.BAI)) {
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, sequenceDictionary, mEnableIndexMemoryMapping)
: new DiskBasedBAMFileIndex(mIndexFile, sequenceDictionary, mEnableIndexMemoryMapping);
} else if (samIndexType.equals(SamIndexes.CSI)) {
mIndex = new CSIIndex(mIndexFile, mEnableIndexMemoryMapping, sequenceDictionary);
} else {
throw new SAMFormatException("Unsupported BAM index file format: " + mIndexFile.getName());
}
} else if(mIndexStream != null) {
if (samIndexType.equals(SamIndexes.BAI)) {
mIndex = new CachingBAMFileIndex(mIndexStream, sequenceDictionary);
} else if (samIndexType.equals(SamIndexes.CSI)) {
mIndex = new CSIIndex(mIndexStream, sequenceDictionary);
} else {
throw new SAMFormatException("Unsupported BAM index file format: " + mIndexStream.getSource());
}
}
}

Expand All @@ -438,8 +447,13 @@ public SamIndexes getIndexType() {
} else if (mIndexFile.getName().toLowerCase().endsWith(FileExtensions.CSI)) {
return SamIndexes.CSI;
}

throw new SAMFormatException("Unknown BAM index file type: " + mIndexFile.getName());
} else if (mIndexStream != null) {
final SamIndexes samIndexesType = SamIndexes.getSAMIndexTypeFromStream(mIndexStream);
if (samIndexesType == SamIndexes.BAI || samIndexesType == SamIndexes.CSI) {
return samIndexesType;
}
throw new SAMFormatException(String.format("Unknown BAM index file type: %s in %s", samIndexesType, mIndexStream.getSource()));
}

return null;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/CSIIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public class CSIIndex extends AbstractBAMFileIndex implements BrowseableBAMIndex
*/

public CSIIndex(final SeekableStream stream, final SAMSequenceDictionary dictionary) {
this(new IndexStreamBuffer(stream), stream.getSource(), dictionary);
this(IndexFileBufferFactory.getBuffer(stream), stream.getSource(), dictionary);
}

public CSIIndex(final Path path, final SAMSequenceDictionary dictionary) throws IOException {
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/htsjdk/samtools/CompressedIndexFileBuffer.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package htsjdk.samtools;

import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.RuntimeIOException;
Expand All @@ -25,6 +26,11 @@ class CompressedIndexFileBuffer implements IndexFileBuffer {
}
}

/**
 * Builds a buffer that reads index data from a seekable stream. The stream is wrapped in a
 * {@link BlockCompressedInputStream}, which in turn is read through a {@link BinaryCodec}.
 *
 * @param seekableStream stream supplying the compressed index data
 */
CompressedIndexFileBuffer(final SeekableStream seekableStream) {
    this.mCompressedStream = new BlockCompressedInputStream(seekableStream);
    this.binaryCodec = new BinaryCodec(this.mCompressedStream);
}

@Override
public void readBytes(final byte[] bytes) {
binaryCodec.readBytes(bytes);
Expand Down
10 changes: 10 additions & 0 deletions src/main/java/htsjdk/samtools/IndexFileBufferFactory.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package htsjdk.samtools;

import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;

Expand All @@ -18,4 +19,13 @@ static IndexFileBuffer getBuffer(File file, boolean enableMemoryMapping) {

return isCompressed ? new CompressedIndexFileBuffer(file) : (enableMemoryMapping ? new MemoryMappedFileBuffer(file) : new RandomAccessFileBuffer(file));
}

/**
 * Selects an {@link IndexFileBuffer} implementation for an index supplied as a stream.
 *
 * @param seekableStream stream containing the index data
 * @return a {@link CompressedIndexFileBuffer} when {@link IOUtil#isGZIPInputStream} reports the
 *         stream as gzip data, otherwise an {@link IndexStreamBuffer}
 */
static IndexFileBuffer getBuffer(SeekableStream seekableStream) {
    // Streams detected as gzip must be read through the decompressing buffer.
    if (IOUtil.isGZIPInputStream(seekableStream)) {
        return new CompressedIndexFileBuffer(seekableStream);
    }
    return new IndexStreamBuffer(seekableStream);
}
}
33 changes: 33 additions & 0 deletions src/main/java/htsjdk/samtools/SamIndexes.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.FileExtensions;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.zip.GZIPInputStream;

/**
* A helper class to read BAI and CRAI indexes. Main goal is to provide BAI stream as a sort of common API for all index types.
Expand Down Expand Up @@ -102,6 +105,36 @@ public static SeekableStream asBaiSeekableStreamOrNull(final SeekableStream inpu
return null;
}

/**
 * Inspects the leading bytes of an index stream to determine which index format it holds.
 * The stream is rewound to offset 0 before inspection and again after a successful inspection.
 *
 * @param seekableStream the index stream to interrogate
 * @return {@code CSI} if the stream is gzipped and its decompressed content starts with the CSI
 *         magic bytes; {@code CRAI} if it is gzipped but not CSI; {@code BAI} if it is not
 *         gzipped and starts with the BAI magic bytes; otherwise {@code null}
 * @throws RuntimeIOException if reading or seeking the stream fails
 */
public static SamIndexes getSAMIndexTypeFromStream(final SeekableStream seekableStream) {
    try {
        seekableStream.seek(0);
        final SeekableBufferedStream bufferedStream = new SeekableBufferedStream(seekableStream);

        final boolean gzipped = IOUtil.isGZIPInputStream(bufferedStream);
        // Rewind before reading the signature bytes.
        bufferedStream.seek(0);

        final SamIndexes detectedType;
        if (gzipped) {
            final GZIPInputStream inflatedStream = new GZIPInputStream(bufferedStream);
            // The CRAI format has no signature bytes, so optimistically call it CRAI
            // if it's gzipped but not CSI.
            detectedType = doesStreamStartWith(inflatedStream, CSI.magic) ? CSI : CRAI;
        } else {
            detectedType = doesStreamStartWith(bufferedStream, BAI.magic) ? BAI : null;
        }

        // Leave the caller's stream positioned at the beginning.
        seekableStream.seek(0);
        return detectedType;
    } catch (final IOException e) {
        throw new RuntimeIOException("Error interrogating index input stream", e);
    }
}

private static boolean doesStreamStartWith(final InputStream is, final byte[] bytes) throws IOException {
for (final byte b : bytes) {
if (is.read() != (0xFF & b)) {
Expand Down
21 changes: 19 additions & 2 deletions src/test/java/htsjdk/samtools/BAMFileReaderTest.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package htsjdk.samtools;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.seekablestream.ByteArraySeekableStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CoordMath;
import org.testng.Assert;
Expand All @@ -10,7 +9,8 @@

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.net.URL;
import java.nio.file.Paths;

public class BAMFileReaderTest extends HtsjdkTest {
private final static File bamFile = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam");
Expand All @@ -36,6 +36,23 @@ public void init() throws IOException {
bamFileReaderNull = new BAMFileReader(bamFile, null, true, false, ValidationStringency.DEFAULT_STRINGENCY, DefaultSAMRecordFactory.getInstance());
}

/**
 * Opens a BAM together with its CSI index, both addressed by URL, and checks that the
 * reader exposes a {@link CSIIndex} and that indexed queries can be opened.
 * See https://github.com/samtools/htsjdk/issues/1507
 */
@Test
public static void testCSIFromURL() throws IOException {
    final URL bamLocation = Paths.get(bamFile.toURI()).toUri().toURL();
    final URL csiLocation = Paths.get(csiFileIndex.toURI()).toUri().toURL();
    final SamInputResource bamWithCsiIndex = SamInputResource.of(bamLocation).index(csiLocation);
    final SamReaderFactory readerFactory =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);

    try (final SamReader reader = readerFactory.open(bamWithCsiIndex)) {
        Assert.assertTrue(reader.hasIndex());
        Assert.assertTrue(reader.indexing().getIndex() instanceof CSIIndex);

        // Only opening and closing the iterators matters here; their contents are not inspected.
        try (final SAMRecordIterator ignoredStartQuery =
                     reader.queryAlignmentStart("chr1_random", 1)) {}
        try (final SAMRecordIterator ignoredUnmappedQuery = reader.queryUnmapped()) {}
    }
}

@Test
public static void testGetIndexTypeOK() {
BAMIndexMetaData.printIndexStats(bamFile);
Expand Down
21 changes: 20 additions & 1 deletion src/test/java/htsjdk/samtools/SamIndexesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.seekablestream.SeekableMemoryStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.seekablestream.SeekableStreamFactory;
import htsjdk.samtools.util.IOUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.ByteArrayInputStream;
Expand Down Expand Up @@ -188,4 +190,21 @@ public void testOpenIndexUrlAsBaiOrNull() throws IOException {
Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset());
Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1);
}
}

/**
 * Supplies (index file, expected index type) pairs for
 * {@code testGetSAMIndexTypeFromStream}: one BAI, one CSI and one CRAI file.
 */
@DataProvider(name = "getSAMIndexTypeFromStreamTests")
public Object[][] getSAMIndexTypeFromStreamTests() {
    final File baiFile = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam.bai");
    final File csiFile = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam.csi");
    final File craiFile = new File("src/test/resources/htsjdk/samtools/cram/cramQueryWithCRAI.cram.crai");

    return new Object[][]{
            { baiFile, SamIndexes.BAI },
            { csiFile, SamIndexes.CSI },
            { craiFile, SamIndexes.CRAI },
    };
}

/**
 * Checks that {@code SamIndexes.getSAMIndexTypeFromStream} reports the expected type for
 * each fixture and leaves the stream positioned at offset 0.
 */
@Test(dataProvider = "getSAMIndexTypeFromStreamTests")
public void testGetSAMIndexTypeFromStream(final File indexFile, final SamIndexes expectedIndexType) throws IOException {
    try (final SeekableStream indexStream = SeekableStreamFactory.getInstance().getStreamFor(indexFile.getPath())) {
        final SamIndexes detectedType = SamIndexes.getSAMIndexTypeFromStream(indexStream);
        Assert.assertEquals(detectedType, expectedIndexType);
        // Type detection must not leave the stream advanced past the start.
        Assert.assertEquals(indexStream.position(), 0);
    }
}
}

0 comments on commit 1449dec

Please sign in to comment.