diff --git a/.test_suite.sh b/.test_suite.sh index 804d1e8c..cbb4c1f7 100755 --- a/.test_suite.sh +++ b/.test_suite.sh @@ -26,6 +26,11 @@ testSliceMultipleRegions() { assertEquals 0 $? } +testSliceMultipleRegionsBed() { + assertEquals "156" `./build/sambamba slice -L test/chr2_chr3_test_region.bed test/issue_204.bam |\ + ./build/sambamba view -c /dev/stdin` +} + testView() { assertEquals "1806" `./build/sambamba view -c ex1_header.sorted.bam chr2` assertEquals "1464" `./build/sambamba view -c ex1_header.sorted.bam chr1` diff --git a/sambamba/slice.d b/sambamba/slice.d index d896553e..ee21313d 100644 --- a/sambamba/slice.d +++ b/sambamba/slice.d @@ -35,6 +35,7 @@ import std.parallelism; import std.conv; import std.stdio; import std.exception; +import sambamba.utils.common.bed; import sambamba.utils.common.overwrite; @@ -167,6 +168,7 @@ void fetchRegions(BamReader bam, Region[] regions, ref Stream stream) // write R1 foreach (read; r1.data) writer.writeRecord(read); + // Flush the BamWriter before our copyAsIs call. writer.flush(); copyAsIs(bam, stream, s2_start_offset, s2_end_offset); @@ -271,6 +273,8 @@ void printUsage() stderr.writeln(); stderr.writeln("OPTIONS: -o, --output-filename=OUTPUT_FILENAME"); stderr.writeln(" output BAM filename"); + stderr.writeln(" -L, --regions=FILENAME"); + stderr.writeln(" output only reads overlapping one of regions from the BED file"); } version(standalone) { @@ -283,17 +287,30 @@ int slice_main(string[] args) { // at least two arguments must be presented string output_filename = null; + string bed_filename = null; try { getopt(args, std.getopt.config.caseSensitive, - "output-filename|o", &output_filename); + "output-filename|o", &output_filename, + "regions|L", &bed_filename); - if (args.length < 3) { + // Check that an input file was specified. + if (args.length < 2) { printUsage(); return 0; } + // Either a bed file or a set of regions needs to be specified. + if (bed_filename is null && args.length < 3) { + throw new Exception("Must provide either a bed file or list of regions."); + } + + // But you can't provide both a bed file AND a set of regions. + if (bed_filename.length > 0 && args.length > 2) { + throw new Exception("specifying both region and BED filename is disallowed"); + } + protectFromOverwrite(args[1], output_filename); import std.parallelism; @@ -318,10 +335,16 @@ int slice_main(string[] args) { stream = new undead.stream.BufferedFile(handle, FileMode.Out, BUFSIZE); } - if (args[2] == "*") { + if (bed_filename is null && args[2] == "*") { fetchUnmapped(bam, stream); } else { - auto regions = map!parseRegion(args[2 .. $]).array; + Region[] regions; + if (bed_filename !is null) { + auto bam_regions = parseBed(bed_filename, bam); + regions = bam_regions.map!(r => Region(bam.reference(r.ref_id).name, r.start, r.end)).array; + } else { + regions = map!parseRegion(args[2 .. $]).array; + } fetchRegions(bam, regions, stream); } diff --git a/test/chr2_chr3_test_region.bed b/test/chr2_chr3_test_region.bed new file mode 100644 index 00000000..e04ed519 --- /dev/null +++ b/test/chr2_chr3_test_region.bed @@ -0,0 +1,4 @@ +2 166860000 166870000 +2 166875000 166890000 +3 100000000 170000000 +3 150000000 200000000