Skip to content

Commit

Permalink
markdup adds @PG record to header (#206)
Browse files Browse the repository at this point in the history
  • Loading branch information
Artem Tarasov committed Apr 24, 2016
1 parent 3885a42 commit 5550f46
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 4 deletions.
6 changes: 6 additions & 0 deletions .test_suite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,10 @@ testIssue204() {
assertEquals 0 $?
}

# Regression test for issue #206: the header of a BAM file written by
# `sambamba markdup` must contain a @PG line mentioning 'sambamba markdup'.
testIssue206() {
    # Drop any output left over from a previous run; otherwise a stale file
    # that already carries the @PG line would let the header check below
    # pass even if this markdup invocation fails.
    rm -f ex1_header.dedup.bam

    ./build/sambamba markdup ex1_header.sorted.bam ex1_header.dedup.bam 2>/dev/null
    # markdup itself has to succeed before its output is inspected.
    assertEquals 0 $?

    # The pipeline's status is that of the final grep: 0 only when a @PG
    # header line containing 'sambamba markdup' is present.
    ./build/sambamba view -H ex1_header.dedup.bam | grep '@PG' | grep -q 'sambamba markdup'
    assertEquals 0 $?
}

. shunit2-2.0.3/src/shell/shunit2
3 changes: 2 additions & 1 deletion main.d
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,12 @@ import sambamba.utils.common.ldc_gc_workaround;

import utils.strip_bcf_header;
import utils.lz4;
import utils.version_ : VERSION;

import std.stdio;

void printUsage() {
stderr.writeln("sambamba v0.6.0");
stderr.writeln("sambamba " ~ VERSION);
stderr.writeln();
stderr.writeln("Usage: sambamba [command] [args...]");
stderr.writeln();
Expand Down
2 changes: 1 addition & 1 deletion sambamba-ldmd-debug.rsp
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"-g" "-c" "-m64" "-noboundscheck" "-IBioD/" "-ofbuild/sambamba.o" "-odbuild" "-I." "main.d" "BioD/bio/bam/baifile.d" "sambamba/depth.d" "BioD/bio/core/utils/switchendianness.d" "sambamba/utils/common/readstorage.d" "BioD/bio/core/utils/tmpfile.d" "sambamba/utils/common/bed.d" "BioD/bio/bam/utils/samheadermerger.d" "thirdparty/mergesort.d" "BioD/bio/bam/readrange.d" "cram/exception.d" "sambamba/utils/view/headerserializer.d" "BioD/bio/bam/splitter.d" "cram/htslib.d" "BioD/bio/core/utils/roundbuf.d" "BioD/bio/bam/md/operation.d" "BioD/bio/sam/utils/fastrecordparser.d" "sambamba/utils/common/ldc_gc_workaround.d" "BioD/bio/bam/read.d" "sambamba/utils/common/filtering.d" "BioD/bio/bam/bai/indexing.d" "BioD/bio/bam/utils/value.d" "BioD/bio/bam/randomaccessmanager.d" "sambamba/utils/common/queryparser.d" "BioD/bio/bam/md/reconstruct.d" "BioD/bio/core/base.d" "BioD/bio/core/utils/zlib.d" "BioD/bio/sam/header.d" "BioD/bio/bam/writer.d" "BioD/bio/core/bgzf/block.d" "BioD/bio/bam/md/core.d" "cram/reader.d" "sambamba/utils/common/overwrite.d" "BioD/bio/core/utils/format.d" "BioD/bio/bam/reader.d" "BioD/bio/core/bgzf/inputstream.d" "BioD/bio/core/sequence.d" "BioD/bio/core/utils/bylinefast.d" "sambamba/index.d" "sambamba/markdup.d" "BioD/bio/bam/referenceinfo.d" "BioD/bio/core/tinymap.d" "cram/reference.d" "BioD/bio/bam/constants.d" "BioD/bio/core/bgzf/outputstream.d" "sambamba/utils/common/intervaltree.d" "BioD/bio/bam/utils/graph.d" "BioD/bio/core/utils/algo.d" "BioD/bio/bam/tagvalue.d" "BioD/bio/sam/reader.d" "BioD/bio/core/utils/outbuffer.d" "sambamba/sort.d" "BioD/bio/bam/validation/samheader.d" "sambamba/flagstat.d" "BioD/bio/bam/pileup.d" "sambamba/pileup.d" "BioD/bio/bam/thirdparty/msgpack.d" "BioD/bio/bam/reference.d" "BioD/bio/core/utils/range.d" "BioD/bio/bam/bai/bin.d" "sambamba/utils/view/alignmentrangeprocessor.d" "sambamba/utils/common/tmpdir.d" "sambamba/slice.d" "BioD/bio/core/bgzf/chunk.d" "BioD/bio/core/bgzf/compress.d" "BioD/bio/bam/region.d" 
"BioD/bio/core/bgzf/virtualoffset.d" "BioD/bio/core/region.d" "BioD/bio/bam/md/parse.d" "BioD/bio/core/utils/stream.d" "sambamba/utils/common/progressbar.d" "thirdparty/unstablesort.d" "BioD/bio/bam/abstractreader.d" "BioD/bio/core/utils/memoize.d" "sambamba/utils/common/pratt_parser.d" "BioD/bio/core/bgzf/constants.d" "sambamba/merge.d" "sambamba/view.d" "BioD/bio/bam/utils/array.d" "BioD/bio/bam/validation/alignment.d" "cram/writer.d" "cram/slicereader.d" "cram/wrappers.d" "BioD/bio/bam/multireader.d" "utils/lz4.d" "utils/strip_bcf_header.d" "sambamba/fixbins.d" "sambamba/utils/common/file.d"
"-g" "-c" "-m64" "-noboundscheck" "-IBioD/" "-ofbuild/sambamba.o" "-odbuild" "-I." "main.d" "BioD/bio/bam/baifile.d" "sambamba/depth.d" "BioD/bio/core/utils/switchendianness.d" "sambamba/utils/common/readstorage.d" "BioD/bio/core/utils/tmpfile.d" "sambamba/utils/common/bed.d" "BioD/bio/bam/utils/samheadermerger.d" "thirdparty/mergesort.d" "BioD/bio/bam/readrange.d" "cram/exception.d" "sambamba/utils/view/headerserializer.d" "BioD/bio/bam/splitter.d" "cram/htslib.d" "BioD/bio/core/utils/roundbuf.d" "BioD/bio/bam/md/operation.d" "BioD/bio/sam/utils/fastrecordparser.d" "sambamba/utils/common/ldc_gc_workaround.d" "BioD/bio/bam/read.d" "sambamba/utils/common/filtering.d" "BioD/bio/bam/bai/indexing.d" "BioD/bio/bam/utils/value.d" "BioD/bio/bam/randomaccessmanager.d" "sambamba/utils/common/queryparser.d" "BioD/bio/bam/md/reconstruct.d" "BioD/bio/core/base.d" "BioD/bio/core/utils/zlib.d" "BioD/bio/sam/header.d" "BioD/bio/bam/writer.d" "BioD/bio/core/bgzf/block.d" "BioD/bio/bam/md/core.d" "cram/reader.d" "sambamba/utils/common/overwrite.d" "BioD/bio/core/utils/format.d" "BioD/bio/bam/reader.d" "BioD/bio/core/bgzf/inputstream.d" "BioD/bio/core/sequence.d" "BioD/bio/core/utils/bylinefast.d" "sambamba/index.d" "sambamba/markdup.d" "BioD/bio/bam/referenceinfo.d" "BioD/bio/core/tinymap.d" "cram/reference.d" "BioD/bio/bam/constants.d" "BioD/bio/core/bgzf/outputstream.d" "sambamba/utils/common/intervaltree.d" "BioD/bio/bam/utils/graph.d" "BioD/bio/core/utils/algo.d" "BioD/bio/bam/tagvalue.d" "BioD/bio/sam/reader.d" "BioD/bio/core/utils/outbuffer.d" "sambamba/sort.d" "BioD/bio/bam/validation/samheader.d" "sambamba/flagstat.d" "BioD/bio/bam/pileup.d" "sambamba/pileup.d" "BioD/bio/bam/thirdparty/msgpack.d" "BioD/bio/bam/reference.d" "BioD/bio/core/utils/range.d" "BioD/bio/bam/bai/bin.d" "sambamba/utils/view/alignmentrangeprocessor.d" "sambamba/utils/common/tmpdir.d" "sambamba/slice.d" "BioD/bio/core/bgzf/chunk.d" "BioD/bio/core/bgzf/compress.d" "BioD/bio/bam/region.d" 
"BioD/bio/core/bgzf/virtualoffset.d" "BioD/bio/core/region.d" "BioD/bio/bam/md/parse.d" "BioD/bio/core/utils/stream.d" "sambamba/utils/common/progressbar.d" "thirdparty/unstablesort.d" "BioD/bio/bam/abstractreader.d" "BioD/bio/core/utils/memoize.d" "sambamba/utils/common/pratt_parser.d" "BioD/bio/core/bgzf/constants.d" "sambamba/merge.d" "sambamba/view.d" "BioD/bio/bam/utils/array.d" "BioD/bio/bam/validation/alignment.d" "cram/writer.d" "cram/slicereader.d" "cram/wrappers.d" "BioD/bio/bam/multireader.d" "utils/lz4.d" "utils/strip_bcf_header.d" "sambamba/fixbins.d" "sambamba/utils/common/file.d" "utils/version_.d"
2 changes: 1 addition & 1 deletion sambamba-ldmd-release.rsp
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"-O2" "-c" "-m64" "-noboundscheck" "-release" "-inline" "-IBioD/" "-ofbuild/sambamba.o" "-odbuild" "-I." "main.d" "BioD/bio/bam/baifile.d" "sambamba/depth.d" "BioD/bio/core/utils/switchendianness.d" "sambamba/utils/common/readstorage.d" "BioD/bio/core/utils/tmpfile.d" "sambamba/utils/common/bed.d" "BioD/bio/bam/utils/samheadermerger.d" "thirdparty/mergesort.d" "BioD/bio/bam/readrange.d" "cram/exception.d" "sambamba/utils/view/headerserializer.d" "BioD/bio/bam/splitter.d" "cram/htslib.d" "BioD/bio/core/utils/roundbuf.d" "BioD/bio/bam/md/operation.d" "BioD/bio/sam/utils/fastrecordparser.d" "sambamba/utils/common/ldc_gc_workaround.d" "BioD/bio/bam/read.d" "sambamba/utils/common/filtering.d" "BioD/bio/bam/bai/indexing.d" "BioD/bio/bam/utils/value.d" "BioD/bio/bam/randomaccessmanager.d" "sambamba/utils/common/queryparser.d" "BioD/bio/bam/md/reconstruct.d" "BioD/bio/core/base.d" "BioD/bio/core/utils/zlib.d" "BioD/bio/sam/header.d" "BioD/bio/bam/writer.d" "BioD/bio/core/bgzf/block.d" "BioD/bio/bam/md/core.d" "cram/reader.d" "sambamba/utils/common/overwrite.d" "BioD/bio/core/utils/format.d" "BioD/bio/bam/reader.d" "BioD/bio/core/bgzf/inputstream.d" "BioD/bio/core/sequence.d" "BioD/bio/core/utils/bylinefast.d" "sambamba/index.d" "sambamba/markdup.d" "BioD/bio/bam/referenceinfo.d" "BioD/bio/core/tinymap.d" "cram/reference.d" "BioD/bio/bam/constants.d" "BioD/bio/core/bgzf/outputstream.d" "sambamba/utils/common/intervaltree.d" "BioD/bio/bam/utils/graph.d" "BioD/bio/core/utils/algo.d" "BioD/bio/bam/tagvalue.d" "BioD/bio/sam/reader.d" "BioD/bio/core/utils/outbuffer.d" "sambamba/sort.d" "BioD/bio/bam/validation/samheader.d" "sambamba/flagstat.d" "BioD/bio/bam/pileup.d" "sambamba/pileup.d" "BioD/bio/bam/thirdparty/msgpack.d" "BioD/bio/bam/reference.d" "BioD/bio/core/utils/range.d" "BioD/bio/bam/bai/bin.d" "sambamba/utils/view/alignmentrangeprocessor.d" "sambamba/utils/common/tmpdir.d" "sambamba/slice.d" "BioD/bio/core/bgzf/chunk.d" "BioD/bio/core/bgzf/compress.d" 
"BioD/bio/bam/region.d" "BioD/bio/core/bgzf/virtualoffset.d" "BioD/bio/core/region.d" "BioD/bio/bam/md/parse.d" "BioD/bio/core/utils/stream.d" "sambamba/utils/common/progressbar.d" "thirdparty/unstablesort.d" "BioD/bio/bam/abstractreader.d" "BioD/bio/core/utils/memoize.d" "sambamba/utils/common/pratt_parser.d" "BioD/bio/core/bgzf/constants.d" "sambamba/merge.d" "sambamba/view.d" "BioD/bio/bam/utils/array.d" "BioD/bio/bam/validation/alignment.d" "cram/writer.d" "cram/slicereader.d" "cram/wrappers.d" "BioD/bio/bam/multireader.d" "utils/lz4.d" "utils/strip_bcf_header.d" "sambamba/fixbins.d" "sambamba/utils/common/file.d"
"-O2" "-c" "-m64" "-noboundscheck" "-release" "-inline" "-IBioD/" "-ofbuild/sambamba.o" "-odbuild" "-I." "main.d" "BioD/bio/bam/baifile.d" "sambamba/depth.d" "BioD/bio/core/utils/switchendianness.d" "sambamba/utils/common/readstorage.d" "BioD/bio/core/utils/tmpfile.d" "sambamba/utils/common/bed.d" "BioD/bio/bam/utils/samheadermerger.d" "thirdparty/mergesort.d" "BioD/bio/bam/readrange.d" "cram/exception.d" "sambamba/utils/view/headerserializer.d" "BioD/bio/bam/splitter.d" "cram/htslib.d" "BioD/bio/core/utils/roundbuf.d" "BioD/bio/bam/md/operation.d" "BioD/bio/sam/utils/fastrecordparser.d" "sambamba/utils/common/ldc_gc_workaround.d" "BioD/bio/bam/read.d" "sambamba/utils/common/filtering.d" "BioD/bio/bam/bai/indexing.d" "BioD/bio/bam/utils/value.d" "BioD/bio/bam/randomaccessmanager.d" "sambamba/utils/common/queryparser.d" "BioD/bio/bam/md/reconstruct.d" "BioD/bio/core/base.d" "BioD/bio/core/utils/zlib.d" "BioD/bio/sam/header.d" "BioD/bio/bam/writer.d" "BioD/bio/core/bgzf/block.d" "BioD/bio/bam/md/core.d" "cram/reader.d" "sambamba/utils/common/overwrite.d" "BioD/bio/core/utils/format.d" "BioD/bio/bam/reader.d" "BioD/bio/core/bgzf/inputstream.d" "BioD/bio/core/sequence.d" "BioD/bio/core/utils/bylinefast.d" "sambamba/index.d" "sambamba/markdup.d" "BioD/bio/bam/referenceinfo.d" "BioD/bio/core/tinymap.d" "cram/reference.d" "BioD/bio/bam/constants.d" "BioD/bio/core/bgzf/outputstream.d" "sambamba/utils/common/intervaltree.d" "BioD/bio/bam/utils/graph.d" "BioD/bio/core/utils/algo.d" "BioD/bio/bam/tagvalue.d" "BioD/bio/sam/reader.d" "BioD/bio/core/utils/outbuffer.d" "sambamba/sort.d" "BioD/bio/bam/validation/samheader.d" "sambamba/flagstat.d" "BioD/bio/bam/pileup.d" "sambamba/pileup.d" "BioD/bio/bam/thirdparty/msgpack.d" "BioD/bio/bam/reference.d" "BioD/bio/core/utils/range.d" "BioD/bio/bam/bai/bin.d" "sambamba/utils/view/alignmentrangeprocessor.d" "sambamba/utils/common/tmpdir.d" "sambamba/slice.d" "BioD/bio/core/bgzf/chunk.d" "BioD/bio/core/bgzf/compress.d" 
"BioD/bio/bam/region.d" "BioD/bio/core/bgzf/virtualoffset.d" "BioD/bio/core/region.d" "BioD/bio/bam/md/parse.d" "BioD/bio/core/utils/stream.d" "sambamba/utils/common/progressbar.d" "thirdparty/unstablesort.d" "BioD/bio/bam/abstractreader.d" "BioD/bio/core/utils/memoize.d" "sambamba/utils/common/pratt_parser.d" "BioD/bio/core/bgzf/constants.d" "sambamba/merge.d" "sambamba/view.d" "BioD/bio/bam/utils/array.d" "BioD/bio/bam/validation/alignment.d" "cram/writer.d" "cram/slicereader.d" "cram/wrappers.d" "BioD/bio/bam/multireader.d" "utils/lz4.d" "utils/strip_bcf_header.d" "sambamba/fixbins.d" "sambamba/utils/common/file.d" "utils/version_.d"
6 changes: 5 additions & 1 deletion sambamba/markdup.d
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import sambamba.utils.common.progressbar;
import sambamba.utils.common.overwrite;
import thirdparty.unstablesort;
import utils.lz4;
import utils.version_ : addPG;

import bio.bam.reader, bio.bam.readrange, bio.bam.writer, bio.bam.referenceinfo,
bio.bam.read, bio.sam.header, bio.bam.abstractreader,
Expand Down Expand Up @@ -1121,6 +1122,8 @@ int markdup_main(string[] args) {

MarkDuplicatesConfig cfg;
cfg.tmpdir = defaultTmpDir();

auto unparsed_args = args.dup;

bool remove_duplicates;
uint n_threads = totalCPUs;
Expand Down Expand Up @@ -1256,7 +1259,8 @@ int markdup_main(string[] args) {
auto writer = new BamWriter(out_stream, compression_level);
writer.setFilename(args[$-1]);
scope(exit) writer.finish();
writer.writeSamHeader(bam.header);
auto header = addPG("markdup", unparsed_args, bam.header);
writer.writeSamHeader(header);
writer.writeReferenceSequenceInfo(bam.reference_sequences);

stderr.writeln(remove_duplicates ? "removing" : "marking", " duplicates...");
Expand Down
21 changes: 21 additions & 0 deletions utils/version_.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
module utils.version_;

immutable string VERSION = "0.6.0";

import bio.sam.header;
import std.array : join;

/**
 * Builds a @PG record describing this sambamba invocation and adds it to
 * the programs section of `header`.
 *
 * The record gets the identifier "sambamba <tool>", the version string
 * VERSION, and a command line of the form "<args[0]> <tool> <args[1..$]>".
 * When `header.programs` is not empty, the new record's previous-program
 * field is set to the identifier of the last element of
 * `header.programs.values`.
 *
 * Params:
 *   tool   = subcommand name (the markdup caller passes "markdup")
 *   args   = arguments as received by the subcommand; args[0] is emitted
 *            before the tool name and the remaining elements after it
 *            (presumably args[0] is the executable name - confirm against
 *            the caller in main.d)
 *   header = SAM header to extend
 *
 * Returns: `header`, after the new record has been added to its programs.
 *
 * NOTE(review): if SamHeader is a reference type the caller's header object
 * is modified as well - confirm against bio.sam.header.
 */
SamHeader addPG(string tool, string[] args, SamHeader header) {
    auto pg_line = PgLine();
    pg_line.identifier = "sambamba " ~ tool;

    // Collect the words and join once. This avoids indexing args[0] when
    // args is empty (the project is compiled with -noboundscheck, so that
    // access would not be trapped) and avoids a trailing space when there
    // are no arguments after the program name.
    string[] words = args.length > 0 ? [args[0], tool] ~ args[1 .. $] : [tool];
    pg_line.command_line = join(words, " ");
    pg_line.program_version = VERSION;

    // Chain the new record to an already present one, if any.
    if (header.programs.length > 0) {
        auto prev_id = header.programs.values.back.identifier;
        pg_line.previous_program = prev_id;
    }

    header.programs.add(pg_line);
    return header;
}

0 comments on commit 5550f46

Please sign in to comment.