Skip to content

Commit

Permalink
Ported remove-clipping function back
Browse files Browse the repository at this point in the history
  • Loading branch information
mbreese committed Aug 22, 2019
1 parent 3272134 commit 4e636ae
Show file tree
Hide file tree
Showing 4 changed files with 226 additions and 1 deletion.
5 changes: 4 additions & 1 deletion src/java/io/compgen/ngsutils/NGSUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import io.compgen.common.updates.UpdateCheck;
import io.compgen.ngsutils.cli.annotate.GTFAnnotate;
import io.compgen.ngsutils.cli.annotate.RepeatAnnotate;
import io.compgen.ngsutils.cli.bam.BaiExplore;
import io.compgen.ngsutils.cli.bam.BamBaseCall;
import io.compgen.ngsutils.cli.bam.BamBest;
import io.compgen.ngsutils.cli.bam.BamCheck;
Expand All @@ -24,6 +25,7 @@
import io.compgen.ngsutils.cli.bam.BamFilterCli;
import io.compgen.ngsutils.cli.bam.BamFlagDuplicates;
import io.compgen.ngsutils.cli.bam.BamReadGroup;
import io.compgen.ngsutils.cli.bam.BamRemoveClipping;
import io.compgen.ngsutils.cli.bam.BamSampleReads;
import io.compgen.ngsutils.cli.bam.BamSplit;
import io.compgen.ngsutils.cli.bam.BamStats;
Expand Down Expand Up @@ -169,7 +171,8 @@ public static void main(String[] args) {
.addCommand(YatesChiSqCli.class)
.addCommand(FastaTri.class)
.addCommand(VCFTsTvRatio.class)
.addCommand(DigestCmd.class);
.addCommand(DigestCmd.class)
.addCommand(BamRemoveClipping.class);

try {
if (args.length == 0) {
Expand Down
14 changes: 14 additions & 0 deletions src/java/io/compgen/ngsutils/bam/support/InvalidReadException.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package io.compgen.ngsutils.bam.support;

public class InvalidReadException extends Exception {

public InvalidReadException(String s) {
super(s);
}

/**
*
*/
private static final long serialVersionUID = 8768517507333129504L;

}
68 changes: 68 additions & 0 deletions src/java/io/compgen/ngsutils/bam/support/ReadUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.util.SortedMap;
import java.util.TreeMap;

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord;
Expand Down Expand Up @@ -451,4 +452,71 @@ public static boolean isDiscordant(SAMRecord read, int maxDist, boolean includeI

return false;
}

public static SAMRecord removeClipping(SAMRecord read) throws InvalidReadException {
return removeClipping(read, false);
}
public static SAMRecord removeClipping(SAMRecord read, boolean writeFlags) throws InvalidReadException {
int clip5 = 0;
int clip3 = 0;
float origLength = read.getReadBases().length;

boolean inseq = false;
boolean changed = false;

Cigar newCigar = new Cigar();

for (CigarElement ce: read.getCigar().getCigarElements()) {
if (ce.getOperator() == CigarOperator.H) {
// skip this element
changed = true;
} else if (ce.getOperator() == CigarOperator.S) {
changed = true;
if (!inseq) {
clip5 = ce.getLength();
} else {
clip3 = ce.getLength();
}
} else {
inseq = true;
newCigar.add(ce);
}
}

if (!changed) {
return read;
}

if (read.getReadBases().length != read.getBaseQualities().length) {
throw new InvalidReadException("Sequence is not the same length as the base qualities! Did you try to remove clipped bases from a color-space read? (Not supported)");
}

byte[] newSeq = new byte[read.getReadBases().length - clip3 - clip5];
byte[] newQual = new byte[read.getBaseQualities().length - clip3 - clip5];

int origpos = clip5;
int newpos = 0;

int newlen = read.getReadBases().length - clip5 - clip3;

while (newpos < newlen) {
newSeq[newpos] = read.getReadBases()[origpos];
newQual[newpos] = read.getBaseQualities()[origpos];

newpos++;
origpos++;
}

read.setReadBases(newSeq);
read.setBaseQualities(newQual);
read.setCigar(newCigar);

if (writeFlags) {
read.setAttribute("ZA", clip5);
read.setAttribute("ZB", clip3);
read.setAttribute("ZC", (clip5+clip3) / origLength);
}

return read;
}
}
140 changes: 140 additions & 0 deletions src/java/io/compgen/ngsutils/cli/bam/BamRemoveClipping.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package io.compgen.ngsutils.cli.bam;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import io.compgen.cmdline.annotation.Command;
import io.compgen.cmdline.annotation.Exec;
import io.compgen.cmdline.annotation.Option;
import io.compgen.cmdline.annotation.UnnamedArg;
import io.compgen.cmdline.exceptions.CommandArgumentException;
import io.compgen.cmdline.impl.AbstractCommand;
import io.compgen.common.progress.FileChannelStats;
import io.compgen.common.progress.ProgressMessage;
import io.compgen.common.progress.ProgressUtils;
import io.compgen.ngsutils.bam.support.BamHeaderUtils;
import io.compgen.ngsutils.bam.support.InvalidReadException;
import io.compgen.ngsutils.bam.support.ReadUtils;
import io.compgen.ngsutils.support.CloseableFinalizer;

@Command(name="bam-removeclipping", desc="Removes clipped bases (soft) from BAM file reads", category="bam", experimental=true)
public class BamRemoveClipping extends AbstractCommand {
private String inFilename = null;
private String outFilename = null;
private boolean lenient = false;
private boolean silent = false;
private boolean writeFlags = false;
private boolean forceOverwrite = false;


@UnnamedArg(name = "INFILE OUTFILE")
public void setFilenames(String[] filename) throws CommandArgumentException {
if (filename.length != 2) {
throw new CommandArgumentException("You must specify an INFILE and and OUTFILE");
}
this.inFilename = filename[0];
this.outFilename = filename[1];
}

@Option(desc = "Write clipped data to flags (5' clipped bases => ZA:i, 3' clipped bases => ZB:i, Percentage of clipped bases => ZC:f)", name="flags")
public void setWriteFlags(boolean writeFlags) {
this.writeFlags = writeFlags;
}

@Option(desc = "Force overwriting an existing output file", name="force", charName="f")
public void setForce(boolean forceOverwrite) {
this.forceOverwrite = forceOverwrite;
}

@Option(desc = "Use lenient validation strategy", name="lenient")
public void setLenient(boolean lenient) {
this.lenient = lenient;
}

@Option(desc = "Use silent validation strategy", name="silent")
public void setSilent(boolean silent) {
this.silent = silent;
}

@Exec
public void exec() throws IOException, CommandArgumentException {
if (inFilename == null || outFilename == null) {
throw new CommandArgumentException("You must specify an INFILE and and OUTFILE");
}

if (new File(outFilename).exists() && !forceOverwrite) {
throw new CommandArgumentException("Output file: " + outFilename+ " exists! Use -f to force overwriting it.");
}

SamReaderFactory readerFactory = SamReaderFactory.makeDefault();
if (lenient) {
readerFactory.validationStringency(ValidationStringency.LENIENT);
} else if (silent) {
readerFactory.validationStringency(ValidationStringency.SILENT);
}

SamReader reader = null;
String name;
FileChannel channel = null;
if (inFilename.equals("-")) {
reader = readerFactory.open(SamInputResource.of(System.in));
name = "<stdin>";
} else {
File f = new File(inFilename);
FileInputStream fis = new FileInputStream(f);
channel = fis.getChannel();
reader = readerFactory.open(SamInputResource.of(fis));
name = f.getName();
}


SAMFileHeader header = reader.getFileHeader().clone();
SAMProgramRecord pg = BamHeaderUtils.buildSAMProgramRecord("bam-removeclipping", header);
List<SAMProgramRecord> pgRecords = new ArrayList<SAMProgramRecord>(header.getProgramRecords());
pgRecords.add(0, pg);
header.setProgramRecords(pgRecords);

SAMFileWriterFactory factory = new SAMFileWriterFactory();
SAMFileWriter writer = null;

if (outFilename.equals("-")) {
writer = factory.makeBAMWriter(header, true, new BufferedOutputStream(System.out));
} else {
writer = factory.makeBAMWriter(header, true, new File(outFilename));
}


Iterator<SAMRecord> it = ProgressUtils.getIterator(name, reader.iterator(), (channel == null)? null : new FileChannelStats(channel), new ProgressMessage<SAMRecord>() {
long i = 0;
@Override
public String msg(SAMRecord current) {
i++;
return i+" "+current.getReadName();
}}, new CloseableFinalizer<SAMRecord>());
while (it.hasNext()) {
SAMRecord read = it.next();
try {
writer.addAlignment(ReadUtils.removeClipping(read, this.writeFlags));
} catch (InvalidReadException e) {
throw new IOException(e);
}
}
reader.close();
writer.close();
}
}

1 comment on commit 4e636ae

@mbreese
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue #4

Please sign in to comment.