Skip to content

Commit

Permalink
Added option to split tabix by lines *or* by ref
Browse files Browse the repository at this point in the history
  • Loading branch information
mbreese committed Dec 3, 2024
1 parent 15107ef commit 93fb93c
Showing 1 changed file with 56 additions and 19 deletions.
75 changes: 56 additions & 19 deletions src/java/io/compgen/ngsutils/cli/tab/TabixSplit.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,24 @@ public class TabixSplit extends AbstractOutputCommand {
private String infile;
private String templFilename = null;
private boolean header = false;
private boolean byRef = false;
private int linenum = -1;

/**
 * Sets the template used to build each output file name.
 *
 * <p>{@code exec()} rejects a template that does not contain the literal {@code {}}
 * placeholder with a {@code CommandArgumentException}. When splitting by ref the
 * placeholder is replaced with the ref/chrom name; when splitting by lines it is
 * replaced with a running file number that starts at 1.
 *
 * @param templFilename output file name template; the option description states that a
 *        default is derived from the input file when this is not supplied (that logic is
 *        not visible in this part of the file)
 */
@Option(desc="Output file template ({} will be replaced by the ref/chrom name, default based on infile)", name="templ")
public void setTemplateName(String templFilename) {
this.templFilename = templFilename;
}

/**
 * Enables or disables splitting the input by ref/chrom.
 *
 * <p>When enabled, {@code exec()} starts a new output file every time the value in the
 * tabix sequence column differs from the previous line's value. {@code exec()} throws a
 * {@code CommandArgumentException} if this is enabled together with a positive
 * {@code --lines} value.
 *
 * @param byRef {@code true} to split by ref/chrom; the field defaults to {@code false}
 */
@Option(desc="Split file by ref/chrom", name="by-ref")
public void setByRef(boolean byRef) {
this.byRef = byRef;
}

/**
 * Sets the line count used when splitting the input by number of lines.
 *
 * <p>The field defaults to {@code -1}. {@code exec()} throws a
 * {@code CommandArgumentException} if a positive value is combined with {@code --by-ref}.
 * Header (skipped) lines are not counted toward the limit; they are only copied into each
 * output file when {@code --header} is set.
 *
 * <p>NOTE(review): {@code exec()} rolls over to a new file when
 * {@code curLineNum > linenum}, checked before the current line is written, so each
 * output file appears to receive {@code linenum + 1} data lines rather than
 * {@code linenum} -- confirm whether {@code >=} was intended.
 *
 * <p>NOTE(review): no range check is applied here. With the default of {@code -1} (or any
 * non-positive value) and by-ref disabled, the visible rollover test is true for every
 * line, which would put each line in its own file -- confirm that validation elsewhere in
 * {@code exec()} prevents this.
 *
 * @param linenum number of lines per output file
 */
@Option(desc="Split file by number of lines", name="lines")
public void setLines(int linenum) {
this.linenum = linenum;
}

@Option(desc="Write the header to all files", name="header")
public void setHeader(boolean val) {
this.header = val;
Expand All @@ -46,6 +58,10 @@ public void exec() throws Exception {
}
}

if (linenum > 0 && byRef) {
throw new CommandArgumentException("You can only split --by-ref or --lines, but not both at the same time.");
}

if (!templFilename.contains("{}")) {
throw new CommandArgumentException("Missing {} in template filename");
}
Expand All @@ -57,38 +73,59 @@ public void exec() throws Exception {

List<String> headerLines = new ArrayList<String>();

int lineno = 0;
int fileno = 0;
int curLineNum = 0;

int skipLineNum = 0;
for (String line: IterUtils.wrap(tabix.lines())) {
if (lineno < tabix.getSkipLines()) {
if (skipLineNum < tabix.getSkipLines()) {
if (header) {
headerLines.add(line);
}
lineno ++;
skipLineNum ++;
continue;
}
String[] vals = line.split("\\t");
String seq = vals[tabix.getColSeq()-1];

if (curSeq == null || !curSeq.equals(seq)) {
if (bgz != null) {
bgz.close();
}
curSeq = seq;
bgz = new BGZWriter(templFilename.replaceAll("\\{\\}", curSeq));
if (header) {
for (String hl: headerLines) {
bgz.writeString(hl+"\n");
}
}
System.err.println("Writing: "+templFilename.replaceAll("\\{\\}", curSeq));

if (byRef) {
if (curSeq == null || !curSeq.equals(seq)) {
if (bgz != null) {
bgz.close();
}
curSeq = seq;
bgz = new BGZWriter(templFilename.replaceAll("\\{\\}", curSeq));
if (header) {
for (String hl: headerLines) {
bgz.writeString(hl+"\n");
}
}
System.err.println("Writing: "+templFilename.replaceAll("\\{\\}", curSeq));
}
} else {
if (fileno == 0 || curLineNum > this.linenum) {
if (bgz != null) {
bgz.close();
}
curLineNum=0;
fileno++;
bgz = new BGZWriter(templFilename.replaceAll("\\{\\}", ""+fileno));
if (header) {
for (String hl: headerLines) {
bgz.writeString(hl+"\n");
}
}
System.err.println("Writing: "+templFilename.replaceAll("\\{\\}", ""+fileno));
}
}

bgz.writeString(line+"\n");

if (!byRef) {
curLineNum++;
}
}
if (bgz != null) {
bgz.close();
}
}

}
}

0 comments on commit 93fb93c

Please sign in to comment.