Skip to content

Commit

Permalink
producing sorted vcf output, by default to stdout
Browse files Browse the repository at this point in the history
  • Loading branch information
wdecoster committed May 10, 2019
1 parent 2ccde5d commit a5762db
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 13 deletions.
23 changes: 16 additions & 7 deletions surpyvor/parse_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ def get_args():
version='surpyvor: {}, SURVIVOR {}'.format(
__version__, get_survivor_version()),
help="Print version and quit.")

subparsers = parser.add_subparsers(dest='command',
title='[sub-commands]')

merge = subparsers.add_parser("merge",
help="merging vcf files of SVs",
formatter_class=ArgumentDefaultsHelpFormatter)
Expand All @@ -23,10 +25,10 @@ def get_args():
nargs='+',
required=True,
help="vcf files to merge")
merge_req.add_argument("-o", "--output",
help="output file",
required=True)
merge_opt = merge.add_argument_group('optional arguments')
merge_opt.add_argument("-o", "--output",
help="output file",
default="stdout")
merge_opt.add_argument("-d", "--distance",
type=int,
default=500,
Expand All @@ -51,6 +53,7 @@ def get_args():
action="store_true",
default=False,
help="Estimate distance between calls")

highsens = subparsers.add_parser("highsens",
help="get union of SV vcfs",
formatter_class=ArgumentDefaultsHelpFormatter)
Expand All @@ -59,9 +62,11 @@ def get_args():
nargs='+',
required=True,
help="vcf files to merge")
highsens_req.add_argument("-o", "--output",
highsens_opt = highsens.add_argument_group('optional arguments')
highsens_opt.add_argument("-o", "--output",
help="output file",
required=True)
default="stdout")

highconf = subparsers.add_parser("highconf",
help="get intersection of SV vcfs",
formatter_class=ArgumentDefaultsHelpFormatter)
Expand All @@ -70,9 +75,11 @@ def get_args():
nargs='+',
required=True,
help="vcf files to merge")
highconf_req.add_argument("-o", "--output",
highconf_opt = highconf.add_argument_group('optional arguments')
highconf_opt.add_argument("-o", "--output",
help="output file",
required=True)
default="stdout")

prf = subparsers.add_parser('prf',
help="calculate precision, recall and F-measure",
formatter_class=ArgumentDefaultsHelpFormatter)
Expand Down Expand Up @@ -107,6 +114,7 @@ def get_args():
prf_opt.add_argument("--matrix",
help="Make a confusion matrix.",
action="store_true")

venn = subparsers.add_parser('venn',
help="Make venn diagram for 2 or 3 SV vcf files",
formatter_class=ArgumentDefaultsHelpFormatter)
Expand Down Expand Up @@ -134,6 +142,7 @@ def get_args():
venn_opt.add_argument("--plotout",
help="Name of output plot",
default="venn.png")

upset = subparsers.add_parser('upset',
help="Make upset plot for multiple SV vcf files",
formatter_class=ArgumentDefaultsHelpFormatter)
Expand Down
7 changes: 5 additions & 2 deletions surpyvor/surpyvor.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def sv_merge(samples, distance, callers, require_type, require_strand,
-specify minimal size of SV event (minlength, int)
"""
fhf, fofn_f = tempfile.mkstemp()
fhs, interm_out = tempfile.mkstemp(suffix=".vcf")
with open(fofn_f, 'w') as fofn:
for s in [utils.decompress(s) for s in samples]:
fofn.write(s + "\n")
Expand All @@ -68,11 +69,13 @@ def sv_merge(samples, distance, callers, require_type, require_strand,
str=1 if require_strand else -1,
estm=1 if estimate_distance else -1,
ml=minlength,
out=output)
out=interm_out)
print("Executing SURVIVOR...", end="", flush=True, file=sys.stderr)
subprocess.call(shlex.split(survivor_cmd), stdout=subprocess.DEVNULL)
print("DONE", file=sys.stderr)
utils.vcf_sort(interm_out, output)
os.close(fhf)
os.close(fhs)


def default_merge(args, variants):
Expand Down Expand Up @@ -101,7 +104,7 @@ def precision_recall_fmeasure(args):
print(f"Precision: {round(precision, ndigits=4)}")
recall = tp / len(truth_set)
print(f"Recall: {round(recall, ndigits=4)}")
fmeasure = 2*(precision*recall)/(precision + recall)
fmeasure = 2 * (precision * recall) / (precision + recall)
print(f"F-measure: {round(fmeasure, ndigits=4)}")
if args.bar:
plots.bar_chart(vcf_out)
Expand Down
11 changes: 8 additions & 3 deletions surpyvor/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import sys
import shutil
from shutil import which
import tempfile
from cyvcf2 import VCF
import subprocess
Expand Down Expand Up @@ -127,14 +127,19 @@ def decompress(vcf):


def test_dependencies():
    """Abort the program unless every required external tool is on $PATH.

    Looks up bcftools, bgzip, tabix and SURVIVOR, in that order, and calls
    ``sys.exit`` with an error message naming the first executable that
    cannot be found. Returns ``None`` when all of them are available.
    """
    required = ['bcftools', 'bgzip', 'tabix', 'SURVIVOR']
    for dependency in required:
        # shutil.which yields a path when the executable is found, else None.
        if shutil.which(dependency):
            continue
        sys.exit("ERROR: Could not find required executable '{}'.\n"
                 "Make sure it is installed and in $PATH".format(dependency))


def vcf_sort(input, output):
    """Sort a VCF file by running ``bcftools sort`` on it.

    Parameters:
        input: path of the VCF file to sort.
        output: path of the sorted VCF to write. The special values
            "stdout" and "-" mean that no ``-o`` option is passed, so
            bcftools writes to the standard output inherited from this
            process.

    The command is handed to ``subprocess.call`` as an argument list, not
    as a formatted string that is split again afterwards, so paths that
    contain spaces or quote characters reach bcftools unchanged.

    The exit status of bcftools is not inspected; the function returns None.
    """
    command = ['bcftools', 'sort', input]
    if output not in ("stdout", "-"):
        command += ['-o', output]
    subprocess.call(command)


def confusion_matrix(vcff, names):
"""
First level of the dict is the "first" call, second level is the "second" sample
Expand Down
2 changes: 1 addition & 1 deletion surpyvor/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.4.1"
__version__ = "0.5.0"

0 comments on commit a5762db

Please sign in to comment.