diff --git a/surpyvor/parse_arguments.py b/surpyvor/parse_arguments.py index d1c3312..5905004 100644 --- a/surpyvor/parse_arguments.py +++ b/surpyvor/parse_arguments.py @@ -13,8 +13,10 @@ def get_args(): version='surpyvor: {}, SURVIVOR {}'.format( __version__, get_survivor_version()), help="Print version and quit.") + subparsers = parser.add_subparsers(dest='command', title='[sub-commands]') + merge = subparsers.add_parser("merge", help="merging vcf files of SVs", formatter_class=ArgumentDefaultsHelpFormatter) @@ -23,10 +25,10 @@ def get_args(): nargs='+', required=True, help="vcf files to merge") - merge_req.add_argument("-o", "--output", - help="output file", - required=True) merge_opt = merge.add_argument_group('optional arguments') + merge_opt.add_argument("-o", "--output", + help="output file", + default="stdout") merge_opt.add_argument("-d", "--distance", type=int, default=500, @@ -51,6 +53,7 @@ def get_args(): action="store_true", default=False, help="Estimate distance between calls") + highsens = subparsers.add_parser("highsens", help="get union of SV vcfs", formatter_class=ArgumentDefaultsHelpFormatter) @@ -59,9 +62,11 @@ def get_args(): nargs='+', required=True, help="vcf files to merge") - highsens_req.add_argument("-o", "--output", + highsens_opt = highsens.add_argument_group('optional arguments') + highsens_opt.add_argument("-o", "--output", help="output file", - required=True) + default="stdout") + highconf = subparsers.add_parser("highconf", help="get intersection of SV vcfs", formatter_class=ArgumentDefaultsHelpFormatter) @@ -70,9 +75,11 @@ def get_args(): nargs='+', required=True, help="vcf files to merge") - highconf_req.add_argument("-o", "--output", + highconf_opt = highconf.add_argument_group('optional arguments') + highconf_opt.add_argument("-o", "--output", help="output file", - required=True) + default="stdout") + prf = subparsers.add_parser('prf', help="calculate precision, recall and F-measure", formatter_class=ArgumentDefaultsHelpFormatter) @@ -107,6 +114,7 @@ def get_args(): prf_opt.add_argument("--matrix", help="Make a confusion matrix.", action="store_true") + venn = subparsers.add_parser('venn', help="Make venn diagram for 2 or 3 SV vcf files", formatter_class=ArgumentDefaultsHelpFormatter) @@ -134,6 +142,7 @@ def get_args(): venn_opt.add_argument("--plotout", help="Name of output plot", default="venn.png") + upset = subparsers.add_parser('upset', help="Make upset plot for multiple SV vcf files", formatter_class=ArgumentDefaultsHelpFormatter) diff --git a/surpyvor/surpyvor.py b/surpyvor/surpyvor.py index 1532e35..9666325 100644 --- a/surpyvor/surpyvor.py +++ b/surpyvor/surpyvor.py @@ -57,6 +57,7 @@ def sv_merge(samples, distance, callers, require_type, require_strand, -specify minimal size of SV event (minlength, int) """ fhf, fofn_f = tempfile.mkstemp() + fhs, interm_out = tempfile.mkstemp(suffix=".vcf") with open(fofn_f, 'w') as fofn: for s in [utils.decompress(s) for s in samples]: fofn.write(s + "\n") @@ -68,11 +69,13 @@ def sv_merge(samples, distance, callers, require_type, require_strand, str=1 if require_strand else -1, estm=1 if estimate_distance else -1, ml=minlength, - out=output) + out=interm_out) print("Executing SURVIVOR...", end="", flush=True, file=sys.stderr) subprocess.call(shlex.split(survivor_cmd), stdout=subprocess.DEVNULL) print("DONE", file=sys.stderr) + utils.vcf_sort(interm_out, output) os.close(fhf) + os.close(fhs) def default_merge(args, variants): @@ -101,7 +104,7 @@ def precision_recall_fmeasure(args): print(f"Precision: {round(precision, ndigits=4)}") recall = tp / len(truth_set) print(f"Recall: {round(recall, ndigits=4)}") - fmeasure = 2*(precision*recall)/(precision + recall) + fmeasure = 2 * (precision * recall) / (precision + recall) print(f"F-measure: {round(fmeasure, ndigits=4)}") if args.bar: plots.bar_chart(vcf_out) diff --git a/surpyvor/utils.py b/surpyvor/utils.py index a049227..1be3b27 100644 --- a/surpyvor/utils.py +++ b/surpyvor/utils.py @@ -1,6 +1,6 @@ import os import sys -import shutil +from shutil import which import tempfile from cyvcf2 import VCF import subprocess @@ -127,14 +127,19 @@ def decompress(vcf): def test_dependencies(): - def which(exec): - return shutil.which(exec) for dependency in ['bcftools', 'bgzip', 'tabix', 'SURVIVOR']: if not which(dependency): sys.exit("ERROR: Could not find required executable '{}'.\n" "Make sure it is installed and in $PATH".format(dependency)) +def vcf_sort(input, output): + if output in ["stdout", "-"]: + subprocess.call(shlex.split('bcftools sort {}'.format(input))) + else: + subprocess.call(shlex.split('bcftools sort {} -o {}'.format(input, output))) + + def confusion_matrix(vcff, names): """ First level of the dict is the "first" call, second level is the "second" sample diff --git a/surpyvor/version.py b/surpyvor/version.py index 3d26edf..3d18726 100644 --- a/surpyvor/version.py +++ b/surpyvor/version.py @@ -1 +1 @@ -__version__ = "0.4.1" +__version__ = "0.5.0"