From e2a06c93e0e503db990423f17caafa8834eda739 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Fri, 20 Apr 2012 14:58:16 +0100 Subject: [PATCH 01/16] hybrid tree building python script --- run_gubbins.py | 122 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 104 insertions(+), 18 deletions(-) diff --git a/run_gubbins.py b/run_gubbins.py index 4367bc9e..17a5d96e 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -8,26 +8,86 @@ from Bio import Phylo import dendropy +# config variables +RAXML_EXEC = 'raxmlHPC -f d -m GTRGAMMA' +FASTTREE_EXEC = '/software/pathogen/external/apps/usr/local/fasttree/FastTree' +FASTTREE_PARAMS = '-gtr -gamma -nt' +GUBBINS_EXEC = './gubbins' + def robinson_foulds_distance(input_tree_name,output_tree_name): input_tree = dendropy.Tree.get_from_path(input_tree_name, 'newick') output_tree = dendropy.Tree.get_from_path(output_tree_name, 'newick') input_tree.robinson_foulds_distance(output_tree) + def reroot_tree_with_outgroup(tree_name, outgroup): if outgroup: tree = Phylo.read(tree_name, 'newick') print tree tree.root_with_outgroup({'name': outgroup}) Phylo.write(tree, tree_name, 'newick') + +def raxml_current_tree_name(base_filename_without_ext,current_time, i): + "RAxML_result."+base_filename_without_ext+"."+str(current_time) +".iteration_"+str(i) + + +def raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i): + previous_tree_name = base_filename + + if i> 1: + previous_tree_name = "RAxML_result."+base_filename_without_ext+"."+str(current_time)+".iteration_"+ str(i-1) + return previous_tree_name + + +def raxml_previous_tree(base_filename_without_ext, base_filename, current_time,i): + previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) + previous_tree = "" + + if i> 1: + previous_tree = "-t "+ previous_tree_name + return previous_tree + + +def raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time): + previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) + previous_tree = raxml_previous_tree(base_filename_without_ext, base_filename, current_time,i) + return RAXML_EXEC+ " -s "+previous_tree_name+".phylip -n "+base_filename_without_ext+"."+str(current_time)+".iteration_"+str(i)+" "+previous_tree + + +def raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,alignment_filename): + current_tree = raxml_current_tree_name(base_filename_without_ext,current_time, i) + return GUBBINS_EXEC+" -r "+ alignment_filename+" "+starting_base_filename+".vcf "+str(current_tree)+" "+starting_base_filename+".phylip" + +def fasttree_current_tree_name(base_filename, i): + return base_filename+".iteration_"+str(i) + +def fasttree_previous_tree_name(base_filename, i): + return base_filename+".iteration_"+str(i-1) + +def fasttree_tree_building_command(i, starting_tree, previous_tree_name,current_tree_name ): + current_tree_name = fasttree_current_tree_name(base_filename, i) + previous_tree_name = fasttree_previous_tree_name(base_filename, i) + + input_tree = "" + if starting_tree != "": + input_tree = " -intree" + starting_tree + elif i > 1 : + input_tree = " -intree "+ previous_tree_name + + return FASTTREE_EXEC+" "+ input_tree+" "+ FASTTREE_PARAMS+" "+ previous_tree_name+".snp_sites.aln > "+current_tree_name + +def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_filename): + current_tree = fasttree_current_tree_name(base_filename, i) + return GUBBINS_EXEC+" -r "+ alignment_filename+" "+starting_base_filename+".vcf "+str(current_tree)+" "+starting_base_filename+".phylip" + -# config variables -TREE_BUILDER_EXEC = 'raxmlHPC -f d -m GTRGAMMA' -GUBBINS_EXEC = './gubbins' parser = argparse.ArgumentParser(description='Iteratively detect recombinations') parser.add_argument('alignment_filename', help='Multifasta alignment file') parser.add_argument('--outgroup', '-o', help='Outgroup name for rerooting') parser.add_argument('--starting_tree', '-s', help='Starting tree') +parser.add_argument('--verbose', '-v', help='Debugging output') +parser.add_argument('--tree_builder', '-t', help='Application to use for tree building (raxml, fasttree), default is to use RAxML for 1st iteration and FastTree for rest', default = "hybrid") parser.add_argument('--iterations', '-i', help='Maximum No. of iterations, default is 5', default = 5) args = parser.parse_args() @@ -43,25 +103,50 @@ def reroot_tree_with_outgroup(tree_name, outgroup): previous_robinson_foulds_distance = 0 -for i in range(1, args.iterations+1): - current_tree = "RAxML_result."+base_filename_without_ext+"."+str(current_time) +".iteration_"+str(i) - previous_tree_name = base_filename - previous_tree = "" +tree_building_command = "" +gubbins_command = "" +previous_tree_name = "" +current_tree = "" - if i> 1: - previous_tree_name = "RAxML_result."+base_filename_without_ext+"."+str(current_time)+".iteration_"+ str(i-1) - previous_tree = "-t "+ previous_tree_name - base_filename = previous_tree_name +for i in range(1, args.iterations+1): - tree_building_command = TREE_BUILDER_EXEC+ " -s "+previous_tree_name+".phylip -n "+base_filename_without_ext+"."+str(current_time)+".iteration_"+str(i)+" "+previous_tree - gubbins_command = GUBBINS_EXEC+" -r "+ args.alignment_filename+" "+starting_base_filename+".vcf "+str(current_tree)+" "+starting_base_filename+".phylip" - print tree_building_command - print gubbins_command + if args.tree_builder == "hybrid" : + if i == 1: + previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) + current_tree = raxml_current_tree_name(base_filename_without_ext,current_time, i) + elif i == 2: + previous_tree_name = current_tree + current_tree = fasttree_current_tree_name(base_filename, i) + else: + previous_tree_name = fasttree_previous_tree_name(base_filename, i) + current_tree = fasttree_current_tree_name(base_filename, i) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree, previous_tree_name,current_tree_name ) + gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) + + elif args.tree_builder == "raxml": + previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) + current_tree = raxml_current_tree_name(base_filename_without_ext,current_time, i) + tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time) + gubbins_command = raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,args.alignment_filename) + elif args.tree_builder == "fasttree": + previous_tree_name = fasttree_previous_tree_name(base_filename, i) + current_tree = fasttree_current_tree_name(base_filename, i) + + if i ==1: + os.rename(base_filename+".vcf", previous_tree_name+".vcf") + os.rename(base_filename+".phylip", previous_tree_name+".phylip") + os.rename(base_filename+".snp_sites.aln", previous_tree_name+".snp_sites.aln") + tree_building_command = fasttree_tree_building_command(i, args.starting_tree, previous_tree_name,current_tree_name ) + gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) + + print tree_building_command if args.verbose + subprocess.call(tree_building_command, shell=True) - #subprocess.call(tree_building_command) reroot_tree_with_outgroup(str(current_tree), args.outgroup) - #subprocess.call(gubbins_command) - + + print gubbins_command if args.verbose + subprocess.call(gubbins_command, shell=True) + # first iteration creates tree 1 # 2nd iteration creates tree 2, and you can calculate first RF distance # 3rd iteration creates tree 3, and you can now compare RF distances with the previous iteration @@ -73,4 +158,5 @@ def reroot_tree_with_outgroup(tree_name, outgroup): break else: previous_robinson_foulds_distance = current_robinson_foulds_distance + \ No newline at end of file From 76c0c5c1c67b7a6493783ceb7a4a00a95df8546b Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 10:34:11 +0100 Subject: [PATCH 02/16] verbose output --- run_gubbins.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/run_gubbins.py b/run_gubbins.py index 17a5d96e..4eece59b 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -86,7 +86,7 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ parser.add_argument('alignment_filename', help='Multifasta alignment file') parser.add_argument('--outgroup', '-o', help='Outgroup name for rerooting') parser.add_argument('--starting_tree', '-s', help='Starting tree') -parser.add_argument('--verbose', '-v', help='Debugging output') +parser.add_argument('--verbose', '-v', action='count', help='Turn on debugging') parser.add_argument('--tree_builder', '-t', help='Application to use for tree building (raxml, fasttree), default is to use RAxML for 1st iteration and FastTree for rest', default = "hybrid") parser.add_argument('--iterations', '-i', help='Maximum No. of iterations, default is 5', default = 5) args = parser.parse_args() @@ -139,12 +139,14 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ tree_building_command = fasttree_tree_building_command(i, args.starting_tree, previous_tree_name,current_tree_name ) gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) - print tree_building_command if args.verbose + if args.verbose > 0: + print tree_building_command subprocess.call(tree_building_command, shell=True) reroot_tree_with_outgroup(str(current_tree), args.outgroup) - print gubbins_command if args.verbose + if args.verbose > 0: + print gubbins_command subprocess.call(gubbins_command, shell=True) # first iteration creates tree 1 From a9195e96db2555dcafd43220830f1c41429b5b6b Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 11:06:14 +0100 Subject: [PATCH 03/16] fix hybrid command --- run_gubbins.py | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/run_gubbins.py b/run_gubbins.py index 4eece59b..2f1cee4a 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -55,8 +55,8 @@ def raxml_tree_building_command(i,base_filename_without_ext,base_filename,curren def raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,alignment_filename): - current_tree = raxml_current_tree_name(base_filename_without_ext,current_time, i) - return GUBBINS_EXEC+" -r "+ alignment_filename+" "+starting_base_filename+".vcf "+str(current_tree)+" "+starting_base_filename+".phylip" + current_tree_name = raxml_current_tree_name(base_filename_without_ext,current_time, i) + return GUBBINS_EXEC+" -r "+ alignment_filename+" "+starting_base_filename+".vcf "+str(current_tree_name)+" "+starting_base_filename+".phylip" def fasttree_current_tree_name(base_filename, i): return base_filename+".iteration_"+str(i) @@ -69,16 +69,16 @@ def fasttree_tree_building_command(i, starting_tree, previous_tree_name,current_ previous_tree_name = fasttree_previous_tree_name(base_filename, i) input_tree = "" - if starting_tree != "": - input_tree = " -intree" + starting_tree + if starting_tree is not None: + input_tree = " -intree" + starting_tree elif i > 1 : - input_tree = " -intree "+ previous_tree_name - + input_tree = " -intree "+ previous_tree_name + return FASTTREE_EXEC+" "+ input_tree+" "+ FASTTREE_PARAMS+" "+ previous_tree_name+".snp_sites.aln > "+current_tree_name def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_filename): - current_tree = fasttree_current_tree_name(base_filename, i) - return GUBBINS_EXEC+" -r "+ alignment_filename+" "+starting_base_filename+".vcf "+str(current_tree)+" "+starting_base_filename+".phylip" + current_tree_name = fasttree_current_tree_name(base_filename, i) + return GUBBINS_EXEC+" -r "+ alignment_filename+" "+starting_base_filename+".vcf "+str(current_tree_name)+" "+starting_base_filename+".phylip" @@ -88,7 +88,7 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ parser.add_argument('--starting_tree', '-s', help='Starting tree') parser.add_argument('--verbose', '-v', action='count', help='Turn on debugging') parser.add_argument('--tree_builder', '-t', help='Application to use for tree building (raxml, fasttree), default is to use RAxML for 1st iteration and FastTree for rest', default = "hybrid") -parser.add_argument('--iterations', '-i', help='Maximum No. of iterations, default is 5', default = 5) +parser.add_argument('--iterations', '-i', help='Maximum No. of iterations, default is 5', type=int, default = 5) args = parser.parse_args() # find all snp sites @@ -106,33 +106,38 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ tree_building_command = "" gubbins_command = "" previous_tree_name = "" -current_tree = "" +current_tree_name = "" for i in range(1, args.iterations+1): if args.tree_builder == "hybrid" : if i == 1: previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) - current_tree = raxml_current_tree_name(base_filename_without_ext,current_time, i) + current_tree_name = raxml_current_tree_name(base_filename_without_ext,current_time, i) + tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time) + gubbins_command = raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,args.alignment_filename) elif i == 2: - previous_tree_name = current_tree - current_tree = fasttree_current_tree_name(base_filename, i) + previous_tree_name = current_tree_name + current_tree_name = fasttree_current_tree_name(base_filename, i) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree, previous_tree_name,current_tree_name ) + gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) else: previous_tree_name = fasttree_previous_tree_name(base_filename, i) - current_tree = fasttree_current_tree_name(base_filename, i) - tree_building_command = fasttree_tree_building_command(i, args.starting_tree, previous_tree_name,current_tree_name ) - gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) + current_tree_name = fasttree_current_tree_name(base_filename, i) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree, previous_tree_name,current_tree_name ) + gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) elif args.tree_builder == "raxml": previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) - current_tree = raxml_current_tree_name(base_filename_without_ext,current_time, i) + current_tree_name = raxml_current_tree_name(base_filename_without_ext,current_time, i) tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time) gubbins_command = raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,args.alignment_filename) + elif args.tree_builder == "fasttree": previous_tree_name = fasttree_previous_tree_name(base_filename, i) - current_tree = fasttree_current_tree_name(base_filename, i) + current_tree_name = fasttree_current_tree_name(base_filename, i) - if i ==1: + if i == 1: os.rename(base_filename+".vcf", previous_tree_name+".vcf") os.rename(base_filename+".phylip", previous_tree_name+".phylip") os.rename(base_filename+".snp_sites.aln", previous_tree_name+".snp_sites.aln") @@ -143,7 +148,7 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ print tree_building_command subprocess.call(tree_building_command, shell=True) - reroot_tree_with_outgroup(str(current_tree), args.outgroup) + reroot_tree_with_outgroup(str(current_tree_name), args.outgroup) if args.verbose > 0: print gubbins_command @@ -153,9 +158,9 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ # 2nd iteration creates tree 2, and you can calculate first RF distance # 3rd iteration creates tree 3, and you can now compare RF distances with the previous iteration if i> 1: - previous_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree) + previous_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree_name) elif i > 2: - current_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree) + current_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree_name) if math.ceil(current_robinson_foulds_distance) == math.ceil(previous_robinson_foulds_distance): break else: From 3d93f7af013423742b4c1545e56abdfb8b9dc9e2 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 11:27:06 +0100 Subject: [PATCH 04/16] return required at end of methods --- run_gubbins.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run_gubbins.py b/run_gubbins.py index 2f1cee4a..4f90f040 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -17,7 +17,7 @@ def robinson_foulds_distance(input_tree_name,output_tree_name): input_tree = dendropy.Tree.get_from_path(input_tree_name, 'newick') output_tree = dendropy.Tree.get_from_path(output_tree_name, 'newick') - input_tree.robinson_foulds_distance(output_tree) + return input_tree.robinson_foulds_distance(output_tree) def reroot_tree_with_outgroup(tree_name, outgroup): @@ -28,7 +28,7 @@ def reroot_tree_with_outgroup(tree_name, outgroup): Phylo.write(tree, tree_name, 'newick') def raxml_current_tree_name(base_filename_without_ext,current_time, i): - "RAxML_result."+base_filename_without_ext+"."+str(current_time) +".iteration_"+str(i) + return "RAxML_result."+base_filename_without_ext+"."+str(current_time) +".iteration_"+str(i) def raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i): From 6f5ee53968e33cb9732f1fc5aaa08df78ac62278 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 11:52:53 +0100 Subject: [PATCH 05/16] use the original input file for fasttree --- run_gubbins.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/run_gubbins.py b/run_gubbins.py index 4f90f040..2a840751 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -64,7 +64,7 @@ def fasttree_current_tree_name(base_filename, i): def fasttree_previous_tree_name(base_filename, i): return base_filename+".iteration_"+str(i-1) -def fasttree_tree_building_command(i, starting_tree, previous_tree_name,current_tree_name ): +def fasttree_tree_building_command(i, starting_tree, current_tree_name,starting_base_filename ): current_tree_name = fasttree_current_tree_name(base_filename, i) previous_tree_name = fasttree_previous_tree_name(base_filename, i) @@ -74,7 +74,7 @@ def fasttree_tree_building_command(i, starting_tree, previous_tree_name,current_ elif i > 1 : input_tree = " -intree "+ previous_tree_name - return FASTTREE_EXEC+" "+ input_tree+" "+ FASTTREE_PARAMS+" "+ previous_tree_name+".snp_sites.aln > "+current_tree_name + return FASTTREE_EXEC+" "+ input_tree+" "+ FASTTREE_PARAMS+" "+ starting_base_filename+".snp_sites.aln > "+current_tree_name def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_filename): current_tree_name = fasttree_current_tree_name(base_filename, i) @@ -119,12 +119,12 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ elif i == 2: previous_tree_name = current_tree_name current_tree_name = fasttree_current_tree_name(base_filename, i) - tree_building_command = fasttree_tree_building_command(i, args.starting_tree, previous_tree_name,current_tree_name ) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename ) gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) else: previous_tree_name = fasttree_previous_tree_name(base_filename, i) current_tree_name = fasttree_current_tree_name(base_filename, i) - tree_building_command = fasttree_tree_building_command(i, args.starting_tree, previous_tree_name,current_tree_name ) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename ) gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) elif args.tree_builder == "raxml": @@ -141,7 +141,7 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ os.rename(base_filename+".vcf", previous_tree_name+".vcf") os.rename(base_filename+".phylip", previous_tree_name+".phylip") os.rename(base_filename+".snp_sites.aln", previous_tree_name+".snp_sites.aln") - tree_building_command = fasttree_tree_building_command(i, args.starting_tree, previous_tree_name,current_tree_name ) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename ) gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) if args.verbose > 0: From 50f5bfa849d5e86e7c2f295f890d24c3b9578b77 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 11:57:03 +0100 Subject: [PATCH 06/16] provide previous tree to fasttree --- run_gubbins.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/run_gubbins.py b/run_gubbins.py index 2a840751..864a1f63 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -64,9 +64,8 @@ def fasttree_current_tree_name(base_filename, i): def fasttree_previous_tree_name(base_filename, i): return base_filename+".iteration_"+str(i-1) -def fasttree_tree_building_command(i, starting_tree, current_tree_name,starting_base_filename ): +def fasttree_tree_building_command(i, starting_tree, current_tree_name,starting_base_filename, previous_tree_name ): current_tree_name = fasttree_current_tree_name(base_filename, i) - previous_tree_name = fasttree_previous_tree_name(base_filename, i) input_tree = "" if starting_tree is not None: @@ -119,12 +118,12 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ elif i == 2: previous_tree_name = current_tree_name current_tree_name = fasttree_current_tree_name(base_filename, i) - tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename ) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename,previous_tree_name ) gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) else: previous_tree_name = fasttree_previous_tree_name(base_filename, i) current_tree_name = fasttree_current_tree_name(base_filename, i) - tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename ) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename,previous_tree_name ) gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) elif args.tree_builder == "raxml": @@ -141,7 +140,7 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ os.rename(base_filename+".vcf", previous_tree_name+".vcf") os.rename(base_filename+".phylip", previous_tree_name+".phylip") os.rename(base_filename+".snp_sites.aln", previous_tree_name+".snp_sites.aln") - tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename ) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename,previous_tree_name ) gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) if args.verbose > 0: From 12abe317f7745043d0630972c95cb44bb23fb3bf Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 11:57:19 +0100 Subject: [PATCH 07/16] FastTree without abs path --- run_gubbins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_gubbins.py b/run_gubbins.py index 864a1f63..f3511942 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -10,7 +10,7 @@ # config variables RAXML_EXEC = 'raxmlHPC -f d -m GTRGAMMA' -FASTTREE_EXEC = '/software/pathogen/external/apps/usr/local/fasttree/FastTree' +FASTTREE_EXEC = 'FastTree' FASTTREE_PARAMS = '-gtr -gamma -nt' GUBBINS_EXEC = './gubbins' From d34fe5ca4db54bacf366ca0e62f1e37ccfb670e9 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 12:05:07 +0100 Subject: [PATCH 08/16] pass globals as params --- run_gubbins.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/run_gubbins.py b/run_gubbins.py index f3511942..9aacdaa4 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -48,15 +48,15 @@ def raxml_previous_tree(base_filename_without_ext, base_filename, current_time,i return previous_tree -def raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time): +def raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time, raxml_exec): previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) previous_tree = raxml_previous_tree(base_filename_without_ext, base_filename, current_time,i) - return RAXML_EXEC+ " -s "+previous_tree_name+".phylip -n "+base_filename_without_ext+"."+str(current_time)+".iteration_"+str(i)+" "+previous_tree + return raxml_exec+ " -s "+previous_tree_name+".phylip -n "+base_filename_without_ext+"."+str(current_time)+".iteration_"+str(i)+" "+previous_tree -def raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,alignment_filename): +def raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,alignment_filename,gubbins_exec): current_tree_name = raxml_current_tree_name(base_filename_without_ext,current_time, i) - return GUBBINS_EXEC+" -r "+ alignment_filename+" "+starting_base_filename+".vcf "+str(current_tree_name)+" "+starting_base_filename+".phylip" + return gubbins_exec+" -r "+ alignment_filename+" "+starting_base_filename+".vcf "+str(current_tree_name)+" "+starting_base_filename+".phylip" def fasttree_current_tree_name(base_filename, i): return base_filename+".iteration_"+str(i) @@ -64,7 +64,7 @@ def fasttree_current_tree_name(base_filename, i): def fasttree_previous_tree_name(base_filename, i): return base_filename+".iteration_"+str(i-1) -def fasttree_tree_building_command(i, starting_tree, current_tree_name,starting_base_filename, previous_tree_name ): +def fasttree_tree_building_command(i, starting_tree, current_tree_name,starting_base_filename, previous_tree_name,fasttree_exec, fasttree_params ): current_tree_name = fasttree_current_tree_name(base_filename, i) input_tree = "" @@ -73,11 +73,11 @@ def fasttree_tree_building_command(i, starting_tree, current_tree_name,starting_ elif i > 1 : input_tree = " -intree "+ previous_tree_name - return FASTTREE_EXEC+" "+ input_tree+" "+ FASTTREE_PARAMS+" "+ starting_base_filename+".snp_sites.aln > "+current_tree_name + return fasttree_exec+" "+ input_tree+" "+ fasttree_params+" "+ starting_base_filename+".snp_sites.aln > "+current_tree_name -def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_filename): +def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_filename,gubbins_exec): current_tree_name = fasttree_current_tree_name(base_filename, i) - return GUBBINS_EXEC+" -r "+ alignment_filename+" "+starting_base_filename+".vcf "+str(current_tree_name)+" "+starting_base_filename+".phylip" + return gubbins_exec+" -r "+ alignment_filename+" "+starting_base_filename+".vcf "+str(current_tree_name)+" "+starting_base_filename+".phylip" @@ -113,24 +113,24 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ if i == 1: previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) current_tree_name = raxml_current_tree_name(base_filename_without_ext,current_time, i) - tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time) - gubbins_command = raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,args.alignment_filename) + tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time,RAXML_EXEC) + gubbins_command = raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,args.alignment_filename,GUBBINS_EXEC) elif i == 2: previous_tree_name = current_tree_name current_tree_name = fasttree_current_tree_name(base_filename, i) - tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename,previous_tree_name ) - gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename,previous_tree_name,FASTTREE_EXEC,FASTTREE_PARAMS ) + gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename,GUBBINS_EXEC) else: previous_tree_name = fasttree_previous_tree_name(base_filename, i) current_tree_name = fasttree_current_tree_name(base_filename, i) - tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename,previous_tree_name ) - gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename,previous_tree_name,FASTTREE_EXEC,FASTTREE_PARAMS ) + gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename,GUBBINS_EXEC) elif args.tree_builder == "raxml": previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) current_tree_name = raxml_current_tree_name(base_filename_without_ext,current_time, i) - tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time) - gubbins_command = raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,args.alignment_filename) + tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time,RAXML_EXEC) + gubbins_command = raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,args.alignment_filename,GUBBINS_EXEC) elif args.tree_builder == "fasttree": previous_tree_name = fasttree_previous_tree_name(base_filename, i) @@ -140,8 +140,8 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ os.rename(base_filename+".vcf", previous_tree_name+".vcf") os.rename(base_filename+".phylip", previous_tree_name+".phylip") os.rename(base_filename+".snp_sites.aln", previous_tree_name+".snp_sites.aln") - tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename,previous_tree_name ) - gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename) + tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename,previous_tree_name,FASTTREE_EXEC,FASTTREE_PARAMS ) + gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename,GUBBINS_EXEC) if args.verbose > 0: print tree_building_command From 9edcec541c4a6dabb75762d70df3068b4f310348 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 12:13:01 +0100 Subject: [PATCH 09/16] run rf distance for 2nd or more iteration --- run_gubbins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_gubbins.py b/run_gubbins.py index 9aacdaa4..7d16fdfe 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -156,7 +156,7 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ # first iteration creates tree 1 # 2nd iteration creates tree 2, and you can calculate first RF distance # 3rd iteration creates tree 3, and you can now compare RF distances with the previous iteration - if i> 1: + if i == 2: previous_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree_name) elif i > 2: current_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree_name) From 4b2a333be992088bc1a50404b4030344d6b5777b Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 12:15:54 +0100 Subject: [PATCH 10/16] rf distance verbose --- run_gubbins.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/run_gubbins.py b/run_gubbins.py index 7d16fdfe..0081c12a 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -160,6 +160,8 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ previous_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree_name) elif i > 2: current_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree_name) + if args.verbose > 0: + print "RF Distance (previous, current): "+ previous_robinson_foulds_distance +", "+ current_robinson_foulds_distance if math.ceil(current_robinson_foulds_distance) == math.ceil(previous_robinson_foulds_distance): break else: From d1c4645c6c79e1d34d3c26e67cd7319b109c01ca Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 12:27:04 +0100 Subject: [PATCH 11/16] import math --- run_gubbins.py | 1 + 1 file changed, 1 insertion(+) diff --git a/run_gubbins.py b/run_gubbins.py index 0081c12a..fc876c2e 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -7,6 +7,7 @@ import time from Bio import Phylo import dendropy +import math # config variables RAXML_EXEC = 'raxmlHPC -f d -m GTRGAMMA' From e585d1acf24ba7db94c5d6794435ba7ef02a346a Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 12:32:04 +0100 Subject: [PATCH 12/16] convert RF distance to string before printing --- run_gubbins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_gubbins.py b/run_gubbins.py index fc876c2e..f00e4bd8 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -162,7 +162,7 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ elif i > 2: current_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree_name) if args.verbose > 0: - print "RF Distance (previous, current): "+ previous_robinson_foulds_distance +", "+ current_robinson_foulds_distance + print "RF Distance (previous, current): "+ str(previous_robinson_foulds_distance) +", "+ str(current_robinson_foulds_distance) if math.ceil(current_robinson_foulds_distance) == math.ceil(previous_robinson_foulds_distance): break else: From ea8a8e6608604ae5408fdac0d687c48eeba247eb Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 12:39:14 +0100 Subject: [PATCH 13/16] dont round up RF distance --- run_gubbins.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run_gubbins.py b/run_gubbins.py index f00e4bd8..c4f11a35 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -7,7 +7,7 @@ import time from Bio import Phylo import dendropy -import math + # config variables RAXML_EXEC = 'raxmlHPC -f d -m GTRGAMMA' @@ -163,7 +163,7 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ current_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree_name) if args.verbose > 0: print "RF Distance (previous, current): "+ str(previous_robinson_foulds_distance) +", "+ str(current_robinson_foulds_distance) - if math.ceil(current_robinson_foulds_distance) == math.ceil(previous_robinson_foulds_distance): + if current_robinson_foulds_distance == previous_robinson_foulds_distance: break else: previous_robinson_foulds_distance = current_robinson_foulds_distance From b7f27bb5b7c3d5cc9db23427cd529fcd9c3a5ee0 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 12:50:56 +0100 Subject: [PATCH 14/16] save all previous RF distances --- run_gubbins.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/run_gubbins.py b/run_gubbins.py index c4f11a35..c1f3da5a 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -7,6 +7,7 @@ import time from Bio import Phylo import dendropy +from array import * # config variables @@ -101,7 +102,7 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ current_time = int(time.time()) -previous_robinson_foulds_distance = 0 +previous_robinson_foulds_distances = array('d',[]) tree_building_command = "" gubbins_command = "" @@ -158,14 +159,16 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ # 2nd iteration creates tree 2, and you can calculate first RF distance # 3rd iteration creates tree 3, and you can now compare RF distances with the previous iteration if i == 2: - previous_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree_name) + previous_robinson_foulds_distances.append(robinson_foulds_distance(previous_tree_name,current_tree_name)) elif i > 2: current_robinson_foulds_distance = robinson_foulds_distance(previous_tree_name,current_tree_name) if args.verbose > 0: - print "RF Distance (previous, current): "+ str(previous_robinson_foulds_distance) +", "+ str(current_robinson_foulds_distance) - if current_robinson_foulds_distance == previous_robinson_foulds_distance: + print "RF Distance (previous, current): "+ str(previous_robinson_foulds_distances) +", "+ str(current_robinson_foulds_distance) + + try: + previous_robinson_foulds_distances.index(current_robinson_foulds_distance) break - else: - previous_robinson_foulds_distance = current_robinson_foulds_distance - + except ValueError: + previous_robinson_foulds_distances.append(current_robinson_foulds_distance) + \ No newline at end of file From 436b5155d8e428df8b8c015a5a19acecf3eed42e Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 13:06:23 +0100 Subject: [PATCH 15/16] hybrid should run fasttree first --- run_gubbins.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/run_gubbins.py b/run_gubbins.py index c1f3da5a..446d557b 100644 --- a/run_gubbins.py +++ b/run_gubbins.py @@ -41,8 +41,7 @@ def raxml_previous_tree_name(base_filename_without_ext,base_filename, current_ti return previous_tree_name -def raxml_previous_tree(base_filename_without_ext, base_filename, current_time,i): - previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) +def raxml_previous_tree(base_filename_without_ext, base_filename, current_time,i,previous_tree_name): previous_tree = "" if i> 1: @@ -50,9 +49,8 @@ def raxml_previous_tree(base_filename_without_ext, base_filename, current_time,i return previous_tree -def raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time, raxml_exec): - previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) - previous_tree = raxml_previous_tree(base_filename_without_ext, base_filename, current_time,i) +def raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time, raxml_exec,previous_tree_name): + previous_tree = raxml_previous_tree(base_filename_without_ext, base_filename, current_time,i,previous_tree_name) return raxml_exec+ " -s "+previous_tree_name+".phylip -n "+base_filename_without_ext+"."+str(current_time)+".iteration_"+str(i)+" "+previous_tree @@ -88,7 +86,7 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ parser.add_argument('--outgroup', '-o', help='Outgroup name for rerooting') parser.add_argument('--starting_tree', '-s', help='Starting tree') parser.add_argument('--verbose', '-v', action='count', help='Turn on debugging') -parser.add_argument('--tree_builder', '-t', help='Application to use for tree building (raxml, fasttree), default is to use RAxML for 1st iteration and FastTree for rest', default = "hybrid") +parser.add_argument('--tree_builder', '-t', help='Application to use for tree building (raxml, fasttree), default is to use FastTree for 1st iteration and RAxML for rest', default = "hybrid") parser.add_argument('--iterations', '-i', help='Maximum No. of iterations, default is 5', type=int, default = 5) args = parser.parse_args() @@ -113,25 +111,26 @@ def fasttree_gubbins_command(base_filename,starting_base_filename, i,alignment_ if args.tree_builder == "hybrid" : if i == 1: - previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) - current_tree_name = raxml_current_tree_name(base_filename_without_ext,current_time, i) - tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time,RAXML_EXEC) - gubbins_command = raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,args.alignment_filename,GUBBINS_EXEC) - elif i == 2: - previous_tree_name = current_tree_name - current_tree_name = fasttree_current_tree_name(base_filename, i) - tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename,previous_tree_name,FASTTREE_EXEC,FASTTREE_PARAMS ) - gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename,GUBBINS_EXEC) - else: previous_tree_name = fasttree_previous_tree_name(base_filename, i) current_tree_name = fasttree_current_tree_name(base_filename, i) tree_building_command = fasttree_tree_building_command(i, args.starting_tree,current_tree_name,args.alignment_filename,previous_tree_name,FASTTREE_EXEC,FASTTREE_PARAMS ) gubbins_command = fasttree_gubbins_command(base_filename,starting_base_filename, i,args.alignment_filename,GUBBINS_EXEC) + + elif i == 2: + previous_tree_name = current_tree_name + current_tree_name = raxml_current_tree_name(base_filename_without_ext,current_time, i) + tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time,RAXML_EXEC,previous_tree_name) + gubbins_command = raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,args.alignment_filename,GUBBINS_EXEC) + else: + previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) + current_tree_name = raxml_current_tree_name(base_filename_without_ext,current_time, i) + tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time,RAXML_EXEC,previous_tree_name) + gubbins_command = raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,args.alignment_filename,GUBBINS_EXEC) elif args.tree_builder == "raxml": previous_tree_name = raxml_previous_tree_name(base_filename_without_ext,base_filename, current_time,i) current_tree_name = raxml_current_tree_name(base_filename_without_ext,current_time, i) - tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time,RAXML_EXEC) + tree_building_command = raxml_tree_building_command(i,base_filename_without_ext,base_filename,current_time,RAXML_EXEC,previous_tree_name) gubbins_command = raxml_gubbins_command(base_filename_without_ext,starting_base_filename,current_time, i,args.alignment_filename,GUBBINS_EXEC) elif args.tree_builder == "fasttree": From f761d950d73f38fac19c7243fb1d0aac239b0281 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 23 Apr 2012 13:09:09 +0100 Subject: [PATCH 16/16] make python script executable --- run_gubbins.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 run_gubbins.py diff --git a/run_gubbins.py b/run_gubbins.py old mode 100644 new mode 100755