From 0883fbe6be4f80ae9dbde6b8be3ba410fd95366a Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 11 Jul 2018 21:56:05 +0000 Subject: [PATCH 1/2] Made Python 3 compatible --- scripts/srst2.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/scripts/srst2.py b/scripts/srst2.py index ac174ef..65c15c2 100755 --- a/scripts/srst2.py +++ b/scripts/srst2.py @@ -305,8 +305,8 @@ def parse_fai(fai_file,db_type,delimiter): gene_clusters.append(gene_cluster) if len(delimiter_check) > 0: - print "Warning! MLST delimiter is " + delimiter + " but these genes may violate the pattern and cause problems:" - print ",".join(delimiter_check) + print("Warning! MLST delimiter is " + delimiter + " but these genes may violate the pattern and cause problems:") + print(",".join(delimiter_check)) return size, gene_clusters, unique_gene_symbols, unique_allele_symbols, gene_cluster_symbols @@ -697,7 +697,7 @@ def run_bowtie(mapping_files_pre,sample_name,fastqs,args,db_name,db_full_path): try: command += ['-u',str(int(args.stop_after))] except ValueError: - print "WARNING. You asked to stop after mapping '" + args.stop_after + "' reads. I don't understand this, and will map all reads. Please speficy an integer with --stop_after or leave this as default to map 1 million reads." + print("WARNING. You asked to stop after mapping '" + args.stop_after + "' reads. I don't understand this, and will map all reads. Please speficy an integer with --stop_after or leave this as default to map 1 million reads.") if args.other: x = args.other @@ -805,12 +805,12 @@ def calculate_ST(allele_scores, ST_db, gene_names, sample_name, mlst_delimiter, try: clean_st = ST_db[allele_string] except KeyError: - print "This combination of alleles was not found in the sequence type database:", - print sample_name, + print("This combination of alleles was not found in the sequence type database:",) + print(sample_name,) for gene in allele_scores: (allele,diffs,depth_problems,divergence) = allele_scores[gene] - print allele, - print + print(allele,) + print() clean_st = "NF" else: clean_st = "ND" @@ -847,7 +847,7 @@ def parse_ST_database(ST_filename,gene_names_from_fai): ST_db = {} # key = allele string, value = ST gene_names = [] num_gene_cols_expected = len(gene_names_from_fai) - print "Attempting to read " + str(num_gene_cols_expected) + " loci from ST database " + ST_filename + print("Attempting to read " + str(num_gene_cols_expected) + " loci from ST database " + ST_filename) with open(ST_filename) as f: count = 0 for line in f: @@ -857,23 +857,23 @@ def parse_ST_database(ST_filename,gene_names_from_fai): gene_names = line_split[1:min(num_gene_cols_expected+1,len(line_split))] for g in gene_names_from_fai: if g not in gene_names: - print "Warning: gene " + g + " in database file isn't among the columns in the ST definitions: " + ",".join(gene_names) - print " Any sequences with this gene identifer from the database will not be included in typing." + print("Warning: gene " + g + " in database file isn't among the columns in the ST definitions: " + ",".join(gene_names)) + print(" Any sequences with this gene identifer from the database will not be included in typing.") if len(line_split) == num_gene_cols_expected+1: gene_names.pop() # we read too many columns num_gene_cols_expected -= 1 for g in gene_names: if g not in gene_names_from_fai: - print "Warning: gene " + g + " in ST definitions file isn't among those in the database " + ",".join(gene_names_from_fai) - print " This will result in all STs being called as unknown (but allele calls will be accurate for other loci)." + print("Warning: gene " + g + " in ST definitions file isn't among those in the database " + ",".join(gene_names_from_fai)) + print(" This will result in all STs being called as unknown (but allele calls will be accurate for other loci).") else: ST = line_split[0] if ST not in ST_db.values(): ST_string = " ".join(line_split[1:num_gene_cols_expected+1]) ST_db[ST_string] = ST else: - print "Warning: this ST is not unique in the ST definitions file: " + ST - print "Read ST database " + ST_filename + " successfully" + print("Warning: this ST is not unique in the ST definitions file: " + ST) + print("Read ST database " + ST_filename + " successfully") return (ST_db, gene_names) def get_allele_name_from_db(allele,run_type,args,unique_allele_symbols=False,unique_cluster_symbols=False): @@ -1655,9 +1655,9 @@ def main(): if not os.path.exists(output_dir): try: os.makedirs(output_dir) - print "Created directory " + output_dir + " for output" + print("Created directory " + output_dir + " for output") except: - print "Error. Specified output as " + args.output + " however the directory " + output_dir + " does not exist and our attempt to create one failed." + print("Error. Specified output as " + args.output + " however the directory " + output_dir + " does not exist and our attempt to create one failed.") if args.log is True: logfile = args.output + ".log" @@ -1702,9 +1702,9 @@ def main(): if not args.mlst_definitions: # print warning to screen to alert user, may want to stop and restart - print "Warning, MLST allele sequences were provided without ST definitions:" - print " allele sequences: " + str(args.mlst_db) - print " these will be mapped and scored, but STs can not be calculated" + print("Warning, MLST allele sequences were provided without ST definitions:") + print(" allele sequences: " + str(args.mlst_db)) + print(" these will be mapped and scored, but STs can not be calculated") # log logging.info("Warning, MLST allele sequences were provided without ST definitions:") From 217dc1cb8db3aa88d2c27111f6f6da199424ca9d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 12 Jul 2018 01:05:15 +0000 Subject: [PATCH 2/2] made python 3 compatible, updated samtools and bowtie2 versions --- scripts/srst2.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/srst2.py b/scripts/srst2.py index 65c15c2..af9f8e0 100755 --- a/scripts/srst2.py +++ b/scripts/srst2.py @@ -613,11 +613,11 @@ def check_command_version(command_list, version_identifier, command_name, requir # allow multiple specific versions that have been specifically tested def check_bowtie_version(): return check_command_versions([get_bowtie_execs()[0], '--version'], 'version ', 'bowtie', - ['2.1.0','2.2.3','2.2.4','2.2.5','2.2.6','2.2.7','2.2.8','2.2.9']) + ['2.1.0','2.2.3','2.2.4','2.2.5','2.2.6','2.2.7','2.2.8','2.2.9','2.3.2']) def check_samtools_version(): return check_command_versions([get_samtools_exec()], 'Version: ', 'samtools', - ['0.1.18','0.1.19','1.0','1.1','1.2','1.3','(0.1.18 is ' + ['1.5', '0.1.18','0.1.19','1.0','1.1','1.2','1.3','(0.1.18 is ' 'recommended)']) def check_command_versions(command_list, version_prefix, command_name, required_versions): @@ -635,7 +635,9 @@ def check_command_versions(command_list, version_prefix, command_name, required_ command_stdout = e.output for v in required_versions: - if version_prefix + v in command_stdout: + logging.error("COMMAND_STDOUT:") + logging.error(command_stdout) + if (str(version_prefix) + str(v)).encode() in command_stdout: return v logging.error("Incorrect version of {} installed.".format(command_name)) @@ -956,7 +958,7 @@ def parse_scores(run_type,args,scores, hash_edge_depth, for gene in scores_by_gene: gene_hash = scores_by_gene[gene] - scores_sorted = sorted(gene_hash.iteritems(),key=operator.itemgetter(1)) # sort by score + scores_sorted = sorted(gene_hash.items(),key=operator.itemgetter(1)) # sort by score (top_allele,top_score) = scores_sorted[0] # check if depth is adequate for confident call @@ -1279,7 +1281,7 @@ def process_fasta_db(args, fileSets, run_type, db_reports, db_results_list, fast db_path, db_name = os.path.split(fasta) # database (db_name,db_ext) = os.path.splitext(db_name) db_results = "__".join([args.output,run_type,db_name,"results.txt"]) - db_report = file(db_results,"w") + db_report = open(db_results, "w") # file(db_results,"w") db_reports.append(db_results) # Get sequence lengths and gene names @@ -1475,9 +1477,9 @@ def map_fileSet_to_db(args, sample_name, fastq_inputs, db_name, fasta, size, gen full_results = "__".join([args.output,"fullgenes",db_name,"results.txt"]) logging.info("Printing verbose gene detection results to " + full_results) if os.path.exists(full_results): - f = file(full_results,"a") + f = open(full_results,"a") else: - f = file(full_results,"w") # create and write header + f = open(full_results,"w") # create and write header f.write("\t".join(["Sample","DB","gene","allele","coverage","depth","diffs","uncertainty","divergence","length", "maxMAF","clusterid","seqid","annotation"])+"\n") for gene in allele_scores: (allele,diffs,depth_problem,divergence) = allele_scores[gene] # gene = top scoring alleles for each cluster