diff --git a/Dockerfile b/Dockerfile index 5395c5d..e7e4785 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,9 @@ RUN apt-get --allow-releaseinfo-change update -t oldoldstable && apt-get instal mariadb-server \ mariadb-client \ wget \ - zlib1g-dev + zlib1g-dev \ + procps \ + && rm -rf /var/lib/apt/lists/* RUN conda install -c daler \ pip \ @@ -24,7 +26,9 @@ RUN conda install -c daler \ pandas \ pyyaml \ sphinx \ - pysam + pysam \ + colorama \ + termcolor RUN conda install -c daler \ tabix \ bedtools=2.25.0 @@ -32,6 +36,4 @@ ENV DISPLAY=:0 ENV LANG C.UTF-8 WORKDIR /opt/pybedtools -RUN pip install https://github.com/AndersenLab/bam-toolbox/archive/0.0.3.tar.gz -RUN apt-get install -y procps \ - && rm -rf /var/lib/apt/lists/* \ No newline at end of file +RUN pip install https://github.com/AndersenLab/bam-toolbox/archive/1.0.0.tar.gz \ No newline at end of file diff --git a/README.md b/README.md index 0b111a8..22e8049 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,12 @@ ## Installation ``` -pip install https://github.com/AndersenLab/bam-toolbox/archive/0.0.3.tar.gz +pip install https://github.com/AndersenLab/bam-toolbox/archive/1.0.0.tar.gz ``` ## Usage - bam-toolbox 0.1 + bam-toolbox 1.0 usage: bam [...] diff --git a/bam/__init__.py b/bam/__init__.py index e344246..d538f87 100644 --- a/bam/__init__.py +++ b/bam/__init__.py @@ -1 +1 @@ -__version__ = "0.0.3" \ No newline at end of file +__version__ = "1.0.0" \ No newline at end of file diff --git a/bam/cli.py b/bam/cli.py index 80beff9..c69a0d8 100755 --- a/bam/cli.py +++ b/bam/cli.py @@ -9,13 +9,19 @@ coverage """ -from docopt import docopt from subprocess import call, check_output, CalledProcessError -from clint.textui import colored, puts, indent import sys -import bam import os +from docopt import docopt +from colorama import Fore, just_fix_windows_console, init +from termcolor import colored + +import bam + + +just_fix_windows_console() +init(autoreset=True) debug = None if len(sys.argv) == 1: @@ -46,7 +52,7 @@ def is_exe(fpath): def main(): args = docopt(__doc__, - version='bam-toolbox v0.1', + version='bam-toolbox v1.0', argv = debug, options_first=True) argv = [args['']] + args[''] @@ -59,33 +65,27 @@ def main(): for install_name, program in program_list.items(): check_output(["brew", "tap", "homebrew/science"]) try: - with indent(4): - puts(colored.blue("Installing " + install_name)) + print(Fore.BLUE + " Installing " + install_name) check_output(["brew", "install", install_name]) program_installed.remove(install_name) except CalledProcessError: try: check_output(["which", program]) - with indent(4): - puts(colored.blue(program + " previously installed")) + print(Fore.BLUE + " " + program + " previously installed") program_installed.remove(install_name) except CalledProcessError: - with indent(4): - puts(colored.red("Error installing " + install_name)) + print(Fore.RED + " Error installing " + install_name) if len(program_installed) == 0: - with indent(4): - puts(colored.blue("Programs successfully installed!")) + print(Fore.BLUE + " Programs successfully installed!") else: - with indent(4): - puts(colored.red("Error: Not all programs successfully installed: " + ", ".join(program_installed))) + print(Fore.RED + " Error: Not all programs successfully installed: " + ", ".join(program_installed)) elif args[""] == "": print(__doc__) for prog in program_list.values(): try: check_output(["which", prog]) except CalledProcessError: - with indent(4): - puts(colored.red(prog + " not installed. Use a package manager to install or try using 'tb.py setup'\n")) + print(Fore.RED + " " + prog + " not installed. Use a package manager to install or try using 'tb.py setup'\n") elif args[''] in ['coverage', 'readgroups', 'fastq']: comm = ['python', getScriptPath() + '/' + args[""] + ".py"] + argv exit(call(comm)) diff --git a/bam/coverage.py b/bam/coverage.py index 65a9320..fc9a9f4 100644 --- a/bam/coverage.py +++ b/bam/coverage.py @@ -12,16 +12,20 @@ --header print header """ -from docopt import docopt +import sys from collections import OrderedDict -from clint.textui import colored, indent, puts_err import os import re from subprocess import Popen, PIPE - from datetime import datetime from collections import OrderedDict +from docopt import docopt +from colorama import Fore, just_fix_windows_console, init +from termcolor import colored + +just_fix_windows_console() +init(autoreset=True) class output_line: @@ -72,14 +76,14 @@ def __init__(self, fname, mtchr = None): def parse_header(self): header, err = Popen(["samtools", "view", "-H", self.fname], stdout=PIPE, stderr=PIPE).communicate() - if err != "": + if err != b"": raise Exception(err) self.header = header contigs = OrderedDict() contig_regions = [] - for x in re.findall("@SQ\WSN:(?P[A-Za-z0-9_]*)\WLN:(?P[0-9]+)", header): - contigs[x[0]] = int(x[1]) - region = "%s:%s-%s" % (x[0], "1", x[1]) + for x in re.findall(b"@SQ\t[A-Za-z0-9]*SN:(?P[A-Za-z0-9_]*)[A-Za-z0-9]*\tLN:(?P[0-9]+)", header): + contigs[x[0].decode('utf-8')] = int(x[1]) + region = "%s:%s-%s" % (x[0].decode('utf-8'), "1", x[1].decode('utf-8')) contig_regions.append(region) self.contigs = contigs self.contig_regions = contig_regions @@ -87,12 +91,12 @@ def parse_header(self): mtchr = [x for x in self.contigs.keys() if x.lower().find("m") == 0] if len(mtchr) == 1: self.mtchr = mtchr[0] - with indent(4): - puts_err(colored.blue("\nGuessing Mitochondrial Chromosome: " + self.mtchr + "\n")) + print(Fore.BLUE + "\n Guessing Mitochondrial Chromosome: " + self.mtchr + "\n", + file=sys.stderr) self.genome_length = sum(contigs.values()) if mtchr: - self.nuclear_length = sum([x for x in contigs.values() if x != contigs[self.mtchr]]) + self.nuclear_length = sum([x for x in self.contigs.values() if x != self.contigs[self.mtchr]]) def sum_coverage(self, region=None): @@ -101,7 +105,7 @@ def sum_coverage(self, region=None): pos_covered = 0 cum_depth = 0 for row in comm.stdout: - chrom, pos, depth = row.strip().split("\t") + chrom, pos, depth = row.strip().split(b"\t") pos_covered += 1 cum_depth += int(depth) return pos_covered, cum_depth @@ -109,7 +113,7 @@ def sum_coverage(self, region=None): def iterate_window(bamfile, size): for chrom, size in bamfile.contigs.items(): - for i in xrange(1, size, window): + for i in range(1, size, window): if i + window > size: end = size else: @@ -137,10 +141,9 @@ def calc_coverage(bamfile, regions=None, mtchr=None): # If end extends to far, adjust for chrom chrom_len = bamfile.contigs[chrom] if end > chrom_len: - m = "\nSpecified chromosome end extends beyond chromosome length. Set to max of: " - with indent(4): - puts_err(colored.yellow(m + str(chrom_len) + "\n")) - end = chrom_len + m = "\n Specified chromosome end extends beyond chromosome length. Set to max of: " + print(Fore.YELLOW + m + str(chrom_len) + "\n", file=sys.stderr) + end = chrom_len region = "{c}:{s}-{e}".format(c=chrom, s=start, e=end + 1) pos_covered, cum_depth = bamfile.sum_coverage(region) @@ -199,13 +202,14 @@ def calc_coverage(bamfile, regions=None, mtchr=None): Calculate coverage genome wide """ bam = args[""] + print(b.contig_regions, file=sys.stderr) cov = calc_coverage(b, b.contig_regions) # Genomewide depth output_dir = {} + output_dir["chrom"] = "genome" output_dir["start"] = 1 output_dir["end"] = b.genome_length - output_dir["chrom"] = "genome" bases_mapped = sum([x["bases_mapped"] for x in cov]) output_dir["ATTR"] = "bases_mapped" @@ -224,8 +228,8 @@ def calc_coverage(bamfile, regions=None, mtchr=None): if b.mtchr: # Nuclear - output_dir["end"] = b.nuclear_length output_dir["chrom"] = "nuclear" + output_dir["end"] = b.nuclear_length bases_mapped = sum([x["bases_mapped"] for x in cov if x["chrom"] != b.mtchr]) output_dir["ATTR"] = "bases_mapped" print(output_line(bam_name, output_dir, bases_mapped)) @@ -243,6 +247,6 @@ def calc_coverage(bamfile, regions=None, mtchr=None): print(output_line(bam_name, output_dir, pos_mapped)) # mt:nuclear ratio - output_dir = {"start": 1, "end": b.nuclear_length, "chrom": "genome", "ATTR": "mt_nuclear_ratio"} + output_dir = {"chrom": "genome", "start": 1, "end": b.nuclear_length, "ATTR": "mt_nuclear_ratio"} mt_nuc = [x for x in cov if x["chrom"] == b.mtchr][0]["depth_of_coverage"] / coverage print(output_line(bam_name, output_dir, mt_nuc)) diff --git a/requirements.txt b/requirements.txt index 5068ee2..246cb28 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -clint +colorama +termcolor docopt pandas pybedtools \ No newline at end of file