Skip to content
This repository has been archived by the owner on Dec 23, 2020. It is now read-only.

Commit

Permalink
Merge pull request #13 from a-slide/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
a-slide authored May 8, 2019
2 parents 650ef51 + 04afd22 commit 29654b9
Show file tree
Hide file tree
Showing 7 changed files with 1,058 additions and 1,480 deletions.
43 changes: 37 additions & 6 deletions NanopolishComp/Eventalign_collapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,18 @@
# Standard library imports
import multiprocessing as mp
from time import time
from collections import OrderedDict
from collections import *
import traceback
import datetime

# Third party imports
import numpy as np
from tqdm import tqdm

# Local imports
from NanopolishComp.common import file_readable, dir_writable, NanopolishCompError
from NanopolishComp.common import *
from NanopolishComp import __version__ as package_version
from NanopolishComp import __name__ as package_name

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~LOGGING INFO~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
import logging
Expand All @@ -47,7 +50,7 @@ def __init__ (self,
* input_fn
Path to a nanopolish eventalign tsv output file.
* outdir
Path to the output folder
Path to the output folder (will be created if it does not exist yet)
* outprefix
text outprefix for all the files generated
* max_reads
Expand All @@ -65,21 +68,39 @@ def __init__ (self,
Reduce verbosity
"""

# Save init options in dict for later
kwargs = locals()

# Define overall verbose level
self.log = logging.getLogger()
if verbose:
self.log.setLevel (logging.DEBUG)
elif quiet:
self.log.setLevel (logging.WARNING)
else:
self.log.setLevel (logging.INFO)

# Collect args in dict for log report
self.option_d = OrderedDict()
self.option_d["package_name"] = package_name
self.option_d["package_version"] = package_version
self.option_d["timestamp"] = str(datetime.datetime.now())
for i, j in kwargs.items():
if i != "self":
self.option_d[i]=j
self.log.debug ("Options summary")
self.log.debug (dict_to_str(self.option_d))

# Verify parameters validity
self.log.info ("Checking arguments")
# Try to read input file if not a stream
self.log.debug("\tTesting input file readability")
if input_fn != 0 and not file_readable (input_fn):
raise IOError ("Cannot read input file")
self.log.info("Testing output dir writability")
if not dir_writable (outdir):
raise IOError ("Cannot write output file in indicated folder. Create the output folder if it does not exist yet")
# Try to create output folder
self.log.debug("\tCreating output folder")
mkdir(outdir, exist_ok=True)
# Check other args
self.log.debug("\tChecking number of threads")
if threads < 3:
raise ValueError ("At least 3 threads required")
Expand Down Expand Up @@ -128,6 +149,11 @@ def __init__ (self,
self.log.warning ("\nAn error occured. All processes were killed\n")
raise E

def __repr__ (self):
    """ Return a tabulated text report of the options collected in self.option_d """
    return "General options:\n" + dict_to_str(self.option_d)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PRIVATE METHODS~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
def _split_reads (self, in_q, error_q):
"""
Expand Down Expand Up @@ -307,6 +333,11 @@ def _write_output (self, out_q, error_q):
# Flag last line
data_fp.write ("#\n")

# Open log file
log_fn = os.path.join(self.outdir, self.outprefix+"_eventalign_collapse.log")
with open (log_fn, "w") as log_fp:
log_fp.write (str(self))

# Manage exceptions and deal poison pills
except Exception:
error_q.put (NanopolishCompError(traceback.format_exc()))
Expand Down
42 changes: 31 additions & 11 deletions NanopolishComp/Freq_meth_calculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
# Standard library imports
from collections import *
import csv
import datetime

# Third party imports
from tqdm import tqdm

# Local imports
from NanopolishComp.common import *
from NanopolishComp import __version__ as package_version
from NanopolishComp import __name__ as package_name

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~LOGGING INFO~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
import logging
Expand All @@ -37,7 +40,7 @@ def __init__ (self,
* input_fn
Path to a nanopolish call_methylation tsv output file
* outdir
Path to the output folder
Path to the output folder (will be created if it does not exist yet)
* outprefix
text outprefix for all the files generated
* min_llr
Expand All @@ -56,21 +59,38 @@ def __init__ (self,
Reduce verbosity
"""

# Save init options in dict for later
kwargs = locals()

# Define overall verbose level
self.log = logging.getLogger()
if verbose:
self.log.setLevel (logging.DEBUG)
elif quiet:
self.log.setLevel (logging.WARNING)
else:
self.log.setLevel (logging.INFO)

# Collect args in dict for log report
self.option_d = OrderedDict()
self.option_d["package_name"] = package_name
self.option_d["package_version"] = package_version
self.option_d["timestamp"] = str(datetime.datetime.now())
for i, j in kwargs.items():
if i != "self":
self.option_d[i]=j
self.log.debug ("Options summary")
self.log.debug (dict_to_str(self.option_d))

# Verify parameters validity
self.log.warning ("## Checking arguments ##")
self.log.info("Test input file readability")
# Try to read input file if not a stream
self.log.debug("\tTesting input file readability")
if input_fn != 0 and not file_readable (input_fn):
raise IOError ("Cannot read input file")
self.log.info("Testing output dir writability")
if not dir_writable (outdir):
raise IOError ("Cannot write output file in indicated folder. Create the output folder if it does not exist yet")
# Try to create output folder
self.log.debug("\tCreating output folder")
mkdir(outdir, exist_ok=True)

if motif == "cpg":
motif_seq = "CG"
Expand Down Expand Up @@ -144,7 +164,7 @@ def _parse_methylation_calls(self, fn, min_llr=2.5, min_depth=10, min_meth_freq=

# Print read level counter summary
self.log.debug ("Read sites summary")
self.log.debug (counter_to_str(self.site_c))
self.log.debug (dict_to_str(self.site_c))

self.log.info ("Filtering out positions with low coverage or methylation frequency")
filtered_sites_d = OrderedDict()
Expand All @@ -164,16 +184,17 @@ def _parse_methylation_calls(self, fn, min_llr=2.5, min_depth=10, min_meth_freq=

# Print genomic positions level counter summary
self.log.debug ("Genomic positions summary")
self.log.debug (counter_to_str(self.pos_c))
self.log.debug (dict_to_str(self.pos_c))

return filtered_sites_d

def __repr__ (self):
    """
    Return a tabulated text report made of 3 sections: the general options
    (self.option_d), the read sites counter (self.site_c) and the genomic
    positions counter (self.pos_c). Each section is rendered once with dict_to_str.
    """
    sections = (
        ("General options:\n", self.option_d),
        ("Read sites summary:\n", self.site_c),
        ("Genomic positions summary:\n", self.pos_c))
    # Each title is immediately followed by its tabulated content
    return "".join(title + dict_to_str(content) for title, content in sections)

def _write_output (self, sites_d, outdir, outprefix):
Expand Down Expand Up @@ -202,7 +223,6 @@ def _write_output (self, sites_d, outdir, outprefix):
self.log.info("Writing log file")
fn = os.path.join(outdir, outprefix+"_freq_meth_calculate.log")
with open (fn, "w") as fp:
# Write header
fp.write (str(self))

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~HELPER CLASS~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
Expand Down
2 changes: 1 addition & 1 deletion NanopolishComp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

# Define self package variable
__version__ = "0.6.1"
__version__ = "0.6.2"
__description__ = 'NanopolishComp is a Python3 package for downstream analyses of Nanopolish output files'

# Collect info in a dictionary for setup.py
Expand Down
4 changes: 2 additions & 2 deletions NanopolishComp/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def main(args=None):
subparser_ec.set_defaults(func=Eventalign_collapse_main)
subparser_ec_io = subparser_ec.add_argument_group("Input/Output options")
subparser_ec_io.add_argument("-i", "--input_fn", default=0, help="Path to a nanopolish eventalign tsv output file. If '0' read from std input (default: %(default)s)")
subparser_ec_io.add_argument("-o", "--outdir", type=str, default="./", help="Path to the output folder (default: %(default)s)")
subparser_ec_io.add_argument("-o", "--outdir", type=str, default="./", help="Path to the output folder (will be created if it does exist yet) (default: %(default)s)")
subparser_ec_io.add_argument("-p", "--outprefix", type=str, default="out", help="text outprefix for all the files generated (default: %(default)s)")
subparser_ec_rp = subparser_ec.add_argument_group("Run parameters options")
subparser_ec_rp.add_argument("-s", "--write_samples", default=False, action='store_true', help="If given, will write the raw sample if nanopolish eventalign was ran with --samples option (default: %(default)s)")
Expand All @@ -44,7 +44,7 @@ def main(args=None):
subparser_fm.set_defaults(func=Freq_meth_calculate_main)
subparser_fm_io = subparser_fm.add_argument_group("Input/Output options")
subparser_fm_io.add_argument("-i", "--input_fn", default=0, help="Path to a nanopolish call_methylation tsv output file. If not specified read from std input")
subparser_fm_io.add_argument("-o", "--outdir", type=str, default="./", help="Path to the output folder (default: %(default)s)")
subparser_fm_io.add_argument("-o", "--outdir", type=str, default="./", help="Path to the output folder (will be created if it does exist yet) (default: %(default)s)")
subparser_fm_io.add_argument("-p", "--outprefix", type=str, default="out", help="text outprefix for all the files generated (default: %(default)s)")
subparser_fm_fo = subparser_fm.add_argument_group("Filtering options")
subparser_fm_fo.add_argument("-l", "--min_llr", type=float, default=2.5, help="Log likelihood ratio threshold (default: %(default)s)")
Expand Down
19 changes: 15 additions & 4 deletions NanopolishComp/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ def dir_writable (fn, **kwargs):
fn = os.path.dirname(fn)
return os.path.dirname(fn) and os.access (fn, os.W_OK)

def mkdir (fn, exist_ok=False):
    """
    Create the directory `fn` recursively (missing parent folders are created too).
    * fn
        Path of the directory to create
    * exist_ok
        If False (default) an already existing `fn` is reported as an error
    Raise NanopolishCompError if the directory cannot be created, or if it already
    exists and exist_ok is False. The underlying exception is chained as the cause.
    """
    try:
        os.makedirs (fn, exist_ok=exist_ok)
    # Not a bare `except:` so that KeyboardInterrupt and SystemExit are propagated untouched
    except Exception as E:
        raise NanopolishCompError ("Error creating output folder `{}`".format(fn)) from E

def numeric_cast_dict (d):
"""Cast str values to integer or float from a dict """
for k, v in d.items():
Expand All @@ -48,11 +55,15 @@ def find_subseq_index (seq, subseq):
yield i
i = seq.find(subseq, i+1)

def dict_to_str (c):
    """
    Transform a dict to a tabulated str, one `\\t{key}: {value}\\n` line per item.
    * c
        Counter (or Counter subclass): items are listed in most_common() order and
        values are formatted with a thousands separator.
        Any other mapping: items are listed in items() order and values are
        formatted as is.
    """
    # isinstance rather than an exact type comparison so Counter subclasses are handled as counters
    if isinstance(c, Counter):
        return "".join("\t{}: {:,}\n".format(i, j) for i, j in c.most_common())
    return "".join("\t{}: {}\n".format(i, j) for i, j in c.items())

def counter_to_str (c):
    """ Transform a counter dict to a tabulated str. Former name kept as an alias of dict_to_str """
    return dict_to_str(c)

def jhelp (f:"python function or method"):
Expand Down
Loading

0 comments on commit 29654b9

Please sign in to comment.