Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug fixes #2

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion common_tads.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from tools.common_domains import find_common_domains, common_domains_multiple_sets, save_domains_matrix
from os import path
import glob
from tools.str2bool import str2bool


def main():
Expand All @@ -28,7 +29,7 @@ def main():
parser.add_argument("-o", "--output",
help="Directory to save output file. Output is saved only when analysing multiple sets of TADs "
"(When --bedfile_1 is a directory. If None save in input directory.", default=None)
parser.add_argument("-r", "--report", help="If True print output matrix to stdout. Default=True", default=True, type=bool)
parser.add_argument("-r", "--report", help="If True print output matrix to stdout. Default=True", default=True, type=str2bool)
parser.add_argument("-s", "--shift", default=0, type=int,
help="Accepted shift of two domain boundaries positions in base pair.")
args = parser.parse_args()
Expand Down
3 changes: 2 additions & 1 deletion moc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
save_moc_matrix, add_row_and_columns_id
from os import path
import glob
from tools.str2bool import str2bool


def main():
Expand All @@ -23,7 +24,7 @@ def main():
parser.add_argument("-o", "--output",
help="Directory to save output file. Output is saved only when analysing multiple sets of TADs "
"(When --bedfile_1 is a directory. If None save in input directory.", default=None)
parser.add_argument("-r", "--report", help="If True return MoC to stdout. Default=True", default=True)
parser.add_argument("-r", "--report", help="If True return MoC to stdout. Default=True", default=True, type=str2bool)
args = parser.parse_args()

if args.output:
Expand Down
3 changes: 2 additions & 1 deletion plot_venn.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import glob
from tools.measure_of_concordance import read_domains_from_bedfile
from tools.common_domains import plot_venn_diagram_of_3_sets, plot_venn_diagram_of_2_sets
from tools.str2bool import str2bool


def main():
Expand All @@ -16,7 +17,7 @@ def main():
help='List of files (two or three) with different domains sets.', required=True)
parser.add_argument("-o", "--output",
help="Directory or filename to save a plot in.", default=None)
parser.add_argument("-s", "--show", help="If True show the plot.", default=True, type=bool)
parser.add_argument("-s", "--show", help="If True show the plot.", default=True, type=str2bool)
args = parser.parse_args()

if args.output:
Expand Down
32 changes: 8 additions & 24 deletions tools/common_domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,38 +14,21 @@ def find_common_domains(set1, set2, shift):
common_domains = []
for i in range(len(set1)):
for j in range(len(set2)):
if shift == 0:
if check_domains(domain1=set1[i], domain2=set2[j]):
common_domains.append(set1[i])
else:
if check_shifted_domains(domain1=set1[i], domain2=set2[j], shift=shift):
common_domains.append(set1[i])
if check_shifted_domains(domain1=set1[i], domain2=set2[j], shift=shift):
common_domains.append(set1[i])
return common_domains


def check_shifted_domains(domain1, domain2, shift):
    """Check if two domains positions are identical (with accepted shift).

    Each domain is indexed as ``[0]`` chromosome, ``[1]`` start and ``[2]`` end.
    Both boundaries of ``domain1`` must lie within ``shift`` of the matching
    boundary of ``domain2``; the tolerance window is inclusive on both sides,
    so ``shift=0`` means the boundaries must be exactly equal.

    :param domain1: first domain (chromosome, start, end)
    :param domain2: second domain (chromosome, start, end)
    :param shift: accepted difference between boundary positions
    :return: True when the domains are on the same chromosome and both
        boundaries agree within ``shift``, otherwise False
    """
    if domain1[0] != domain2[0]:
        # Domains on different chromosomes can never match.
        return False
    # Inclusive comparisons instead of range() membership: range() excludes
    # its upper bound and is empty when shift == 0, which silently rejected
    # exact matches and matches lying exactly `shift` away.
    start_matches = domain2[1] - shift <= domain1[1] <= domain2[1] + shift
    end_matches = domain2[2] - shift <= domain1[2] <= domain2[2] + shift
    return start_matches and end_matches


def check_domains(domain1, domain2):
    """Check if two domains have exact same boundaries.

    Each domain is indexed as ``[0]`` chromosome, ``[1]`` start and ``[2]`` end.

    :param domain1: first domain (chromosome, start, end)
    :param domain2: second domain (chromosome, start, end)
    :return: True when chromosome, start and end are all equal, otherwise False
    """
    return (
        domain1[0] == domain2[0]
        and domain1[1] == domain2[1]
        and domain1[2] == domain2[2]
    )


def save_domains_matrix(tad_matrix, outfile):
"""Format nicely and save conserved domains matrix with sets names"""
tad_matrix.to_csv(outfile, sep=",", header=True, index=True)
Expand All @@ -56,10 +39,11 @@ def common_domains_multiple_sets(domains_sets, shift):
"""Return a matrix with common domains for different sets."""
matr = np.zeros((len(domains_sets), len(domains_sets)), dtype=int)
for i in range(len(domains_sets)):
for j in range(len(domains_sets)):
for j in range(i, len(domains_sets)):
if i == j:
continue
matr[i][j] = len(find_common_domains(set1=domains_sets[i], set2=domains_sets[j], shift=shift))
matr[i][j] = len(domains_sets[i])
else:
matr[i][j] = matr[j][i] = len(find_common_domains(set1=domains_sets[i], set2=domains_sets[j], shift=shift))
return matr


Expand Down
11 changes: 7 additions & 4 deletions tools/measure_of_concordance.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,27 @@ def read_domains_from_bedfile(bedfile):
def calculate_moc(set1, set2):
    """Return the Measure of Concordance (MoC) between two sets of domains.

    Each domain is indexed as ``[0]`` chromosome, ``[1]`` start and ``[2]`` end.
    For every pair of domains on the same chromosome the squared overlap is
    divided by the product of both domain lengths; the sum of these terms is
    then normalised by the number of domains in both sets.

    :param set1: first sequence of domains
    :param set2: second sequence of domains
    :return: 1 when both sets contain exactly one domain, otherwise the
        normalised MoC value as a float
    """
    n1 = len(set1)  # number of domains in set 1
    n2 = len(set2)  # number of domains in set 2
    if n1 == n2 == 1:
        # Compare the set sizes, not the sets themselves. This special case
        # also avoids a zero denominator, since sqrt(1 * 1) - 1 == 0.
        return 1
    # NOTE(review): when either set is empty the formula below evaluates to
    # 1.0 - confirm whether callers can pass empty sets.
    moc = 0
    for domain1 in set1:
        p_i = domain1[2] - domain1[1]  # domain length
        for domain2 in set2:
            if domain1[0] != domain2[0]:  # only compare within a chromosome
                continue
            q_j = domain2[2] - domain2[1]  # domain length
            overlap = check_overlap(start1=domain1[1], end1=domain1[2],
                                    start2=domain2[1], end2=domain2[2])
            if overlap > 0:
                # A positive overlap implies both lengths are positive,
                # so the division is safe.
                moc += overlap ** 2 / (p_i * q_j)
    return (1 / ((n1 * n2) ** (1 / 2) - 1)) * (moc - 1)


def check_overlap(start1, end1, start2, end2):
    """Return the overlap between two domains.

    :param start1: start of the first domain
    :param end1: end of the first domain
    :param start2: start of the second domain
    :param end2: end of the second domain
    :return: length of the shared interval, or 0 when the domains do not
        overlap (domains that merely touch also give 0)
    """
    # Plain arithmetic instead of building a range: the result is a number
    # that callers can compare and square directly, clamped at zero.
    return max(0, min(end1, end2) - max(start1, start2))


def moc_for_multiple_sets(set_list):
Expand Down
12 changes: 12 additions & 0 deletions tools/str2bool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import argparse

def str2bool(v):
    """Convert a command-line value to a bool, for use as an argparse ``type``.

    Values that are already ``bool`` are returned unchanged. Strings are
    compared case-insensitively, ignoring surrounding whitespace.

    :param v: bool, or a string such as "true"/"false", "t"/"f", "yes"/"no",
        "y"/"n", "1"/"0"
    :return: the corresponding bool
    :raises argparse.ArgumentTypeError: when the string is not a recognised
        boolean spelling
    """
    if isinstance(v, bool):
        return v
    normalized = v.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1'):
        return True
    if normalized in ('false', 'f', 'no', 'n', '0'):
        return False
    # argparse prints this message to the user; an empty one gives no hint
    # about which values are accepted.
    raise argparse.ArgumentTypeError(
        "Boolean value expected (true/false, t/f, yes/no, y/n, 1/0), got %r" % (v,))