Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/nygenome/Conpair
Browse files Browse the repository at this point in the history
  • Loading branch information
Bergmann committed Jun 17, 2018
2 parents 1e47943 + fad92d9 commit 3572dbf
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 24 deletions.
6 changes: 3 additions & 3 deletions modules/ContaminationMarker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2.7
#!/usr/bin/env python

#
# 2015-09-02
Expand Down Expand Up @@ -73,7 +73,7 @@ def parse_mpileup_line(line, min_map_quality=0, min_base_quality=0):
if min_map_quality > 0:
verbose_lines = line[6].split(',')
mapqs = [v.split('@')[-1] for v in verbose_lines]
mapqs_above_threshold = set([i for i in xrange(0, len(mapqs)) if int(mapqs[i]) >= min_map_quality])
mapqs_above_threshold = set([i for i in range(0, len(mapqs)) if int(mapqs[i]) >= min_map_quality])

indexes_A = find_all_positions_of_char(bases, 'A')
indexes_C = find_all_positions_of_char(bases, 'C')
Expand Down Expand Up @@ -173,7 +173,7 @@ def baseQ2int(baseQ_string, scaling_factor=33):


def find_all_positions_of_char(s, char):
indexes = [i for i in xrange(0, len(s)) if s[i] == char]
indexes = [i for i in range(0, len(s)) if s[i] == char]
return(indexes)


Expand Down
15 changes: 7 additions & 8 deletions modules/ContaminationModel.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2.7
#!/usr/bin/env python

#
# 2015-09-02
Expand Down Expand Up @@ -32,7 +32,7 @@
def create_conditional_likelihood_of_base_dict(checkpoints):

D = defaultdict(lambda: defaultdict(dict))
for bq in xrange(0, baseQ_max+1):
for bq in range(0, baseQ_max+1):
Q = phred2prob(bq)/3

f_AAAA_A = lambda x: (1-Q)
Expand Down Expand Up @@ -71,12 +71,12 @@ def create_conditional_likelihood_of_base_dict(checkpoints):
def likelihood_per_marker(A, Scores, checkpoints, ref_basequals, alt_basequals):

for bq in ref_basequals:
for i in xrange(0, len(checkpoints)):
for i in range(0, len(checkpoints)):
v = checkpoints[i]
ref_part = [Scores['AAAA_A'][v][bq], Scores['AABB_A'][v][bq], Scores['AABA_A'][v][bq], Scores['ABAB_A'][v][bq], Scores['ABAA_A'][v][bq], Scores['ABAA_B'][v][bq], Scores['AAAA_B'][v][bq], Scores['AABB_B'][v][bq], Scores['AABA_B'][v][bq]]
A[i] += ref_part
for bq in alt_basequals:
for i in xrange(0, len(checkpoints)):
for i in range(0, len(checkpoints)):
v = checkpoints[i]
alt_part = [Scores['AAAA_B'][v][bq], Scores['AABB_B'][v][bq], Scores['AABA_B'][v][bq], Scores['ABAB_B'][v][bq], Scores['ABAA_B'][v][bq], Scores['ABAA_A'][v][bq], Scores['AAAA_A'][v][bq], Scores['AABB_A'][v][bq], Scores['AABA_A'][v][bq]]
A[i] += alt_part
Expand All @@ -88,16 +88,15 @@ def calculate_contamination_likelihood(checkpoints, Data, Scores):
D = np.zeros([len(checkpoints), 1],dtype='float64')
for marker_data in Data:
A = np.zeros([len(checkpoints), 9],dtype='float64')
for i in xrange(0, len(checkpoints)):
for i in range(0, len(checkpoints)):
A[i] += marker_data[0]
A = likelihood_per_marker(A, Scores, checkpoints, marker_data[1], marker_data[2])
for i in xrange(0, len(checkpoints)):
for i in range(0, len(checkpoints)):
D[i] += log10p(sum([pow(10,x ) for x in A[i]]))

return(D)

def apply_brents_algorithm(Data, Scores, (x1, x2, x3)):

def apply_brents_algorithm(Data, Scores, x1, x2, x3):
def f(x):
if type(x) is np.ndarray:
x = x[0]
Expand Down
2 changes: 1 addition & 1 deletion modules/Genotypes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2.7
#!/usr/bin/env python

#
# 2015-10-08
Expand Down
6 changes: 3 additions & 3 deletions modules/MathOperations.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2.7
#!/usr/bin/env python

#
# 2016-02-29
Expand Down Expand Up @@ -27,6 +27,6 @@ def phred2prob(Q):
def log10p(f):

if f < 1e-323:
return(-324)
return(-324)
else:
return(np.log10(f))
return(np.log10(f))
6 changes: 3 additions & 3 deletions scripts/estimate_tumor_normal_contamination.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def drange(start, stop, step):
Data = []
for line in file:
if line.startswith("[REDUCE RESULT]"):
continue
continue
pileup = ContaminationMarker.parse_mpileup_line(line, min_map_quality=MMQ)
try:
marker = Markers[pileup.chrom + ":" + pileup.pos]
Expand Down Expand Up @@ -140,7 +140,7 @@ def drange(start, stop, step):

### SEARCHING THE SPACE AROUND ARGMAX - Brent's algorithm

optimal_val = ContaminationModel.apply_brents_algorithm(Data, Scores, (x1, x2, x3))
optimal_val = ContaminationModel.apply_brents_algorithm(Data, Scores, x1, x2, x3)

### PRINTING THE NORMAL RESULTS

Expand Down Expand Up @@ -212,7 +212,7 @@ def drange(start, stop, step):

### SEARCHING THE SPACE AROUND ARGMAX - Brent's algorithm

optimal_val = ContaminationModel.apply_brents_algorithm(Data, Scores, (x1, x2, x3))
optimal_val = ContaminationModel.apply_brents_algorithm(Data, Scores, x1, x2, x3)

### PRINTING THE TUMOR RESULTS

Expand Down
4 changes: 2 additions & 2 deletions scripts/run_gatk_pileup_for_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
if opts.temp_dir_java:
JAVA_TEMP = "-Djava.io.tmpdir={}".format(opts.temp_dir_java)
if not os.path.isdir(opts.temp_dir_java):
os.makedirs(opts.temp_dir_java, 0770)
os.makedirs(opts.temp_dir_java, 770)
else:
JAVA_TEMP = ""

Expand All @@ -95,7 +95,7 @@
for line in source_file:
if line.startswith("chr"):
tmp_source.write(line[3:])
else:
else:
tmp_source.write(line)

move(tmp_source.name, source_file.name)
8 changes: 4 additions & 4 deletions scripts/verify_concordance.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@


if opts.outfile == "-":
print round(float(concordant)/(concordant+discordant), 3)
print "Based on " + str(concordant+discordant) + "/" + str(len(Markers)) + " markers (coverage per marker threshold: " + str(COVERAGE_THRESHOLD) + " reads)"
print "Minimum mappinq quality: " + str(MMQ)
print "Minimum base quality: " + str(MBQ)
print(round(float(concordant)/(concordant+discordant), 3))
print("Based on " + str(concordant+discordant) + "/" + str(len(Markers)) + " markers (coverage per marker threshold: " + str(COVERAGE_THRESHOLD) + " reads)")
print("Minimum mappinq quality: " + str(MMQ))
print("Minimum base quality: " + str(MBQ))
else:
outfile = open(opts.outfile, 'w')
outfile.write("Concordance: "+ str(round(100.0*float(concordant)/(concordant+discordant), 2)) + "%\n")
Expand Down

0 comments on commit 3572dbf

Please sign in to comment.