Skip to content

Commit

Permalink
Merge pull request #85 from dieterich-lab/devel
Browse files Browse the repository at this point in the history
Go for it
  • Loading branch information
tjakobi authored Aug 12, 2020
2 parents 6e4fe3c + 07192e7 commit afd5cdd
Show file tree
Hide file tree
Showing 12 changed files with 133 additions and 108 deletions.
10 changes: 5 additions & 5 deletions DCC/Circ_nonCirc_Exon_Match.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import HTSeq

from IntervalTree import IntervalTree
from .IntervalTree import IntervalTree


class CircNonCircExon(object):
Expand Down Expand Up @@ -194,7 +194,7 @@ def printuniq(self, Infile):
for lin in f:
lin_split = lin.split('\t')
if keys.count(lin_split[0] + '\t' + lin_split[1] + '\t' + lin_split[2]) == 1:
print lin.strip('\n')
print(lin.strip('\n'))

def readgtf(self, gtf_file):
# store nonCircExons based on transcript_id and exon_number with all its annotations from different transcripts
Expand Down Expand Up @@ -275,7 +275,7 @@ def readHTSeqCount(self, HTSeqCount, exon_id2custom_exon_id):
def findcircAdjacent(self, circExons, Custom_exon_id2Iv, Iv2Custom_exon_id, start=True):
circAdjacentExons = {}
circAdjacentExonsIv = {}
for key in circExons.keys():
for key in list(circExons.keys()):
for ids in circExons[key]:
try:
interval = Custom_exon_id2Iv[self.getAdjacent(ids, start=start)]
Expand All @@ -292,7 +292,7 @@ def printCounts(self, Exons, Count_custom_exon_id, Custom_exon_id2Length):
# Print the counts of circexons and adjacentexons
# Exons: dictionaries with intervals as key, custom_exon_id as values
ExonCounts = {}
for key in Exons.keys():
for key in list(Exons.keys()):
counts = []
for ids in Exons[key]: # If for circAdjacentExons, ids here is a list
try:
Expand Down Expand Up @@ -397,7 +397,7 @@ def readSJ_out_tab(self, SJ_out_tab):
strand] = lin_split[6]
sj.close()
except IOError:
print 'Do you have SJ.out.tab files in your sample folder? DCC cannot find it.'
print('Do you have SJ.out.tab files in your sample folder? DCC cannot find it.')
return junctionReadCount

def getskipjunctionCount(self, exonskipjunctions, junctionReadCount):
Expand Down
4 changes: 2 additions & 2 deletions DCC/CombineCounts.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ def comb_coor(self, circfiles, strand=True):
onefile.close()

if strand:
coors = ['\t'.join(key.split('\t')[:-1]) + value for key, value in coorsDict.iteritems()]
coors = ['\t'.join(key.split('\t')[:-1]) + value for key, value in coorsDict.items()]
else:
coors = ['{}{}'.format(key, value) for key, value in coorsDict.iteritems()]
coors = ['{}{}'.format(key, value) for key, value in coorsDict.items()]

coorsSorted = self.sortBed(coors, retList=True)
for itm in coorsSorted:
Expand Down
2 changes: 1 addition & 1 deletion DCC/IntervalTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def intersect(self, interval, report_func):
# use the intersect method of the IntervalNode class; need to make this function aware of strand

def traverse(self, func):
for item in self.chroms.itervalues():
for item in self.chroms.values():
item.traverse(func)


Expand Down
16 changes: 8 additions & 8 deletions DCC/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Import modules
from findcircRNA import Findcirc
from circFilter import Circfilter
from circAnnotate import CircAnnotate
from genecount import Genecount
from CombineCounts import Combine
from Circ_nonCirc_Exon_Match import CircNonCircExon
from IntervalTree import IntervalTree
from main import main
from .findcircRNA import Findcirc
from .circFilter import Circfilter
from .circAnnotate import CircAnnotate
from .genecount import Genecount
from .CombineCounts import Combine
from .Circ_nonCirc_Exon_Match import CircNonCircExon
from .IntervalTree import IntervalTree
from .main import main
2 changes: 1 addition & 1 deletion DCC/circAnnotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import HTSeq

from IntervalTree import IntervalTree
from .IntervalTree import IntervalTree


class CircAnnotate(object):
Expand Down
6 changes: 3 additions & 3 deletions DCC/circFilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import HTSeq

from IntervalTree import IntervalTree
from .IntervalTree import IntervalTree


##########################
Expand Down Expand Up @@ -63,7 +63,7 @@ def readcirc(self, countfile, coordinates):

# Do filtering
def filtercount(self, count, indx):
print 'Filtering by read counts'
print('Filtering by read counts')
sel = [] # store the passed filtering rows
for itm in range(len(count)):
if indx[itm][4] == '0':
Expand Down Expand Up @@ -117,7 +117,7 @@ def dummy_filter(self, indx0, count0):
np.savetxt(self.tmp_dir + 'tmp_unsortedWithChrM', nonrep, delimiter='\t', newline='\n', fmt='%s')

def removeChrM(self, withChrM):
print 'Remove ChrM'
print('Remove ChrM')
unremoved = open(withChrM, 'r').readlines()
removed = []
for lines in unremoved:
Expand Down
8 changes: 4 additions & 4 deletions DCC/findcircRNA.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def sepDuplicates(self, Chim_junc, duplicates, nonduplicates):
if reads.count(read) == 2:
dup.write(lines[indx])
elif reads.count(read) > 2:
print 'Read %s has more than 2 count.' % read
print('Read %s has more than 2 count.' % read)
try:
logging.warning('Read %s has more than 2 count.' % read)
except NameError:
Expand Down Expand Up @@ -159,8 +159,8 @@ def findcirc(self, Chim_junc, output, strand=True):
linecnt = linecnt + 1

if len(L) < 14:
print ("WARNING: File " + str(Chim_junc) + ", line " + str(linecnt) + " does not contain all features.")
print ("WARNING: " + str(Chim_junc) + " is probably corrupt.")
print(("WARNING: File " + str(Chim_junc) + ", line " + str(linecnt) + " does not contain all features."))
print(("WARNING: " + str(Chim_junc) + " is probably corrupt."))
if L[0] == "chr_donorA":
continue
if int(L[6]) >= 0 and L[0] == L[3] and L[2] == L[5] and (
Expand Down Expand Up @@ -217,7 +217,7 @@ def count(self, sortedlist, strand=True):
elif not strand:
circs = (itm[0], itm[1], itm[2])
else:
print "Please specify correct strand information."
print("Please specify correct strand information.")
cnt[circs] += 1
itm.append(str(cnt[circs]))
# tmp_count.append( [itm[0],itm[1],itm[2],itm[3],itm[7],itm[4],itm[5],itm[6]] )
Expand Down
10 changes: 5 additions & 5 deletions DCC/fix2chimera.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ def modify_junctiontype(junctiontype):
continue
# check if the row has all fields
if len(line_split) < 14:
print ("WARNING: File " + str(chimeric_junction_mate2) + ", line " + str(linecnt)
+ " does not contain all features.")
print ("WARNING: " + str(chimeric_junction_mate2) + " is probably corrupt.")
print ("WARNING: Offending line: " + str(line))
print(("WARNING: File " + str(chimeric_junction_mate2) + ", line " + str(linecnt)
+ " does not contain all features."))
print(("WARNING: " + str(chimeric_junction_mate2) + " is probably corrupt."))
print(("WARNING: Offending line: " + str(line)))

linecnt += 1

Expand Down Expand Up @@ -110,7 +110,7 @@ def printduplicates(self, merged, duplicates, field=10):
if not os.path.isfile(merged):
sys.exit("ERROR: File " + str(merged) + " is missing!")
elif os.stat(merged).st_size == 0:
print ("WARNING: File " + str(merged) + " is empty!")
print(("WARNING: File " + str(merged) + " is empty!"))
else:
try:
inputfile = open(merged, 'r')
Expand Down
36 changes: 18 additions & 18 deletions DCC/genecount.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,33 +99,33 @@ def genecount(self, circ_coordinates, bamfile, ref, tid):
start_coordinates.close()
end_coordinates.close()

print ('Started linear gene expression counting for %s' % bamfile)
print(('Started linear gene expression counting for %s' % bamfile))

start = time.time()
# mpileup gets the read counts of the start and end positions
print ("\t=> running mpileup for start positions [%s]" % bamfile)
print(("\t=> running mpileup for start positions [%s]" % bamfile))
mpileup_start = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coordinates_' + tid)
end = time.time() - start
print ("\t=> mpileup for start positions for %s took %d seconds" % (bamfile, end))
print(("\t=> mpileup for start positions for %s took %d seconds" % (bamfile, end)))

start = time.time()
# mpileup gets the read counts of the start and end positions
print ("\t=> running mpileup for end positions [%s]" % bamfile)
print(("\t=> running mpileup for end positions [%s]" % bamfile))
mpileup_end = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coordinates_' + tid)
end = time.time() - start
print ("\t=> mpileup for end positions for %s took %d seconds" % (bamfile, end))
print(("\t=> mpileup for end positions for %s took %d seconds" % (bamfile, end)))

print "\t=> gathering read counts for start positions [%s]" % bamfile
print("\t=> gathering read counts for start positions [%s]" % bamfile)
startcount = self.getreadscount(mpileup_start, countmapped=True)

print "\t=> gathering read counts for end positions [%s]" % bamfile
print("\t=> gathering read counts for end positions [%s]" % bamfile)
endcount = self.getreadscount(mpileup_end, countmapped=True)

# remove tmp files
# os.remove(self.tmp_dir + 'tmp_start_coordinates_' + tid)
# os.remove(self.tmp_dir + 'tmp_end_coordinates_' + tid)

print 'Finished linear gene expression counting for %s' % bamfile
print('Finished linear gene expression counting for %s' % bamfile)

return startcount, endcount

Expand Down Expand Up @@ -194,29 +194,29 @@ def linearsplicedreadscount(self, circ_coor, bamfile, ref, header=True):
start_coor_1.close()
end_coor.close()
end_coor_1.close()
print ('Started linear spliced read counting for %s' % bamfile)
print(('Started linear spliced read counting for %s' % bamfile))

# mpileup gets the number of spliced reads at the circle start position and the (start-1) position.

print ("\t=> running mpileup 1 for start positions [%s]" % bamfile)
print(("\t=> running mpileup 1 for start positions [%s]" % bamfile))
mpileup_start = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coor_1')

print ("\t=> running mpileup 2 for start positions [%s]" % bamfile)
print(("\t=> running mpileup 2 for start positions [%s]" % bamfile))
mpileup_start_1 = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coor_2')

# mpileup gets the number of spliced reads at the circle end position and the (end+1) position.
print ("\t=> running mpileup 1 for end positions [%s]" % bamfile)
print(("\t=> running mpileup 1 for end positions [%s]" % bamfile))
mpileup_end = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coor_1')

print ("\t=> running mpileup 2 for end positions [%s]" % bamfile)
print(("\t=> running mpileup 2 for end positions [%s]" % bamfile))
mpileup_end_1 = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coor_2')

# get count

print "\t=> gathering read counts for start positions [%s]" % bamfile
print("\t=> gathering read counts for start positions [%s]" % bamfile)
startcount = self.submpileup(self.getreadscount(mpileup_start_1), self.getreadscount(mpileup_start))

print "\t=> gathering read counts for end positions [%s]" % bamfile
print("\t=> gathering read counts for end positions [%s]" % bamfile)
endcount = self.submpileup(self.getreadscount(mpileup_end), self.getreadscount(mpileup_end_1), left=False)

# remove tmp files
Expand All @@ -225,7 +225,7 @@ def linearsplicedreadscount(self, circ_coor, bamfile, ref, header=True):
# os.remove(self.tmp_dir + 'tmp_end_coor')
# os.remove(self.tmp_dir + 'tmp_end_coor_1')

print 'Finished linear spliced read counting for %s' % bamfile
print('Finished linear spliced read counting for %s' % bamfile)

return startcount, endcount

Expand Down Expand Up @@ -266,7 +266,7 @@ def comb_gen_count(self, circ_coor, bamfile, ref, output, countlinearsplicedread
# call genecount to get the start and end position read counts
tmp_start, tmp_end = self.genecount(circ_coor, bamfile, ref, tid)

print 'Ended linear gene expression counting %s' % bamfile
print('Ended linear gene expression counting %s' % bamfile)
logging.info('Ended linear gene expression counting %s' % bamfile)

for line in tmp_start:
Expand Down Expand Up @@ -314,6 +314,6 @@ def comb_gen_count(self, circ_coor, bamfile, ref, output, countlinearsplicedread
# tmp_end.close()
count_table.close()

print 'Ended post processing %s' % bamfile
print('Ended post processing %s' % bamfile)
logging.info('Ended post processing %s' % bamfile)
return tid
Loading

0 comments on commit afd5cdd

Please sign in to comment.