-
Notifications
You must be signed in to change notification settings - Fork 7
/
getBedpeFBed.py
executable file
·110 lines (98 loc) · 3.04 KB
/
getBedpeFBed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/home/caoy7/anaconda2/envs/cLoops2/bin/python3
#--coding:utf-8--
"""
getBedpeFBed.py
Transfering single-end BED file to paired-end BEDPE file as input of cLoops2 .
"""
#systematic library
import os, time, gzip, argparse, sys
from datetime import datetime
from argparse import RawTextHelpFormatter
#3rd library
#cLoops2
from cLoops2.ds import PET
from cLoops2.utils import cFlush
def help():
description = """
Transfering single-end BED file to paired-end BEDPE file as input of
cLoops2 for furthur analysis.
The 6th column of the BED file of strand information is used to extend
the fragments.
If no strand information available, default treat it as + strand
Example:
getBedpeFBed.py -f a.bed.gz -o a
"""
parser = argparse.ArgumentParser(description=description,
formatter_class=RawTextHelpFormatter)
parser.add_argument(
'-f',
dest="fin",
required=True,
type=str,
help=
"Input bed files, or .bed.gz files. "
)
parser.add_argument('-o',
dest="out",
required=True,
type=str,
help="Output file prefix.")
parser.add_argument(
'-ext',
dest="ext",
required=False,
default=150,
type=int,
help=
"The expect fragment length of the bed file to extend from 5' to 3', default is 150."
)
op = parser.parse_args()
return op
def bed2bedpe(fin, fout, ext=150):
"""
Extend the BED file to BEDPE file according to expected fragment size.
"""
if fin.endswith(".gz"):
fino = gzip.open(fin, "rt")
else:
fino = open(fin)
if fout.endswith(".gz"):
fo = gzip.open(fout, "wt")
else:
fo = open(fout, "w")
for i, line in enumerate(fino):
if i % 10000 == 0:
cFlush("%s read from %s" % (i,fin))
line = line.split("\n")[0].split('\t')
if len(line) < 6: #no strand information
nline = [
line[0], line[1], line[2], line[0],
int(line[1]) + ext,
int(line[2]) + ext, ".", "44", "+", "-"
]
elif line[5] == "+":
nline = [
line[0], line[1], line[2], line[0],
int(line[1]) + ext,
int(line[2]) + ext, ".", "44", "+", "-"
]
else:
nline = [
line[0],
max(0, int(line[1])),
max(0,
int(line[2]) - ext), line[0], line[1], line[2], ".", "44",
"+", "-"
]
nline = "\t".join(list(map(str, nline))) + "\n"
fo.write(nline)
fino.close()
fo.close()
def main():
op = help()
bed2bedpe(op.fin, op.out+".bedpe.gz", ext=op.ext)
if __name__ == "__main__":
start_time = datetime.now()
main()
usedtime = datetime.now() - start_time
sys.stderr.write("Process finished. Used CPU time: %s Bye!\n" % usedtime)