-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbed2gtf
executable file
·100 lines (84 loc) · 2.85 KB
/
bed2gtf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
__author__="Vivek Rai"
__email__="vivek.rai@roche.com"
__date__="2023-04-28"
# The input should have the following (whitespace-separated, BED-like) format:
#############################################
# chr1 724805 725050 ...
# chr1 725859 725955 ...
# The output has one tab-separated line per input region, in the following format
# (source, feature and strand are taken from the command-line options / input columns):
# chr1 source feature 724805 725050 . strand . peak_id=chr1_724805_725050;
# chr1 source feature 725859 725955 . strand . peak_id=chr1_725859_725955;
import argparse
import sys
import os.path
def run(infile, outfile, args):
    """Convert BED-like records from *infile* into GTF-style lines on *outfile*.

    Each input line is split on whitespace; the first three fields are read
    as chromosome, start and end. One tab-separated, nine-column line is
    written per record, with ``peak_id=<chrom>_<start>_<end>;`` in the
    attribute column. Coordinates are copied through without any offset.

    Blank lines and BED header lines (``#`` comments, ``track`` and
    ``browser`` lines) are skipped instead of aborting the conversion.

    Args:
        infile: Iterable of text lines that also provides ``close()``.
        outfile: Writable text stream that also provides ``close()``.
        args: Object exposing ``source`` (str), ``feature_column`` (1-based
            int) and ``strand_column`` (1-based int, or ``None``/``0`` to
            always emit ``"."``).

    Raises:
        IndexError: If a data line has fewer than three fields.
        ValueError: If the start or end field is not an integer.

    Both streams are closed before returning, including when a malformed
    line raises.
    """
    try:
        for line in infile:
            fields = line.split()

            # Nothing to convert on empty lines or on BED header lines;
            # without this guard they would raise IndexError/ValueError below.
            if (
                not fields
                or fields[0] in ("track", "browser")
                or fields[0].startswith("#")
            ):
                continue

            chrom = fields[0]
            start = int(fields[1])
            end = int(fields[2])

            # Fall back to a generic feature name when the requested column
            # is not present on this line.
            if len(fields) < args.feature_column:
                feature = "peak"
            else:
                feature = fields[int(args.feature_column) - 1]

            # Strand is optional: "." when no column was requested or the
            # line is too short to contain it.
            if not args.strand_column or len(fields) < args.strand_column:
                strand = "."
            else:
                strand = fields[int(args.strand_column) - 1]

            attributes = "peak_id={}_{}_{};".format(chrom, start, end)
            print(
                "\t".join(
                    [
                        chrom,
                        args.source,
                        feature,
                        str(start),
                        str(end),
                        ".",
                        strand,
                        ".",
                        attributes,
                    ]
                ),
                file=outfile,
            )
    finally:
        # Release the streams even if a malformed record aborted the loop.
        outfile.close()
        infile.close()
if __name__ == "__main__":
    # Command-line entry point: parse the options, resolve the input and
    # output streams (stdin/stdout unless paths are given) and hand them to
    # run(), which performs the conversion and closes both streams.
    parser = argparse.ArgumentParser(description="Converts a bed file to a gtf file.")
    parser.add_argument(
        "-i", "--input", help="File with the regions in bed format", default=sys.stdin
    )
    parser.add_argument(
        "-o", "--output", help="Name of the gtf file output file.", default=sys.stdout
    )
    parser.add_argument(
        "-f",
        "--feature-column",
        help="Column of the bed file with the feature name",
        default=4,
        type=int,
    )
    parser.add_argument("-s", "--source", help="Source of the data", default="bed2gtf")
    parser.add_argument(
        "-t",
        "--strand-column",
        help="Column of the bed file with strand info",
        default=None,
        type=int,
    )
    args = parser.parse_args()

    # When -i is omitted, argparse hands back the sys.stdin object itself,
    # so an identity check tells a default apart from a user-supplied path.
    if args.input is sys.stdin:
        infile = sys.stdin
    else:
        if not os.path.isfile(args.input):
            # Diagnostics go to stderr: stdout is the default destination
            # for the converted records and must not be polluted.
            print("Error: Input file does not exist", file=sys.stderr)
            sys.exit(1)
        infile = open(args.input, "r")

    # Same convention for -o: the default is the sys.stdout object.
    if args.output is sys.stdout:
        outfile = sys.stdout
    else:
        outfile = open(args.output, "w")

    run(infile, outfile, args)