-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrevcomp
executable file
·116 lines (99 loc) · 2.57 KB
/
revcomp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python
import sys
# DNA complement table: A<->T and C<->G, in upper and lower case.
# N/n map to themselves.
RC = dict(zip("ACGTacgtNn", "TGCAtgcaNn"))

# RNA complement table: same as RC but with U/u in place of T/t.
RNA_RC = dict(zip("ACGUacguNn", "UGCAugcaNn"))
def revcomp(s):
    """Return the reverse complement of sequence `s`.

    If `s` contains a 'U' or 'u' the RNA table (RNA_RC) is used, otherwise
    the DNA table (RC). Characters missing from the chosen table are
    emitted as 'X'.
    """
    is_rna = "U" in s or "u" in s
    lookup = RNA_RC if is_rna else RC
    return "".join(lookup.get(base, 'X') for base in reversed(s))
def reverse(s):
    """Return the characters of `s` in reverse order, without complementing."""
    return "".join(reversed(s))
def complement(s):
    """Return the complement of sequence `s`, keeping the original order.

    If `s` contains a 'U' or 'u' the RNA table (RNA_RC) is used, otherwise
    the DNA table (RC). Characters missing from the chosen table are
    emitted as 'X'.
    """
    is_rna = "U" in s or "u" in s
    lookup = RNA_RC if is_rna else RC
    return "".join(lookup.get(base, 'X') for base in s)
def usage():
    """Write the command-line help text to standard output."""
    help_text = """revcomp - reverse complement sequences
Usage: revcomp [mode] [seqs...]
Prints to standard output the reverse-complement (or reverse, or complement) of each
sequence passed as argument. When complementing, A<->T, C<->G, Ns remain Ns, all other
characters are converted to X.
If a sequence contains a U or a u it is assumed to be RNA, and U is used in place of T.
`mode' can be used to affect processing of all sequences following it. It can be one of
`-rc' (reverse-complement, the default), `-c' (complement), `-r' (reverse). For example:
revcomp S1 -r S2 S3 -c S4 -rc S5
will reverse-complement S1 and S5, reverse S2 and S3, and complement S4.
If no seqs are passed on the command line, read sequences from standard input. In this case
the only operation supported is reverse-complement, and lines beginning with ">" are ignored,
to allow processing fasta files.
"""
    sys.stdout.write(help_text)
def main(args):
    """Transform sequences given as arguments or, if there are none, read from stdin.

    With arguments, each one is handled in order:
      * `-rc`, `-r` or `-c` selects the mode applied to all following
        sequences (reverse-complement is the initial mode);
      * an argument starting with ">" is echoed as a header on its own line;
      * anything else is a sequence, transformed with the current mode and
        printed followed by a newline.

    Without arguments, standard input is read line by line. Lines starting
    with ">" are echoed unchanged; the lines between them are concatenated
    into one record, which is reverse-complemented and printed.

    Args:
        args: command-line arguments, excluding the program name.
    """
    mode = "-rc"
    if args:
        for a in args:
            if a in ("-rc", "-r", "-c"):
                mode = a
            elif a.startswith('>'):
                # startswith() instead of a[0] so an empty argument does not
                # raise IndexError. The header is newline-terminated so the
                # next sequence is not glued onto it.
                sys.stdout.write(a if a.endswith("\n") else a + "\n")
            elif mode == "-rc":
                sys.stdout.write(revcomp(a) + "\n")
            elif mode == "-r":
                sys.stdout.write(reverse(a) + "\n")
            elif mode == "-c":
                sys.stdout.write(complement(a) + "\n")
        return

    # Pieces of the record currently being read; joined once per record.
    pieces = []
    try:
        for line in sys.stdin:
            if line.startswith('>'):
                # A new header ends the previous record, if it had any bases.
                if pieces:
                    sys.stdout.write(revcomp("".join(pieces)) + "\n")
                    pieces = []
                sys.stdout.write(line)
            else:
                # Strip "\r" as well as "\n": a leftover carriage return from
                # CRLF input would otherwise be complemented to 'X'.
                chunk = line.rstrip("\r\n")
                if chunk:
                    pieces.append(chunk)
        # Flush the last record once the input is exhausted.
        sys.stdout.write(revcomp("".join(pieces)) + "\n")
    except KeyboardInterrupt:
        # Ctrl-C stops quietly; the record being read is not printed.
        pass
if __name__ == "__main__":
    # Either help flag, anywhere on the command line, prints the usage text
    # instead of processing sequences.
    help_requested = any(flag in sys.argv for flag in ("--help", "-h"))
    if help_requested:
        usage()
    else:
        main(sys.argv[1:])