-
Notifications
You must be signed in to change notification settings - Fork 0
/
pairwise.py
65 lines (43 loc) · 1.42 KB
/
pairwise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from sys import argv
from Bio import SeqIO
from Bio import Entrez
import subprocess
import os
def ids_to_fasta(ids):
Entrez.email = "merouehchady@gmail.com"
handle = Entrez.efetch(db="nucleotide", id=ids, rettype="fasta")
records = list(SeqIO.parse(handle, "fasta"))
for i, r in enumerate(records):
SeqIO.write(records, ids[i], 'fasta')
def calc_align_score(ids, outf):
"""Run EMBOSS's needle to align FASTA strings"""
cmd = ["./needle", "-asequence", ids[0], "-bsequence", ids[1], outf,
"-gapopen", "10", "-gapextend", "1", "-endweight", "-endopen",
"10", "-endextend", "1"]
subprocess.call(cmd, stderr=open(os.devnull, 'w'))
def read_score(outf):
"""Get score from needle output file"""
with open(outf, 'r') as inf:
data = inf.readlines()
scores = []
for line in data:
if "Score" in line:
scores.append(line.split()[-1])
return scores[-1]
def cleanup(ids, outf):
"""Remove intermediate files"""
os.remove(outf)
for fname in ids:
os.remove(fname)
def main():
with open(argv[1], 'r') as inf:
data = inf.read()
ids = data.strip().split()
outf = "tempout"
ids_to_fasta(ids)
calc_align_score(ids, outf)
score = read_score(outf)
cleanup(ids, outf)
print score
if __name__ == "__main__":
main()