-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_shortH_dssp.py
76 lines (53 loc) · 1.63 KB
/
parse_shortH_dssp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python3
import os
import os.path
import pathlib
import glob
import itertools
import collections
import pprint
import sys
import re
import fnmatch

# Directory holding the DSSP input files, resolved against the launch directory.
input_path = os.path.join(os.getcwd(), "dssp_files")
# Directory that receives one output file per run of short-helix lines.
output_path = os.path.join(os.getcwd(), "shortH_output")

# Substrings that mark a line as belonging to a short helix.
# NOTE(review): several entries are exact duplicates of each other. They may
# originally have differed in the number of spaces between the characters --
# confirm against the column layout of the DSSP files before de-duplicating.
short_matches = ["G 3", "G 4", "G X","G >", "G <", "G 3", "G X","G >", "G <", "G 4"]


def iter_short_helix_records(lines, patterns):
    """Yield ``(run_number, header, line)`` for every line matching *patterns*.

    A "run" is a maximal sequence of consecutive matching lines; runs are
    numbered from 1 in order of appearance.

    Args:
        lines: Iterable of text lines (trailing newlines are kept as-is).
        patterns: Iterable of substrings; a line matches when it contains
            at least one of them.

    Yields:
        Tuples ``(run_number, header, line)``. ``header`` is the most recent
        line containing ``'#'`` for the first line of each run, the line
        itself when the matching line contains ``'#'``, and ``''`` otherwise.
        If no ``'#'`` line has been seen yet, ``header`` is ``''``.
    """
    # Start with an empty header so a match that precedes any '#' line does
    # not fail on an undefined name, and so nothing carries over between files.
    header = ""
    run_number = 0
    previous_matched = False
    for line in lines:
        is_header = "#" in line
        if is_header:
            header = line
        matched = any(pattern in line for pattern in patterns)
        if matched:
            starts_run = not previous_matched
            if starts_run:
                run_number += 1
            # Only the first line of a run (or a line that is itself a
            # header) carries the header text; the rest get an empty one.
            yield run_number, (header if starts_run or is_header else ""), line
        previous_matched = matched


def extract_short_helices(src_path, out_dir, patterns):
    """Append each short-helix run found in *src_path* to its own file.

    Output files are named ``<source file name>_G_<run number>.dssp`` and are
    created inside *out_dir*, which must already exist. Files are opened in
    append mode, so running this twice on the same input adds the records a
    second time.

    Args:
        src_path: Path of the DSSP file to scan.
        out_dir: Existing directory that receives the output files.
        patterns: Substrings identifying short-helix lines.

    Returns:
        The number of runs found (0 when the file has no matching line).
    """
    base_name = os.path.basename(src_path)
    run_count = 0
    with open(src_path, "r") as src_file:
        records = iter_short_helix_records(src_file, patterns)
        # Records arrive ordered by run number, so groupby lets each output
        # file be opened once per run instead of once per line.
        for run_number, run in itertools.groupby(records, key=lambda rec: rec[0]):
            out_name = "{}_G_{}.dssp".format(base_name, run_number)
            with open(os.path.join(out_dir, out_name), "a") as out_file:
                for _, header, line in run:
                    # Same layout as before: header, newline, line, and two
                    # trailing newlines.
                    # NOTE(review): header and line normally already end in a
                    # newline, so this leaves blank lines between records --
                    # confirm that downstream readers expect that.
                    out_file.write(header + "\n" + line + "\n\n")
            run_count = run_number
    return run_count


def main():
    """Scan every ``*.dssp`` file in ``input_path`` and write the short helices."""
    # read all filenames in the dir
    file_list = os.listdir(input_path)
    # The same filtered list drives both the reported count and the
    # processing loop, so the two can never disagree.
    dssp_files = fnmatch.filter(file_list, "*.dssp")
    print('There are',len(dssp_files), 'dssp files in the current directory\n\n')

    # Create the output directory up front; output paths are built with
    # os.path.join, so the working directory is never changed.
    os.makedirs(output_path, exist_ok=True)
    for file_name in dssp_files:
        extract_short_helices(
            os.path.join(input_path, file_name), output_path, short_matches
        )
    print("The output is provided in shortH_output directory.")


if __name__ == "__main__":
    main()