-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmotif_conformations.py
147 lines (131 loc) · 5.69 KB
/
motif_conformations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""Calculate the internal coordinates of each instance of a sequence motif in
protein structures
Author: Adriaan Lategan
"""
import argparse
from Bio.PDB.Polypeptide import PPBuilder
from internal_coord import InternalCoordinates
from pdb_io import PdbQueries, PdbReader, PdbQueryCsv, CsvWriter
# Passed as the second argument to PdbQueryCsv when a query list is read.
# Presumably tells the reader that the first row of the query csv is a
# heading row rather than data -- TODO confirm against pdb_io.PdbQueryCsv.
HAS_HEADER = True
def get_arguments() -> argparse.Namespace:
    """Parse the command-line arguments of the script.

    Positional arguments, in order: ``motif``, ``structure_directory``,
    ``structure_format`` (``pdb`` or ``cif``) and ``output_file``.
    Optional arguments: ``-q/--query_list`` and the ``-g/--gzipped`` flag.

    Returns
    -------
    argparse.Namespace
        the parsed arguments, with attributes ``motif``, ``query_list``
        (``None`` when the option is not given), ``structure_directory``,
        ``structure_format``, ``gzipped`` and ``output_file``
    """
    # The first positional parameter of ArgumentParser is ``prog``, so the
    # summary has to be passed by keyword as ``description``; otherwise it
    # replaces the program name in the usage line.
    argument_parser = argparse.ArgumentParser(
        description="Calculate the internal coordinates of each instance "
                    "of a sequence motif in protein structures"
    )
    argument_parser.add_argument(
        'motif',
        type=str,
        help='the amino acid sequence to find and '
             'extract from the protein structures'
    )
    argument_parser.add_argument(
        '-q', '--query_list',
        type=str,
        help='path to the file specifying the names, '
             'file paths, and chain ids of the '
             'protein structures to search for the '
             'motif. If no query_list is specified, '
             'it will read all files in '
             'structure_directory.'
    )
    argument_parser.add_argument(
        'structure_directory',
        type=str,
        help='path to the directory containing '
             'protein structure files'
    )
    # NOTE: this positional is required (no nargs='?'), so argparse never
    # applies the default; it is kept only to leave the declaration as it was.
    argument_parser.add_argument(
        'structure_format',
        choices=['pdb', 'cif'],
        type=str,
        default='pdb',
        help='format of the protein structure files: '
             'either "pdb" or "cif"'
    )
    argument_parser.add_argument(
        '-g', '--gzipped',
        action='store_true',
        help='structure files are compressed using '
             'gzip'
    )
    argument_parser.add_argument(
        'output_file',
        type=str,
        help='file path for the output csv file'
    )
    return argument_parser.parse_args()
def motif_to_coordinates(
        motif: str,
        query_list: str,
        pdb_directory: str,
        pdb_format: str,
        gzipped: bool,
        output_file: str
) -> None:
    """Write the internal coordinates of every motif instance to a csv file.

    The structures to search come from ``query_list`` when it is given,
    otherwise from the queries the reader provides for ``pdb_directory``.
    Each polypeptide obtained for a query is searched for ``motif``, and one
    csv row is written per coordinate of every residue in every match.

    Parameters
    ----------
    motif : str
        an amino acid sequence to find in the protein structures
    query_list : str
        a csv file specifying protein file paths and polymer instances; a
        falsy value (``None`` or ``''``) selects the directory queries instead
    pdb_directory : str
        a directory containing protein structure files
    pdb_format : str
        the format of the protein structure files. Either "pdb" or "cif"
    gzipped : bool
        true if the protein structure files are compressed with gzip, false if
        uncompressed
    output_file : str
        name of the file to which the internal coordinates are written
    """
    pdb_reader = PdbReader(pdb_directory, pdb_format, gzipped)
    internal = InternalCoordinates()

    pdb_queries: PdbQueries
    if query_list:
        # NOTE(review): ``read`` and ``directory_queries`` are accessed
        # without a call, exactly as before -- presumably both are
        # properties; confirm against pdb_io.
        pdb_queries = PdbQueryCsv(query_list, HAS_HEADER).read
    else:
        pdb_queries = pdb_reader.directory_queries

    # Lazy pipeline: nothing is read until the write loop below pulls
    # residues from it.
    residue_data = (
        internal.get_coordinates(polypeptide.chain, residue)
        for query in pdb_queries
        for polypeptide in query.get_polypeptides(pdb_reader, PPBuilder())
        for fragment in polypeptide.find_motif(motif)
        for residue in fragment
    )

    fields = ['Protein',
              'Model',
              'Chain',
              'Position',
              'Residue Name',
              'Coordinate Type',
              'Coordinate ID',
              'Coordinate Value'
              ]
    writer = CsvWriter(output_file, fields)
    # try/finally so the writer is closed even when reading a structure or
    # writing a row raises part-way through.
    try:
        writer.write_headings()
        for residue in residue_data:
            for coordinate in residue.coordinates:
                field_values = [residue.protein,
                                f'{residue.model}',
                                residue.chain,
                                f'{residue.position}',
                                residue.residue_name,
                                coordinate.coordinate_type,
                                coordinate.coordinate_id,
                                f'{coordinate.coordinate_value:f}'
                                ]
                writer.write_line(field_values)
    finally:
        writer.close()
if __name__ == "__main__":
    # Script entry point: parse the command line and hand every option to
    # motif_to_coordinates by name.
    arguments = get_arguments()
    motif_to_coordinates(
        motif=arguments.motif,
        query_list=arguments.query_list,
        pdb_directory=arguments.structure_directory,
        pdb_format=arguments.structure_format,
        gzipped=arguments.gzipped,
        output_file=arguments.output_file,
    )