forked from stfc/aiida-mlip
-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_parser.py
209 lines (169 loc) · 6.18 KB
/
train_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
"""Parser for mlip train."""
import json
from pathlib import Path
from typing import Any
from aiida.engine import ExitCode
from aiida.orm import Dict, FolderData
from aiida.orm.nodes.process.process import ProcessNode
from aiida.parsers.parser import Parser
from aiida_mlip.data.model import ModelData
class TrainParser(Parser):
    """
    Parser class for parsing output of a training calculation.

    Parameters
    ----------
    node : aiida.orm.nodes.process.process.ProcessNode
        ProcessNode of calculation.

    Methods
    -------
    __init__(node: aiida.orm.nodes.process.process.ProcessNode)
        Initialize the TrainParser instance.

    parse(**kwargs: Any) -> int:
        Parse outputs, store results in the database.

    _get_remote_dirs(mlip_dict: dict[str, Any]) -> dict[str, Path]:
        Get the remote directories based on mlip config file.

    _validate_retrieved_files(output_filename: str, model_name: str) -> bool:
        Validate that the expected files have been retrieved.

    _save_models(model_output: Path, compiled_model_output: Path) -> None:
        Save model and compiled model as outputs.

    _parse_results(result_name: Path) -> None:
        Parse the results file and store the results dictionary.

    _save_folders(remote_dirs: dict[str, Path]) -> None:
        Save log and checkpoint folders as outputs.
    """

    # Seed used to build the results file name when the config does not set
    # ``seed``; this is the value that was previously hard-coded in the name.
    _DEFAULT_SEED = 123

    def __init__(self, node: ProcessNode):
        """
        Initialize the TrainParser instance.

        Parameters
        ----------
        node : aiida.orm.nodes.process.process.ProcessNode
            ProcessNode of calculation.
        """
        super().__init__(node)

    def parse(self, **kwargs: Any) -> int:
        """
        Parse outputs and store results in the database.

        Parameters
        ----------
        **kwargs : Any
            Any keyword arguments.

        Returns
        -------
        int
            An exit code: ``ERROR_MISSING_OUTPUT_FILES`` when the expected files
            were not retrieved, ``ExitCode(0)`` otherwise.
        """
        mlip_dict = self.node.inputs.mlip_config.as_dictionary
        output_filename = self.node.get_option("output_filename")
        remote_dirs = self._get_remote_dirs(mlip_dict)

        model_name = mlip_dict["name"]
        # The results file name embeds the run seed. Read it from the config
        # instead of assuming 123, so runs with a custom seed are found too.
        # NOTE(review): assumes the config key is ``seed`` - confirm against the
        # training code's naming of ``<name>_run-<seed>_train.txt``.
        seed = mlip_dict.get("seed", self._DEFAULT_SEED)

        model_output = remote_dirs["model"] / f"{model_name}.model"
        compiled_model_output = remote_dirs["model"] / f"{model_name}_compiled.model"
        result_name = remote_dirs["results"] / f"{model_name}_run-{seed}_train.txt"

        # Bail out before touching any file if the retrieval was incomplete.
        if not self._validate_retrieved_files(output_filename, model_name):
            return self.exit_codes.ERROR_MISSING_OUTPUT_FILES

        self._save_models(model_output, compiled_model_output)
        self._parse_results(result_name)
        self._save_folders(remote_dirs)

        return ExitCode(0)

    def _get_remote_dirs(self, mlip_dict: dict) -> dict:
        """
        Get the remote directories based on mlip config file.

        Each directory is taken from the ``<type>_dir`` key of the config, with a
        fallback default, and joined onto the remote working directory.

        Parameters
        ----------
        mlip_dict : dict
            Dictionary containing mlip config file.

        Returns
        -------
        dict
            Dictionary of remote directories, keyed by ``"log"``,
            ``"checkpoint"``, ``"results"`` and ``"model"``.
        """
        rem_dir = Path(self.node.get_remote_workdir())

        return {
            typ: rem_dir / mlip_dict.get(f"{typ}_dir", default)
            for typ, default in (
                ("log", "logs"),
                ("checkpoint", "checkpoints"),
                ("results", "results"),
                # Empty default: the model is looked up directly in the workdir.
                ("model", ""),
            )
        }

    def _validate_retrieved_files(self, output_filename: str, model_name: str) -> bool:
        """
        Validate that the expected files have been retrieved.

        Parameters
        ----------
        output_filename : str
            The expected output filename.
        model_name : str
            The name of the model as found in the config file key `name`.

        Returns
        -------
        bool
            True if the expected files are retrieved, False otherwise.
        """
        files_retrieved = self.retrieved.list_object_names()
        files_expected = {output_filename, f"{model_name}.model"}

        if not files_expected.issubset(files_retrieved):
            self.logger.error(
                f"Found files '{files_retrieved}', expected to find '{files_expected}'"
            )
            return False
        return True

    def _save_models(self, model_output: Path, compiled_model_output: Path) -> None:
        """
        Save model and compiled model as outputs.

        Both files are wrapped with ``ModelData.from_local`` and attached as the
        ``model`` and ``compiled_model`` outputs.

        Parameters
        ----------
        model_output : Path
            Path to the model output file.
        compiled_model_output : Path
            Path to the compiled model output file.
        """
        architecture = "mace_mp"

        model = ModelData.from_local(model_output, architecture=architecture)
        compiled_model = ModelData.from_local(
            compiled_model_output, architecture=architecture
        )

        self.out("model", model)
        self.out("compiled_model", compiled_model)

    def _parse_results(self, result_name: Path) -> None:
        """
        Parse the results file and store the results dictionary.

        The file is read line by line; every line is tried as JSON and the last
        line that decodes to a dictionary is stored as the ``results_dict``
        output. Lines that are not valid JSON, or that decode to something other
        than a dictionary, are ignored.

        Parameters
        ----------
        result_name : Path
            Path to the result file.

        Raises
        ------
        ValueError
            If no line of the file contains a valid dictionary.
        """
        last_dict = None
        with open(result_name, encoding="utf-8") as file:
            for line in file:
                try:
                    parsed = json.loads(line.strip())
                except json.JSONDecodeError:
                    continue
                # A bare number, string or list is valid JSON but not a results
                # record; it must not replace a dictionary found earlier.
                if isinstance(parsed, dict):
                    last_dict = parsed

        if last_dict is None:
            raise ValueError("No valid dictionary in the file")

        results_node = Dict(last_dict)
        self.out("results_dict", results_node)

    def _save_folders(self, remote_dirs: dict) -> None:
        """
        Save log and checkpoint folders as outputs.

        Parameters
        ----------
        remote_dirs : dict
            Dictionary of remote folders; the ``"log"`` and ``"checkpoint"``
            entries are stored as the ``logs`` and ``checkpoints`` outputs.
        """
        log_node = FolderData(tree=remote_dirs["log"])
        self.out("logs", log_node)

        checkpoint_node = FolderData(tree=remote_dirs["checkpoint"])
        self.out("checkpoints", checkpoint_node)