From 05b403a8aff99f8f5bed497ef3e8c79ccecf6471 Mon Sep 17 00:00:00 2001 From: Nebgen <315312@win.lanl.gov> Date: Fri, 31 Mar 2023 11:01:06 -0600 Subject: [PATCH 1/7] Added database options to remove outlier data def remove_high_property(self,key,perAtom,species_key=None,cut=None,std_factor=10): """ This function removes outlier data from the dataset Must be called before splitting "key": the property key in the dataset to check for high values "perAtom": True if the property is defined per atom in axis 1, otherwise property is treated as full system "std_factor": systems with values farther from the mean than this multiplier times the standard deviation of all data will be removed. None to skip this step "cut": systems with values farther from the mean than this number are removed. None to skip this step. This step is done first. """ --- hippynn/databases/database.py | 56 +++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/hippynn/databases/database.py b/hippynn/databases/database.py index b454fc1b..3466d008 100644 --- a/hippynn/databases/database.py +++ b/hippynn/databases/database.py @@ -191,11 +191,10 @@ def make_generator(self, split_type, evaluation_mode, batch_size=None, subsample if not self.splitting_completed: raise ValueError("Database has not yet been split.") - - if split_type not in self.splits: - raise ValueError(f"Split {split_type} Invalid. Current splits:{list(self.splits.keys())}") - - data = [self.splits[split_type][k] for k in self.var_list] + if split_type in ("train", "valid", "test"): + data = [self.splits[split_type][k] for k in self.var_list] + else: + raise ValueError("Datatype {} Invalid. 
Must be one of 'train','valid','test'".format(split_type)) if evaluation_mode == "train": if split_type != "train": @@ -206,7 +205,7 @@ def make_generator(self, split_type, evaluation_mode, batch_size=None, subsample elif evaluation_mode == "eval": shuffle = False else: - raise ValueError(f"Evaluation_mode ({evaluation_mode}) must be one of 'train' or 'eval'") + raise ValueError("Evaluation_mode ({}) must be one of 'train' or 'eval'") dataset = NamedTensorDataset(self.var_list, *data) if subsample: @@ -226,6 +225,51 @@ def make_generator(self, split_type, evaluation_mode, batch_size=None, subsample ) return generator + + def trim_all_arrays(self,index): + """ + To be used in conjuction with remove_high_property + """ + for key in self.arr_dict: + self.arr_dict[key] = self.arr_dict[key][index] + + def remove_high_property(self,key,perAtom,species_key=None,cut=None,std_factor=10): + """ + This function removes outlier data from the dataset + Must be called before splitting + "key": the property key in the dataset to check for high values + "perAtom": True if the property is defined per atom in axis 1, otherwise property is treated as full system + "std_factor": systems with values larger than this multiplier time the standard deviation of all data will be reomved. None to skip this step + "cut_factor": systems with values larger than this number are reomved. None to skip this step. This step is done first. 
+ """ + if perAtom: + if species_key==None: + raise RuntimeError("species_key must be defined to trim a per atom quantity") + atom_ind = self.arr_dict[species_key] > 0 + ndim = len(self.arr_dict[key].shape) + if cut!=None: + if perAtom: + Kmean = np.mean(self.arr_dict[key][atom_ind]) + else: + Kmean = np.mean(self.arr_dict[key]) + failArr = np.abs(self.arr_dict[key]-Kmean)>cut + #This does nothing with ndim=1 + trimArr = np.sum(failArr,axis=tuple(range(1,ndim)))==0 + self.trim_all_arrays(trimArr) + + if std_factor!=None: + if perAtom: + atom_ind = self.arr_dict[species_key] > 0 + Kmean = np.mean(self.arr_dict[key][atom_ind]) + std_cut = np.std(self.arr_dict[key][atom_ind]) * std_factor + else: + Kmean = np.mean(self.arr_dict[key]) + std_cut = np.std(self.arr_dict[key]) * std_factor + failArr = np.abs(self.arr_dict[key]-Kmean)>std_cut + #This does nothing with ndim=1 + trimArr = np.sum(failArr,axis=tuple(range(1,ndim)))==0 + self.trim_all_arrays(trimArr) + def compute_index_mask(indices, index_pool): From c841ddd35214c5336afb37d9a3f36d6be0e6e029 Mon Sep 17 00:00:00 2001 From: Nicholas Lubbers <56895592+lubbersnick@users.noreply.github.com> Date: Mon, 1 May 2023 11:06:51 -0600 Subject: [PATCH 2/7] unnecessary index type constraint for vectors (#31) (#32) This was causing lammps interfaces not to build correctly. 
--- hippynn/graphs/nodes/physics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hippynn/graphs/nodes/physics.py b/hippynn/graphs/nodes/physics.py index eecbadae..8851f6a3 100644 --- a/hippynn/graphs/nodes/physics.py +++ b/hippynn/graphs/nodes/physics.py @@ -229,7 +229,6 @@ def expansion2(self, vector, helper, *, purpose, **kwargs): _parent_expander.assertlen(1) _parent_expander.get_main_outputs() - _parent_expander.require_idx_states(IdxType.Atoms) def __init__(self, name, parents, module="auto", _helper=None, **kwargs): parents = self.expand_parents(parents) From 6d571ea5aeb9c7814eeea55d55b26a426108002a Mon Sep 17 00:00:00 2001 From: Ben Nebgen Date: Wed, 3 May 2023 21:58:31 -0600 Subject: [PATCH 3/7] Removed ase interface reference from calculator This causes errors with later versions of ase --- hippynn/interfaces/ase_interface/calculator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hippynn/interfaces/ase_interface/calculator.py b/hippynn/interfaces/ase_interface/calculator.py index 586fdf14..b4d1f671 100644 --- a/hippynn/interfaces/ase_interface/calculator.py +++ b/hippynn/interfaces/ase_interface/calculator.py @@ -4,7 +4,6 @@ import warnings import torch -from ase.calculators import interface from ase.calculators.calculator import compare_atoms, PropertyNotImplementedError, Calculator # Calculator is required to allow HIPNN to be used with ASE Mixing Calculators from hippynn.graphs import find_relatives, find_unique_relative, get_subgraph, copy_subgraph, replace_node, GraphModule From d3a7d69cc997e0794bc15b3500009d99eb25c05a Mon Sep 17 00:00:00 2001 From: Ben Nebgen Date: Sun, 16 Jul 2023 06:14:56 -0600 Subject: [PATCH 4/7] changed model to float 32 --- hippynn/interfaces/lammps_interface/mliap_interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hippynn/interfaces/lammps_interface/mliap_interface.py b/hippynn/interfaces/lammps_interface/mliap_interface.py index db180cef..0b7a5eec 100644 --- 
a/hippynn/interfaces/lammps_interface/mliap_interface.py +++ b/hippynn/interfaces/lammps_interface/mliap_interface.py @@ -40,7 +40,7 @@ def __init__(self, energy_node, element_types, ndescriptors=1, # Build the calculator self.rcutfac, self.species_set, self.graph = setup_LAMMPS_graph(energy_node) self.nparams = sum(p.nelement() for p in self.graph.parameters()) - self.graph.to(torch.float64) + self.graph.to(torch.float32) def compute_gradients(self, data): pass @@ -61,7 +61,7 @@ def compute_forces(self, data): z_vals = self.species_set[elems+1] pair_i = self.as_tensor(data.pair_i).type(torch.int64) pair_j = self.as_tensor(data.pair_j).type(torch.int64) - rij = self.as_tensor(data.rij).type(torch.float64) + rij = self.as_tensor(data.rij).type(torch.float32) nlocal = self.as_tensor(data.nlistatoms) # note your sign for rij might need to be +1 or -1, depending on how your implementation works From 83e4b4d9c734979ec5214df6d2b789a67f8e39cb Mon Sep 17 00:00:00 2001 From: Ben Nebgen Date: Mon, 11 Sep 2023 15:16:32 -0600 Subject: [PATCH 5/7] Fixed device in targets.py(70) --- hippynn/layers/targets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hippynn/layers/targets.py b/hippynn/layers/targets.py index 4b8128d0..98077c74 100644 --- a/hippynn/layers/targets.py +++ b/hippynn/layers/targets.py @@ -67,7 +67,7 @@ def forward(self, all_features, mol_index, n_molecules): total_hier = torch.zeros_like(total_energies) mol_hier = torch.zeros_like(total_energies) total_atom_hier = torch.zeros_like(total_atomen) - batch_hier = torch.zeros(1,dtype=total_energies.dtype,device=total_energies.dtype) + batch_hier = torch.zeros(1,dtype=total_energies.dtype,device=total_energies.device) return total_energies, total_atomen, partial_sums, total_hier, total_atom_hier, mol_hier, batch_hier From 80d8bd50caf1e558e0f8fc8351afd75c06fc62f0 Mon Sep 17 00:00:00 2001 From: Ben Nebgen Date: Mon, 9 Oct 2023 14:58:56 -0600 Subject: [PATCH 6/7] Fixes for lammps and ase interface 
lammps: reduce float64 to float32 for speed ase: remove reference to calculator --- hippynn/interfaces/ase_interface/calculator.py | 2 +- hippynn/interfaces/lammps_interface/mliap_interface.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hippynn/interfaces/ase_interface/calculator.py b/hippynn/interfaces/ase_interface/calculator.py index 586fdf14..213a3011 100644 --- a/hippynn/interfaces/ase_interface/calculator.py +++ b/hippynn/interfaces/ase_interface/calculator.py @@ -4,7 +4,7 @@ import warnings import torch -from ase.calculators import interface +#from ase.calculators import interface from ase.calculators.calculator import compare_atoms, PropertyNotImplementedError, Calculator # Calculator is required to allow HIPNN to be used with ASE Mixing Calculators from hippynn.graphs import find_relatives, find_unique_relative, get_subgraph, copy_subgraph, replace_node, GraphModule diff --git a/hippynn/interfaces/lammps_interface/mliap_interface.py b/hippynn/interfaces/lammps_interface/mliap_interface.py index acc27e32..fb2858ea 100644 --- a/hippynn/interfaces/lammps_interface/mliap_interface.py +++ b/hippynn/interfaces/lammps_interface/mliap_interface.py @@ -41,7 +41,7 @@ def __init__(self, energy_node, element_types, ndescriptors=1, # Build the calculator self.rcutfac, self.species_set, self.graph = setup_LAMMPS_graph(energy_node) self.nparams = sum(p.nelement() for p in self.graph.parameters()) - self.graph.to(torch.float64) + self.graph.to(torch.float32) def compute_gradients(self, data): pass @@ -62,7 +62,7 @@ def compute_forces(self, data): z_vals = self.species_set[elems+1] pair_i = self.as_tensor(data.pair_i).type(torch.int64) pair_j = self.as_tensor(data.pair_j).type(torch.int64) - rij = self.as_tensor(data.rij).type(torch.float64) + rij = self.as_tensor(data.rij).type(torch.float32) nlocal = self.as_tensor(data.nlistatoms) # note your sign for rij might need to be +1 or -1, depending on how your implementation works From 
5bbdb6a5a4d65e665a1323d69423c737493fdefc Mon Sep 17 00:00:00 2001 From: Ben Nebgen Date: Fri, 13 Oct 2023 12:43:02 -0600 Subject: [PATCH 7/7] Reverted merge mistakes with database.py --- hippynn/databases/database.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hippynn/databases/database.py b/hippynn/databases/database.py index 3466d008..e9375b49 100644 --- a/hippynn/databases/database.py +++ b/hippynn/databases/database.py @@ -191,10 +191,10 @@ def make_generator(self, split_type, evaluation_mode, batch_size=None, subsample if not self.splitting_completed: raise ValueError("Database has not yet been split.") - if split_type in ("train", "valid", "test"): - data = [self.splits[split_type][k] for k in self.var_list] - else: - raise ValueError("Datatype {} Invalid. Must be one of 'train','valid','test'".format(split_type)) + if split_type not in self.splits: + raise ValueError(f"Split {split_type} Invalid. Current splits:{list(self.splits.keys())}") + + data = [self.splits[split_type][k] for k in self.var_list] if evaluation_mode == "train": if split_type != "train": @@ -205,7 +205,7 @@ def make_generator(self, split_type, evaluation_mode, batch_size=None, subsample elif evaluation_mode == "eval": shuffle = False else: - raise ValueError("Evaluation_mode ({}) must be one of 'train' or 'eval'") + raise ValueError(f"Evaluation_mode ({evaluation_mode}) must be one of 'train' or 'eval'") dataset = NamedTensorDataset(self.var_list, *data) if subsample: