From 5116cbcd09875ad4eb376e9214ce031843015666 Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 18:47:13 -0400 Subject: [PATCH 01/15] Added unit-tested string and dictionary methods for GPs --- flare/gp.py | 89 +++++++++++++++++++++++++++++++++++++++++++----- tests/test_gp.py | 52 ++++++++++++++++++++++++++-- 2 files changed, 130 insertions(+), 11 deletions(-) diff --git a/flare/gp.py b/flare/gp.py index 6b3e3b560..dcefa8aa9 100644 --- a/flare/gp.py +++ b/flare/gp.py @@ -8,7 +8,9 @@ from flare.gp_algebra import get_ky_mat, get_ky_and_hyp, \ get_like_from_ky_mat, get_like_grad_from_mats, get_neg_likelihood, \ get_neg_like_grad, get_ky_and_hyp_par - +from flare.kernels import str_to_kernel +import json as json +from flare.util import NumpyEncoder class GaussianProcess: """ Gaussian Process Regression Model. @@ -185,12 +187,6 @@ def predict(self, x_t: AtomicEnvironment, d: int) -> [float, float]: pred_var = self_kern - \ np.matmul(np.matmul(k_v, self.ky_mat_inv), k_v) - # # get predictive variance (possibly slow) - # v_vec = solve_triangular(self.l_mat, k_v, lower=True) - # self_kern = self.kernel(x_t, x_t, self.bodies, d, d, self.hyps, - # self.cutoffs) - # pred_var = self_kern - np.matmul(v_vec, v_vec) - return pred_mean, pred_var def predict_local_energy(self, x_t: AtomicEnvironment) -> float: @@ -292,8 +288,8 @@ def set_L_alpha(self): self.ky_mat_inv = ky_mat_inv self.l_mat_inv = l_mat_inv - self.like = like - self.like_grad = like_grad + self.likelihood = like + self.likelihood_gradient = like_grad def update_L_alpha(self): """ @@ -331,3 +327,78 @@ def update_L_alpha(self): self.alpha = alpha self.ky_mat_inv = ky_mat_inv self.l_mat_inv = l_mat_inv + + def __str__(self): + + thestr = "GaussianProcess Object\n" + thestr += 'Kernel: {}\n'.format(self.kernel_name) + thestr += "Training points: {}\n".format(len(self.training_data)) + thestr += 'Cutoffs: {}\n'.format(self.cutoffs) + thestr += 'Model Likelihood: {}\n'.format(self.likelihood) + + thestr += 'Hyperparameters: \n' + if self.hyp_labels is None: + # Put unlabeled hyperparameters on one line + thestr = thestr[:-1] + thestr += str(self.hyps)+'\n' + else: + for hyp, label in zip(self.hyps, self.hyp_labels): + thestr += "{}: {}\n".format(label, hyp) + + return thestr + + def as_dict(self): + + out_dict = dict(vars(self)) + + out_dict['training_data'] = [env.as_dict() for env in + self.training_data] + # Remove the callables + del out_dict['kernel'] + del out_dict['kernel_grad'] + + return out_dict + + @staticmethod + def from_dict(dictionary): + + force_kernel, grad = str_to_kernel(dictionary['kernel_name'], + include_grad=True) + + if dictionary['energy_kernel'] is not None: + energy_kernel = str_to_kernel(dictionary['energy_kernel']) + else: + energy_kernel = None + + if dictionary['energy_force_kernel'] is not None: + energy_force_kernel = str_to_kernel(dictionary[ + 'energy_force_kernel']) + else: + energy_force_kernel = None + + new_gp = GaussianProcess(kernel=force_kernel, + kernel_grad=grad, + energy_kernel=energy_kernel, + energy_force_kernel=energy_force_kernel, + cutoffs=np.array(dictionary['cutoffs']), + hyps=np.array(dictionary['hyps']), + hyp_labels=dictionary['hyp_labels'], + par=dictionary['par'], + maxiter=dictionary['maxiter'], + opt_algorithm=dictionary['algo']) + + # Save time by attempting to load in computed attributes + new_gp.l_mat = np.array(dictionary.get('l_mat', None)) + new_gp.l_mat_inv = np.array(dictionary.get('l_mat_inv', None)) + new_gp.alpha = np.array(dictionary.get('alpha', None)) + 
new_gp.ky_mat = np.array(dictionary.get('ky_mat', None)) + new_gp.ky_mat_inv = np.array(dictionary.get('ky_mat_inv', None)) + + new_gp.training_data = [AtomicEnvironment.from_dict(env) for env in + dictionary['training_data']] + new_gp.training_labels = dictionary['training_labels'] + + new_gp.likelihood = dictionary['likelihood'] + new_gp.likelihood_gradient = dictionary['likelihood_gradient'] + + return new_gp \ No newline at end of file diff --git a/tests/test_gp.py b/tests/test_gp.py index edd40f5f5..e091e35d3 100644 --- a/tests/test_gp.py +++ b/tests/test_gp.py @@ -35,7 +35,7 @@ def get_random_structure(cell, unique_species, noa): # set the scope to module so it will only be setup once @pytest.fixture(scope='module') -def two_body_gp(): +def two_body_gp()->GaussianProcess: """Returns a GP instance with a two-body numba-based kernel""" print("\nSetting up...\n") @@ -55,6 +55,7 @@ def two_body_gp(): GaussianProcess(kernel=en.three_body, kernel_grad=en.three_body_grad, hyps=np.array([1, 1, 1]), + hyp_labels=['Length', 'Signal Var.', 'Noise Var.'], cutoffs=cutoffs) gaussian.update_db(test_structure, forces) @@ -79,7 +80,7 @@ def params(): @pytest.fixture(scope='module') -def test_point(): +def test_point()->AtomicEnvironment: """Create test point for kernel to compare against""" # params cell = np.eye(3) @@ -212,3 +213,50 @@ def test_update_L_alpha(): ky_mat_from_set = np.copy(gp_model.ky_mat) assert (np.all(np.absolute(ky_mat_from_update-ky_mat_from_set)) < 1e-6) + +def test_representation_method(two_body_gp): + + the_str = str(two_body_gp) + assert 'GaussianProcess Object' in the_str + assert 'Kernel: three_body' in the_str + assert 'Cutoffs: [0.8 0.8]' in the_str + assert 'Model Likelihood: ' in the_str + assert 'Length: 1' in the_str + assert 'Signal Var.: 1' in the_str + assert "Noise Var.: 1" in the_str + + +def test_serialization_method(two_body_gp,test_point): + """ + Serialize and then un-serialize a GP and ensure that no info was lost. + Compare one calculation to ensure predictions work correctly. 
+ :param two_body_gp: + :return: + """ + old_gp_dict = two_body_gp.as_dict() + new_gp = GaussianProcess.from_dict(old_gp_dict) + new_gp_dict = new_gp.as_dict() + + assert len(new_gp_dict) == len(old_gp_dict) + + for k1, k2 in zip(sorted(new_gp_dict.keys()), sorted(old_gp_dict.keys())): + + x = new_gp_dict[k1] + y = new_gp_dict[k2] + + if isinstance(x, np.ndarray): + assert np.equal(x, y).all() + elif hasattr(x, '__len__'): + + if isinstance(x[0], np.ndarray): + assert np.equal(x, y).all() + + else: + for xx, yy in zip(x, y): + assert xx == yy + else: + assert x == y + + for d in [0, 1, 2]: + assert np.all(two_body_gp.predict(x_t=test_point, d=d) == + new_gp.predict(x_t=test_point, d=d)) \ No newline at end of file From 956e3a38199f757abfde0398cdde9f533608ab0f Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 18:47:34 -0400 Subject: [PATCH 02/15] Addded string-to-kernel helper functions to facilitate GPs from Dicts --- flare/kernels.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/flare/kernels.py b/flare/kernels.py index 888e8515b..7171dfbe6 100644 --- a/flare/kernels.py +++ b/flare/kernels.py @@ -869,3 +869,34 @@ def triplet_force_en_kernel(ci1, ci2, ri1, ri2, ri3, rj1, rj2, rj3, print(kern_finite_diff) print(kern_analytical) assert(np.isclose(kern_finite_diff, kern_analytical, atol=tol)) + +_str_to_kernel = {'two_body': two_body, + 'two_body_en': two_body_en, + 'two_body_force_en': two_body_force_en, + 'three_body': three_body, + 'three_body_en': three_body_en, + 'three_body_force_en': three_body_force_en, + 'two_plus_three_body': two_plus_three_body, + 'two_plus_three_en': two_plus_three_en, + 'two_plus_three_force_en': two_plus_three_force_en + } + + +def str_to_kernel(string: str, include_grad: bool=False): + + if string not in _str_to_kernel.keys(): + raise ValueError("Kernel {} not found in list of available " + "kernels{}:".format(string,_str_to_kernel.keys())) + + if not include_grad: + return _str_to_kernel[string] + else: + if 'two' in string and 'three' in string: + return _str_to_kernel[string], two_plus_three_body_grad + elif 'two' in string and 'three' not in string: + return _str_to_kernel[string], two_body_grad + elif 'two' not in string and 'three' in string: + return _str_to_kernel[string], three_body_grad + else: + raise ValueError("Gradient callable for {} not found".format( + string)) From 1f626ea5f62e45e7c994c611e872dd0587fc08ec Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 18:48:04 -0400 Subject: [PATCH 03/15] Unit Tests for GP serialization / representation --- tests/test_gp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_gp.py b/tests/test_gp.py index e091e35d3..f201cbdee 100644 --- a/tests/test_gp.py +++ b/tests/test_gp.py @@ -221,9 +221,9 @@ def test_representation_method(two_body_gp): assert 'Kernel: three_body' in the_str assert 'Cutoffs: [0.8 0.8]' in the_str assert 'Model Likelihood: ' in the_str - assert 'Length: 1' in the_str - assert 'Signal Var.: 1' in the_str - assert "Noise Var.: 1" in the_str + assert 'Length: ' in the_str + assert 'Signal Var.: ' in the_str + assert "Noise Var.: " in the_str def test_serialization_method(two_body_gp,test_point): From f4242da3ffa775cda849f38b35dde7965e9c9934 Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 18:55:12 -0400 Subject: [PATCH 04/15] Improved PEP8 compliance --- flare/gp.py | 33 ++++++++++++++++++++------------- flare/util.py | 3 ++- tests/test_gp.py | 34 +++++++++++++++------------------- 
tests/test_util.py | 2 +- 4 files changed, 38 insertions(+), 34 deletions(-) diff --git a/flare/gp.py b/flare/gp.py index dcefa8aa9..0399942a6 100644 --- a/flare/gp.py +++ b/flare/gp.py @@ -9,8 +9,7 @@ get_like_from_ky_mat, get_like_grad_from_mats, get_neg_likelihood, \ get_neg_like_grad, get_ky_and_hyp_par from flare.kernels import str_to_kernel -import json as json -from flare.util import NumpyEncoder + class GaussianProcess: """ Gaussian Process Regression Model. @@ -38,17 +37,23 @@ def __init__(self, kernel: Callable, self.hyp_labels = hyp_labels self.cutoffs = cutoffs self.algo = opt_algorithm - self.l_mat = None - self.alpha = None + self.training_data = [] self.training_labels = [] self.training_labels_np = np.empty(0, ) self.maxiter = maxiter - self.likelihood = None - self.likelihood_gradient = None self.par = par self.output = output + # Parameters set during training + self.ky_mat = None + self.l_mat = None + self.alpha = None + self.ky_mat_inv = None + self.l_mat_inv = None + self.likelihood = None + self.likelihood_gradient = None + # TODO unit test custom range def update_db(self, struc: Structure, forces: list, custom_range: List[int] = ()): @@ -130,6 +135,7 @@ def train(self, output=None, custom_bounds=None, args = (self.training_data, self.training_labels_np, self.kernel_grad, self.cutoffs, output, self.par) + res = None if self.algo == 'L-BFGS-B': @@ -168,7 +174,8 @@ def train(self, output=None, custom_bounds=None, options={'disp': False, 'maxiter': self.maxiter, 'xtol': x_tol}) - + if res is None: + raise RuntimeError("Optimization failed for some reason.") self.hyps = res.x self.set_L_alpha() self.likelihood = -res.fun @@ -340,7 +347,7 @@ def __str__(self): if self.hyp_labels is None: # Put unlabeled hyperparameters on one line thestr = thestr[:-1] - thestr += str(self.hyps)+'\n' + thestr += str(self.hyps) + '\n' else: for hyp, label in zip(self.hyps, self.hyp_labels): thestr += "{}: {}\n".format(label, hyp) @@ -352,7 +359,7 @@ def as_dict(self): out_dict = dict(vars(self)) out_dict['training_data'] = [env.as_dict() for env in - self.training_data] + self.training_data] # Remove the callables del out_dict['kernel'] del out_dict['kernel_grad'] @@ -363,7 +370,7 @@ def as_dict(self): def from_dict(dictionary): force_kernel, grad = str_to_kernel(dictionary['kernel_name'], - include_grad=True) + include_grad=True) if dictionary['energy_kernel'] is not None: energy_kernel = str_to_kernel(dictionary['energy_kernel']) @@ -372,7 +379,7 @@ def from_dict(dictionary): if dictionary['energy_force_kernel'] is not None: energy_force_kernel = str_to_kernel(dictionary[ - 'energy_force_kernel']) + 'energy_force_kernel']) else: energy_force_kernel = None @@ -395,10 +402,10 @@ def from_dict(dictionary): new_gp.ky_mat_inv = np.array(dictionary.get('ky_mat_inv', None)) new_gp.training_data = [AtomicEnvironment.from_dict(env) for env in - dictionary['training_data']] + dictionary['training_data']] new_gp.training_labels = dictionary['training_labels'] new_gp.likelihood = dictionary['likelihood'] new_gp.likelihood_gradient = dictionary['likelihood_gradient'] - return new_gp \ No newline at end of file + return new_gp diff --git a/flare/util.py b/flare/util.py index 5ffd64b5f..b6e26ce6d 100644 --- a/flare/util.py +++ b/flare/util.py @@ -128,7 +128,8 @@ _Z_to_element = {z: elt for elt, z in _element_to_Z.items()} -def element_to_Z(element:str)->int: + +def element_to_Z(element: str)->int: """ Returns the atomic number Z associated with an elements 1-2 letter name. 
Returns the same integer if an integer is passed in. diff --git a/tests/test_gp.py b/tests/test_gp.py index f201cbdee..b014a74ff 100644 --- a/tests/test_gp.py +++ b/tests/test_gp.py @@ -1,6 +1,5 @@ import pytest import numpy as np -import sys from flare.gp import GaussianProcess from flare.env import AtomicEnvironment from flare.struc import Structure @@ -35,14 +34,13 @@ def get_random_structure(cell, unique_species, noa): # set the scope to module so it will only be setup once @pytest.fixture(scope='module') -def two_body_gp()->GaussianProcess: +def two_body_gp() -> GaussianProcess: """Returns a GP instance with a two-body numba-based kernel""" print("\nSetting up...\n") # params cell = np.eye(3) unique_species = [2, 1] - cutoff = 0.8 cutoffs = np.array([0.8, 0.8]) noa = 5 @@ -80,7 +78,7 @@ def params(): @pytest.fixture(scope='module') -def test_point()->AtomicEnvironment: +def test_point() -> AtomicEnvironment: """Create test point for kernel to compare against""" # params cell = np.eye(3) @@ -149,8 +147,6 @@ def test_set_L_alpha(two_body_gp, params): # params cell = np.eye(3) unique_species = [2, 1] - cutoff = 0.8 - cutoffs = np.array([0.8, 0.8]) noa = 2 # create test structure @@ -160,8 +156,8 @@ def test_set_L_alpha(two_body_gp, params): # set gp model kernel = en.two_plus_three_body kernel_grad = en.two_plus_three_body_grad - hyps = np.array([2.23751151e-01, 8.19990316e-01, 1.28421842e-04, - 1.07467158e+00, 5.50677932e-02]) + hyps = np.array([2.23751151e-01, 8.19990316e-01, 1.28421842e-04, + 1.07467158e+00, 5.50677932e-02]) cutoffs = np.array([5.4, 5.4]) hyp_labels = ['sig2', 'ls2', 'sig3', 'ls3', 'noise'] energy_force_kernel = en.two_plus_three_force_en @@ -184,19 +180,19 @@ def test_update_L_alpha(): kernel_grad = mc_simple.two_plus_three_body_mc_grad cutoffs = [6.0, 5.0] hyps = np.array([0.001770, 0.183868, -0.001415, 0.372588, 0.026315]) - + # get an otf traj from file for training data old_otf = OtfAnalysis('test_files/AgI_snippet.out') - call_no = 1 + call_no = 1 cell = old_otf.header['cell'] gp_model = old_otf.make_gp(kernel=kernel, kernel_grad=kernel_grad, - call_no=call_no, - cutoffs=cutoffs, + call_no=call_no, + cutoffs=cutoffs, hyps=hyps) - + # update database & use update_L_alpha to get ky_mat - for n in range(call_no, call_no+1): + for n in range(call_no, call_no + 1): positions = old_otf.gp_position_list[n] species = old_otf.gp_species_list[n] atoms = old_otf.gp_atom_list[n] @@ -211,11 +207,11 @@ def test_update_L_alpha(): # use set_L_alpha to get ky_mat gp_model.set_L_alpha() ky_mat_from_set = np.copy(gp_model.ky_mat) - - assert (np.all(np.absolute(ky_mat_from_update-ky_mat_from_set)) < 1e-6) -def test_representation_method(two_body_gp): + assert (np.all(np.absolute(ky_mat_from_update - ky_mat_from_set)) < 1e-6) + +def test_representation_method(two_body_gp): the_str = str(two_body_gp) assert 'GaussianProcess Object' in the_str assert 'Kernel: three_body' in the_str @@ -226,7 +222,7 @@ def test_representation_method(two_body_gp): assert "Noise Var.: " in the_str -def test_serialization_method(two_body_gp,test_point): +def test_serialization_method(two_body_gp, test_point): """ Serialize and then un-serialize a GP and ensure that no info was lost. Compare one calculation to ensure predictions work correctly. 
@@ -259,4 +255,4 @@ def test_serialization_method(two_body_gp,test_point): for d in [0, 1, 2]: assert np.all(two_body_gp.predict(x_t=test_point, d=d) == - new_gp.predict(x_t=test_point, d=d)) \ No newline at end of file + new_gp.predict(x_t=test_point, d=d)) diff --git a/tests/test_util.py b/tests/test_util.py index 0b149e4fe..a1f8e3126 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -23,7 +23,7 @@ def test_elt_warning(): def test_Z_to_element(): for i in range(1,118): - assert isinstance(Z_to_element(i),str) + assert isinstance(Z_to_element(i), str) for pair in zip([1, 6, '8', '118'], ['H', 'C', 'O', 'Og']): assert Z_to_element(pair[0]) == pair[1] From a102a747d6450e0924b28733643f06b6c089cc04 Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 18:59:37 -0400 Subject: [PATCH 05/15] Revert "Improved PEP8 compliance" This reverts commit f4242da3ffa775cda849f38b35dde7965e9c9934. --- flare/gp.py | 33 +++++++++++++-------------------- flare/util.py | 3 +-- tests/test_gp.py | 34 +++++++++++++++++++--------------- tests/test_util.py | 2 +- 4 files changed, 34 insertions(+), 38 deletions(-) diff --git a/flare/gp.py b/flare/gp.py index 0399942a6..dcefa8aa9 100644 --- a/flare/gp.py +++ b/flare/gp.py @@ -9,7 +9,8 @@ get_like_from_ky_mat, get_like_grad_from_mats, get_neg_likelihood, \ get_neg_like_grad, get_ky_and_hyp_par from flare.kernels import str_to_kernel - +import json as json +from flare.util import NumpyEncoder class GaussianProcess: """ Gaussian Process Regression Model. @@ -37,22 +38,16 @@ def __init__(self, kernel: Callable, self.hyp_labels = hyp_labels self.cutoffs = cutoffs self.algo = opt_algorithm - + self.l_mat = None + self.alpha = None self.training_data = [] self.training_labels = [] self.training_labels_np = np.empty(0, ) self.maxiter = maxiter - self.par = par - self.output = output - - # Parameters set during training - self.ky_mat = None - self.l_mat = None - self.alpha = None - self.ky_mat_inv = None - self.l_mat_inv = None self.likelihood = None self.likelihood_gradient = None + self.par = par + self.output = output # TODO unit test custom range def update_db(self, struc: Structure, forces: list, @@ -135,7 +130,6 @@ def train(self, output=None, custom_bounds=None, args = (self.training_data, self.training_labels_np, self.kernel_grad, self.cutoffs, output, self.par) - res = None if self.algo == 'L-BFGS-B': @@ -174,8 +168,7 @@ def train(self, output=None, custom_bounds=None, options={'disp': False, 'maxiter': self.maxiter, 'xtol': x_tol}) - if res is None: - raise RuntimeError("Optimization failed for some reason.") + self.hyps = res.x self.set_L_alpha() self.likelihood = -res.fun @@ -347,7 +340,7 @@ def __str__(self): if self.hyp_labels is None: # Put unlabeled hyperparameters on one line thestr = thestr[:-1] - thestr += str(self.hyps) + '\n' + thestr += str(self.hyps)+'\n' else: for hyp, label in zip(self.hyps, self.hyp_labels): thestr += "{}: {}\n".format(label, hyp) @@ -359,7 +352,7 @@ def as_dict(self): out_dict = dict(vars(self)) out_dict['training_data'] = [env.as_dict() for env in - self.training_data] + self.training_data] # Remove the callables del out_dict['kernel'] del out_dict['kernel_grad'] @@ -370,7 +363,7 @@ def as_dict(self): def from_dict(dictionary): force_kernel, grad = str_to_kernel(dictionary['kernel_name'], - include_grad=True) + include_grad=True) if dictionary['energy_kernel'] is not None: energy_kernel = str_to_kernel(dictionary['energy_kernel']) @@ -379,7 +372,7 @@ def from_dict(dictionary): if dictionary['energy_force_kernel'] 
is not None: energy_force_kernel = str_to_kernel(dictionary[ - 'energy_force_kernel']) + 'energy_force_kernel']) else: energy_force_kernel = None @@ -402,10 +395,10 @@ def from_dict(dictionary): new_gp.ky_mat_inv = np.array(dictionary.get('ky_mat_inv', None)) new_gp.training_data = [AtomicEnvironment.from_dict(env) for env in - dictionary['training_data']] + dictionary['training_data']] new_gp.training_labels = dictionary['training_labels'] new_gp.likelihood = dictionary['likelihood'] new_gp.likelihood_gradient = dictionary['likelihood_gradient'] - return new_gp + return new_gp \ No newline at end of file diff --git a/flare/util.py b/flare/util.py index b6e26ce6d..5ffd64b5f 100644 --- a/flare/util.py +++ b/flare/util.py @@ -128,8 +128,7 @@ _Z_to_element = {z: elt for elt, z in _element_to_Z.items()} - -def element_to_Z(element: str)->int: +def element_to_Z(element:str)->int: """ Returns the atomic number Z associated with an elements 1-2 letter name. Returns the same integer if an integer is passed in. diff --git a/tests/test_gp.py b/tests/test_gp.py index b014a74ff..f201cbdee 100644 --- a/tests/test_gp.py +++ b/tests/test_gp.py @@ -1,5 +1,6 @@ import pytest import numpy as np +import sys from flare.gp import GaussianProcess from flare.env import AtomicEnvironment from flare.struc import Structure @@ -34,13 +35,14 @@ def get_random_structure(cell, unique_species, noa): # set the scope to module so it will only be setup once @pytest.fixture(scope='module') -def two_body_gp() -> GaussianProcess: +def two_body_gp()->GaussianProcess: """Returns a GP instance with a two-body numba-based kernel""" print("\nSetting up...\n") # params cell = np.eye(3) unique_species = [2, 1] + cutoff = 0.8 cutoffs = np.array([0.8, 0.8]) noa = 5 @@ -78,7 +80,7 @@ def params(): @pytest.fixture(scope='module') -def test_point() -> AtomicEnvironment: +def test_point()->AtomicEnvironment: """Create test point for kernel to compare against""" # params cell = np.eye(3) @@ -147,6 +149,8 @@ def test_set_L_alpha(two_body_gp, params): # params cell = np.eye(3) unique_species = [2, 1] + cutoff = 0.8 + cutoffs = np.array([0.8, 0.8]) noa = 2 # create test structure @@ -156,8 +160,8 @@ def test_set_L_alpha(two_body_gp, params): # set gp model kernel = en.two_plus_three_body kernel_grad = en.two_plus_three_body_grad - hyps = np.array([2.23751151e-01, 8.19990316e-01, 1.28421842e-04, - 1.07467158e+00, 5.50677932e-02]) + hyps = np.array([2.23751151e-01, 8.19990316e-01, 1.28421842e-04, + 1.07467158e+00, 5.50677932e-02]) cutoffs = np.array([5.4, 5.4]) hyp_labels = ['sig2', 'ls2', 'sig3', 'ls3', 'noise'] energy_force_kernel = en.two_plus_three_force_en @@ -180,19 +184,19 @@ def test_update_L_alpha(): kernel_grad = mc_simple.two_plus_three_body_mc_grad cutoffs = [6.0, 5.0] hyps = np.array([0.001770, 0.183868, -0.001415, 0.372588, 0.026315]) - + # get an otf traj from file for training data old_otf = OtfAnalysis('test_files/AgI_snippet.out') - call_no = 1 + call_no = 1 cell = old_otf.header['cell'] gp_model = old_otf.make_gp(kernel=kernel, kernel_grad=kernel_grad, - call_no=call_no, - cutoffs=cutoffs, + call_no=call_no, + cutoffs=cutoffs, hyps=hyps) - + # update database & use update_L_alpha to get ky_mat - for n in range(call_no, call_no + 1): + for n in range(call_no, call_no+1): positions = old_otf.gp_position_list[n] species = old_otf.gp_species_list[n] atoms = old_otf.gp_atom_list[n] @@ -207,11 +211,11 @@ def test_update_L_alpha(): # use set_L_alpha to get ky_mat gp_model.set_L_alpha() ky_mat_from_set = np.copy(gp_model.ky_mat) - - 
assert (np.all(np.absolute(ky_mat_from_update - ky_mat_from_set)) < 1e-6) - + + assert (np.all(np.absolute(ky_mat_from_update-ky_mat_from_set)) < 1e-6) def test_representation_method(two_body_gp): + the_str = str(two_body_gp) assert 'GaussianProcess Object' in the_str assert 'Kernel: three_body' in the_str @@ -222,7 +226,7 @@ def test_representation_method(two_body_gp): assert "Noise Var.: " in the_str -def test_serialization_method(two_body_gp, test_point): +def test_serialization_method(two_body_gp,test_point): """ Serialize and then un-serialize a GP and ensure that no info was lost. Compare one calculation to ensure predictions work correctly. @@ -255,4 +259,4 @@ def test_serialization_method(two_body_gp, test_point): for d in [0, 1, 2]: assert np.all(two_body_gp.predict(x_t=test_point, d=d) == - new_gp.predict(x_t=test_point, d=d)) + new_gp.predict(x_t=test_point, d=d)) \ No newline at end of file diff --git a/tests/test_util.py b/tests/test_util.py index a1f8e3126..0b149e4fe 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -23,7 +23,7 @@ def test_elt_warning(): def test_Z_to_element(): for i in range(1,118): - assert isinstance(Z_to_element(i), str) + assert isinstance(Z_to_element(i),str) for pair in zip([1, 6, '8', '118'], ['H', 'C', 'O', 'Og']): assert Z_to_element(pair[0]) == pair[1] From 1e6fb32699a1a087910f7eefbe2830538d82d74d Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 18:59:47 -0400 Subject: [PATCH 06/15] Revert "Unit Tests for GP serialization / representation" This reverts commit 1f626ea5f62e45e7c994c611e872dd0587fc08ec. --- tests/test_gp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_gp.py b/tests/test_gp.py index f201cbdee..e091e35d3 100644 --- a/tests/test_gp.py +++ b/tests/test_gp.py @@ -221,9 +221,9 @@ def test_representation_method(two_body_gp): assert 'Kernel: three_body' in the_str assert 'Cutoffs: [0.8 0.8]' in the_str assert 'Model Likelihood: ' in the_str - assert 'Length: ' in the_str - assert 'Signal Var.: ' in the_str - assert "Noise Var.: " in the_str + assert 'Length: 1' in the_str + assert 'Signal Var.: 1' in the_str + assert "Noise Var.: 1" in the_str def test_serialization_method(two_body_gp,test_point): From 781a074d0756ba460cf3aa62412989001f8f3c91 Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 18:59:56 -0400 Subject: [PATCH 07/15] Revert "Addded string-to-kernel helper functions to facilitate GPs from Dicts" This reverts commit 956e3a38199f757abfde0398cdde9f533608ab0f. 
--- flare/kernels.py | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/flare/kernels.py b/flare/kernels.py index 7171dfbe6..888e8515b 100644 --- a/flare/kernels.py +++ b/flare/kernels.py @@ -869,34 +869,3 @@ def triplet_force_en_kernel(ci1, ci2, ri1, ri2, ri3, rj1, rj2, rj3, print(kern_finite_diff) print(kern_analytical) assert(np.isclose(kern_finite_diff, kern_analytical, atol=tol)) - -_str_to_kernel = {'two_body': two_body, - 'two_body_en': two_body_en, - 'two_body_force_en': two_body_force_en, - 'three_body': three_body, - 'three_body_en': three_body_en, - 'three_body_force_en': three_body_force_en, - 'two_plus_three_body': two_plus_three_body, - 'two_plus_three_en': two_plus_three_en, - 'two_plus_three_force_en': two_plus_three_force_en - } - - -def str_to_kernel(string: str, include_grad: bool=False): - - if string not in _str_to_kernel.keys(): - raise ValueError("Kernel {} not found in list of available " - "kernels{}:".format(string,_str_to_kernel.keys())) - - if not include_grad: - return _str_to_kernel[string] - else: - if 'two' in string and 'three' in string: - return _str_to_kernel[string], two_plus_three_body_grad - elif 'two' in string and 'three' not in string: - return _str_to_kernel[string], two_body_grad - elif 'two' not in string and 'three' in string: - return _str_to_kernel[string], three_body_grad - else: - raise ValueError("Gradient callable for {} not found".format( - string)) From d990bcc9d8928def1b0c3c29181f1b292be31c49 Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 19:00:01 -0400 Subject: [PATCH 08/15] Revert "Added unit-tested string and dictionary methods for GPs" This reverts commit 5116cbcd09875ad4eb376e9214ce031843015666. --- flare/gp.py | 89 +++++------------------------------------------- tests/test_gp.py | 52 ++-------------------------- 2 files changed, 11 insertions(+), 130 deletions(-) diff --git a/flare/gp.py b/flare/gp.py index dcefa8aa9..6b3e3b560 100644 --- a/flare/gp.py +++ b/flare/gp.py @@ -8,9 +8,7 @@ from flare.gp_algebra import get_ky_mat, get_ky_and_hyp, \ get_like_from_ky_mat, get_like_grad_from_mats, get_neg_likelihood, \ get_neg_like_grad, get_ky_and_hyp_par -from flare.kernels import str_to_kernel -import json as json -from flare.util import NumpyEncoder + class GaussianProcess: """ Gaussian Process Regression Model. 
@@ -187,6 +185,12 @@ def predict(self, x_t: AtomicEnvironment, d: int) -> [float, float]: pred_var = self_kern - \ np.matmul(np.matmul(k_v, self.ky_mat_inv), k_v) + # # get predictive variance (possibly slow) + # v_vec = solve_triangular(self.l_mat, k_v, lower=True) + # self_kern = self.kernel(x_t, x_t, self.bodies, d, d, self.hyps, + # self.cutoffs) + # pred_var = self_kern - np.matmul(v_vec, v_vec) + return pred_mean, pred_var def predict_local_energy(self, x_t: AtomicEnvironment) -> float: @@ -288,8 +292,8 @@ def set_L_alpha(self): self.ky_mat_inv = ky_mat_inv self.l_mat_inv = l_mat_inv - self.likelihood = like - self.likelihood_gradient = like_grad + self.like = like + self.like_grad = like_grad def update_L_alpha(self): """ @@ -327,78 +331,3 @@ def update_L_alpha(self): self.alpha = alpha self.ky_mat_inv = ky_mat_inv self.l_mat_inv = l_mat_inv - - def __str__(self): - - thestr = "GaussianProcess Object\n" - thestr += 'Kernel: {}\n'.format(self.kernel_name) - thestr += "Training points: {}\n".format(len(self.training_data)) - thestr += 'Cutoffs: {}\n'.format(self.cutoffs) - thestr += 'Model Likelihood: {}\n'.format(self.likelihood) - - thestr += 'Hyperparameters: \n' - if self.hyp_labels is None: - # Put unlabeled hyperparameters on one line - thestr = thestr[:-1] - thestr += str(self.hyps)+'\n' - else: - for hyp, label in zip(self.hyps, self.hyp_labels): - thestr += "{}: {}\n".format(label, hyp) - - return thestr - - def as_dict(self): - - out_dict = dict(vars(self)) - - out_dict['training_data'] = [env.as_dict() for env in - self.training_data] - # Remove the callables - del out_dict['kernel'] - del out_dict['kernel_grad'] - - return out_dict - - @staticmethod - def from_dict(dictionary): - - force_kernel, grad = str_to_kernel(dictionary['kernel_name'], - include_grad=True) - - if dictionary['energy_kernel'] is not None: - energy_kernel = str_to_kernel(dictionary['energy_kernel']) - else: - energy_kernel = None - - if dictionary['energy_force_kernel'] is not None: - energy_force_kernel = str_to_kernel(dictionary[ - 'energy_force_kernel']) - else: - energy_force_kernel = None - - new_gp = GaussianProcess(kernel=force_kernel, - kernel_grad=grad, - energy_kernel=energy_kernel, - energy_force_kernel=energy_force_kernel, - cutoffs=np.array(dictionary['cutoffs']), - hyps=np.array(dictionary['hyps']), - hyp_labels=dictionary['hyp_labels'], - par=dictionary['par'], - maxiter=dictionary['maxiter'], - opt_algorithm=dictionary['algo']) - - # Save time by attempting to load in computed attributes - new_gp.l_mat = np.array(dictionary.get('l_mat', None)) - new_gp.l_mat_inv = np.array(dictionary.get('l_mat_inv', None)) - new_gp.alpha = np.array(dictionary.get('alpha', None)) - new_gp.ky_mat = np.array(dictionary.get('ky_mat', None)) - new_gp.ky_mat_inv = np.array(dictionary.get('ky_mat_inv', None)) - - new_gp.training_data = [AtomicEnvironment.from_dict(env) for env in - dictionary['training_data']] - new_gp.training_labels = dictionary['training_labels'] - - new_gp.likelihood = dictionary['likelihood'] - new_gp.likelihood_gradient = dictionary['likelihood_gradient'] - - return new_gp \ No newline at end of file diff --git a/tests/test_gp.py b/tests/test_gp.py index e091e35d3..edd40f5f5 100644 --- a/tests/test_gp.py +++ b/tests/test_gp.py @@ -35,7 +35,7 @@ def get_random_structure(cell, unique_species, noa): # set the scope to module so it will only be setup once @pytest.fixture(scope='module') -def two_body_gp()->GaussianProcess: +def two_body_gp(): """Returns a GP instance with a two-body 
numba-based kernel""" print("\nSetting up...\n") @@ -55,7 +55,6 @@ def two_body_gp()->GaussianProcess: GaussianProcess(kernel=en.three_body, kernel_grad=en.three_body_grad, hyps=np.array([1, 1, 1]), - hyp_labels=['Length', 'Signal Var.', 'Noise Var.'], cutoffs=cutoffs) gaussian.update_db(test_structure, forces) @@ -80,7 +79,7 @@ def params(): @pytest.fixture(scope='module') -def test_point()->AtomicEnvironment: +def test_point(): """Create test point for kernel to compare against""" # params cell = np.eye(3) @@ -213,50 +212,3 @@ def test_update_L_alpha(): ky_mat_from_set = np.copy(gp_model.ky_mat) assert (np.all(np.absolute(ky_mat_from_update-ky_mat_from_set)) < 1e-6) - -def test_representation_method(two_body_gp): - - the_str = str(two_body_gp) - assert 'GaussianProcess Object' in the_str - assert 'Kernel: three_body' in the_str - assert 'Cutoffs: [0.8 0.8]' in the_str - assert 'Model Likelihood: ' in the_str - assert 'Length: 1' in the_str - assert 'Signal Var.: 1' in the_str - assert "Noise Var.: 1" in the_str - - -def test_serialization_method(two_body_gp,test_point): - """ - Serialize and then un-serialize a GP and ensure that no info was lost. - Compare one calculation to ensure predictions work correctly. - :param two_body_gp: - :return: - """ - old_gp_dict = two_body_gp.as_dict() - new_gp = GaussianProcess.from_dict(old_gp_dict) - new_gp_dict = new_gp.as_dict() - - assert len(new_gp_dict) == len(old_gp_dict) - - for k1, k2 in zip(sorted(new_gp_dict.keys()), sorted(old_gp_dict.keys())): - - x = new_gp_dict[k1] - y = new_gp_dict[k2] - - if isinstance(x, np.ndarray): - assert np.equal(x, y).all() - elif hasattr(x, '__len__'): - - if isinstance(x[0], np.ndarray): - assert np.equal(x, y).all() - - else: - for xx, yy in zip(x, y): - assert xx == yy - else: - assert x == y - - for d in [0, 1, 2]: - assert np.all(two_body_gp.predict(x_t=test_point, d=d) == - new_gp.predict(x_t=test_point, d=d)) \ No newline at end of file From 344c801cb02d34f694ffb48bce8f50a4c04d7c22 Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 19:09:34 -0400 Subject: [PATCH 09/15] Re-Added unit-tested string and dictionary methods for GPs"" This reverts commit d990bcc9d8928def1b0c3c29181f1b292be31c49. --- flare/gp.py | 89 +++++++++++++++++++++++++++++++++++++++++++----- tests/test_gp.py | 52 ++++++++++++++++++++++++++-- 2 files changed, 130 insertions(+), 11 deletions(-) diff --git a/flare/gp.py b/flare/gp.py index 6b3e3b560..dcefa8aa9 100644 --- a/flare/gp.py +++ b/flare/gp.py @@ -8,7 +8,9 @@ from flare.gp_algebra import get_ky_mat, get_ky_and_hyp, \ get_like_from_ky_mat, get_like_grad_from_mats, get_neg_likelihood, \ get_neg_like_grad, get_ky_and_hyp_par - +from flare.kernels import str_to_kernel +import json as json +from flare.util import NumpyEncoder class GaussianProcess: """ Gaussian Process Regression Model. 
@@ -185,12 +187,6 @@ def predict(self, x_t: AtomicEnvironment, d: int) -> [float, float]: pred_var = self_kern - \ np.matmul(np.matmul(k_v, self.ky_mat_inv), k_v) - # # get predictive variance (possibly slow) - # v_vec = solve_triangular(self.l_mat, k_v, lower=True) - # self_kern = self.kernel(x_t, x_t, self.bodies, d, d, self.hyps, - # self.cutoffs) - # pred_var = self_kern - np.matmul(v_vec, v_vec) - return pred_mean, pred_var def predict_local_energy(self, x_t: AtomicEnvironment) -> float: @@ -292,8 +288,8 @@ def set_L_alpha(self): self.ky_mat_inv = ky_mat_inv self.l_mat_inv = l_mat_inv - self.like = like - self.like_grad = like_grad + self.likelihood = like + self.likelihood_gradient = like_grad def update_L_alpha(self): """ @@ -331,3 +327,78 @@ def update_L_alpha(self): self.alpha = alpha self.ky_mat_inv = ky_mat_inv self.l_mat_inv = l_mat_inv + + def __str__(self): + + thestr = "GaussianProcess Object\n" + thestr += 'Kernel: {}\n'.format(self.kernel_name) + thestr += "Training points: {}\n".format(len(self.training_data)) + thestr += 'Cutoffs: {}\n'.format(self.cutoffs) + thestr += 'Model Likelihood: {}\n'.format(self.likelihood) + + thestr += 'Hyperparameters: \n' + if self.hyp_labels is None: + # Put unlabeled hyperparameters on one line + thestr = thestr[:-1] + thestr += str(self.hyps)+'\n' + else: + for hyp, label in zip(self.hyps, self.hyp_labels): + thestr += "{}: {}\n".format(label, hyp) + + return thestr + + def as_dict(self): + + out_dict = dict(vars(self)) + + out_dict['training_data'] = [env.as_dict() for env in + self.training_data] + # Remove the callables + del out_dict['kernel'] + del out_dict['kernel_grad'] + + return out_dict + + @staticmethod + def from_dict(dictionary): + + force_kernel, grad = str_to_kernel(dictionary['kernel_name'], + include_grad=True) + + if dictionary['energy_kernel'] is not None: + energy_kernel = str_to_kernel(dictionary['energy_kernel']) + else: + energy_kernel = None + + if dictionary['energy_force_kernel'] is not None: + energy_force_kernel = str_to_kernel(dictionary[ + 'energy_force_kernel']) + else: + energy_force_kernel = None + + new_gp = GaussianProcess(kernel=force_kernel, + kernel_grad=grad, + energy_kernel=energy_kernel, + energy_force_kernel=energy_force_kernel, + cutoffs=np.array(dictionary['cutoffs']), + hyps=np.array(dictionary['hyps']), + hyp_labels=dictionary['hyp_labels'], + par=dictionary['par'], + maxiter=dictionary['maxiter'], + opt_algorithm=dictionary['algo']) + + # Save time by attempting to load in computed attributes + new_gp.l_mat = np.array(dictionary.get('l_mat', None)) + new_gp.l_mat_inv = np.array(dictionary.get('l_mat_inv', None)) + new_gp.alpha = np.array(dictionary.get('alpha', None)) + new_gp.ky_mat = np.array(dictionary.get('ky_mat', None)) + new_gp.ky_mat_inv = np.array(dictionary.get('ky_mat_inv', None)) + + new_gp.training_data = [AtomicEnvironment.from_dict(env) for env in + dictionary['training_data']] + new_gp.training_labels = dictionary['training_labels'] + + new_gp.likelihood = dictionary['likelihood'] + new_gp.likelihood_gradient = dictionary['likelihood_gradient'] + + return new_gp \ No newline at end of file diff --git a/tests/test_gp.py b/tests/test_gp.py index edd40f5f5..e091e35d3 100644 --- a/tests/test_gp.py +++ b/tests/test_gp.py @@ -35,7 +35,7 @@ def get_random_structure(cell, unique_species, noa): # set the scope to module so it will only be setup once @pytest.fixture(scope='module') -def two_body_gp(): +def two_body_gp()->GaussianProcess: """Returns a GP instance with a two-body 
numba-based kernel""" print("\nSetting up...\n") @@ -55,6 +55,7 @@ def two_body_gp(): GaussianProcess(kernel=en.three_body, kernel_grad=en.three_body_grad, hyps=np.array([1, 1, 1]), + hyp_labels=['Length', 'Signal Var.', 'Noise Var.'], cutoffs=cutoffs) gaussian.update_db(test_structure, forces) @@ -79,7 +80,7 @@ def params(): @pytest.fixture(scope='module') -def test_point(): +def test_point()->AtomicEnvironment: """Create test point for kernel to compare against""" # params cell = np.eye(3) @@ -212,3 +213,50 @@ def test_update_L_alpha(): ky_mat_from_set = np.copy(gp_model.ky_mat) assert (np.all(np.absolute(ky_mat_from_update-ky_mat_from_set)) < 1e-6) + +def test_representation_method(two_body_gp): + + the_str = str(two_body_gp) + assert 'GaussianProcess Object' in the_str + assert 'Kernel: three_body' in the_str + assert 'Cutoffs: [0.8 0.8]' in the_str + assert 'Model Likelihood: ' in the_str + assert 'Length: 1' in the_str + assert 'Signal Var.: 1' in the_str + assert "Noise Var.: 1" in the_str + + +def test_serialization_method(two_body_gp,test_point): + """ + Serialize and then un-serialize a GP and ensure that no info was lost. + Compare one calculation to ensure predictions work correctly. + :param two_body_gp: + :return: + """ + old_gp_dict = two_body_gp.as_dict() + new_gp = GaussianProcess.from_dict(old_gp_dict) + new_gp_dict = new_gp.as_dict() + + assert len(new_gp_dict) == len(old_gp_dict) + + for k1, k2 in zip(sorted(new_gp_dict.keys()), sorted(old_gp_dict.keys())): + + x = new_gp_dict[k1] + y = new_gp_dict[k2] + + if isinstance(x, np.ndarray): + assert np.equal(x, y).all() + elif hasattr(x, '__len__'): + + if isinstance(x[0], np.ndarray): + assert np.equal(x, y).all() + + else: + for xx, yy in zip(x, y): + assert xx == yy + else: + assert x == y + + for d in [0, 1, 2]: + assert np.all(two_body_gp.predict(x_t=test_point, d=d) == + new_gp.predict(x_t=test_point, d=d)) \ No newline at end of file From 39a031426b597cc5992e760e11c24f1c1a96d6f4 Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 19:09:46 -0400 Subject: [PATCH 10/15] Re-Addded string-to-kernel helper functions to facilitate GPs from Dicts"" This reverts commit 781a074d0756ba460cf3aa62412989001f8f3c91. 
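A minimal usage sketch of the helper restored by this commit (illustrative only, not part of the diff below); the kernel names are the keys registered in _str_to_kernel in flare/kernels.py:

    from flare.kernels import str_to_kernel

    # Look up a force kernel and its gradient from the stored name.
    kernel, kernel_grad = str_to_kernel('two_plus_three_body', include_grad=True)

    # Energy kernels are looked up without a gradient.
    en_kernel = str_to_kernel('two_plus_three_en')

Unrecognized names raise a ValueError listing the available kernels.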
--- flare/kernels.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/flare/kernels.py b/flare/kernels.py index 888e8515b..7171dfbe6 100644 --- a/flare/kernels.py +++ b/flare/kernels.py @@ -869,3 +869,34 @@ def triplet_force_en_kernel(ci1, ci2, ri1, ri2, ri3, rj1, rj2, rj3, print(kern_finite_diff) print(kern_analytical) assert(np.isclose(kern_finite_diff, kern_analytical, atol=tol)) + +_str_to_kernel = {'two_body': two_body, + 'two_body_en': two_body_en, + 'two_body_force_en': two_body_force_en, + 'three_body': three_body, + 'three_body_en': three_body_en, + 'three_body_force_en': three_body_force_en, + 'two_plus_three_body': two_plus_three_body, + 'two_plus_three_en': two_plus_three_en, + 'two_plus_three_force_en': two_plus_three_force_en + } + + +def str_to_kernel(string: str, include_grad: bool=False): + + if string not in _str_to_kernel.keys(): + raise ValueError("Kernel {} not found in list of available " + "kernels{}:".format(string,_str_to_kernel.keys())) + + if not include_grad: + return _str_to_kernel[string] + else: + if 'two' in string and 'three' in string: + return _str_to_kernel[string], two_plus_three_body_grad + elif 'two' in string and 'three' not in string: + return _str_to_kernel[string], two_body_grad + elif 'two' not in string and 'three' in string: + return _str_to_kernel[string], three_body_grad + else: + raise ValueError("Gradient callable for {} not found".format( + string)) From ee98fc3ad16a38858c458b4fe38794686ad0e5cb Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 19:09:57 -0400 Subject: [PATCH 11/15] Re-add Unit Tests for GP serialization / representation --- tests/test_gp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_gp.py b/tests/test_gp.py index e091e35d3..f201cbdee 100644 --- a/tests/test_gp.py +++ b/tests/test_gp.py @@ -221,9 +221,9 @@ def test_representation_method(two_body_gp): assert 'Kernel: three_body' in the_str assert 'Cutoffs: [0.8 0.8]' in the_str assert 'Model Likelihood: ' in the_str - assert 'Length: 1' in the_str - assert 'Signal Var.: 1' in the_str - assert "Noise Var.: 1" in the_str + assert 'Length: ' in the_str + assert 'Signal Var.: ' in the_str + assert "Noise Var.: " in the_str def test_serialization_method(two_body_gp,test_point): From 253b8a7dbc75a10d38e75b6dca5d6722ba6e9d87 Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 19:10:11 -0400 Subject: [PATCH 12/15] "Improved PEP8 compliance --- flare/gp.py | 33 ++++++++++++++++++++------------- flare/util.py | 3 ++- tests/test_gp.py | 34 +++++++++++++++------------------- tests/test_util.py | 2 +- 4 files changed, 38 insertions(+), 34 deletions(-) diff --git a/flare/gp.py b/flare/gp.py index dcefa8aa9..0399942a6 100644 --- a/flare/gp.py +++ b/flare/gp.py @@ -9,8 +9,7 @@ get_like_from_ky_mat, get_like_grad_from_mats, get_neg_likelihood, \ get_neg_like_grad, get_ky_and_hyp_par from flare.kernels import str_to_kernel -import json as json -from flare.util import NumpyEncoder + class GaussianProcess: """ Gaussian Process Regression Model. 
@@ -38,17 +37,23 @@ def __init__(self, kernel: Callable, self.hyp_labels = hyp_labels self.cutoffs = cutoffs self.algo = opt_algorithm - self.l_mat = None - self.alpha = None + self.training_data = [] self.training_labels = [] self.training_labels_np = np.empty(0, ) self.maxiter = maxiter - self.likelihood = None - self.likelihood_gradient = None self.par = par self.output = output + # Parameters set during training + self.ky_mat = None + self.l_mat = None + self.alpha = None + self.ky_mat_inv = None + self.l_mat_inv = None + self.likelihood = None + self.likelihood_gradient = None + # TODO unit test custom range def update_db(self, struc: Structure, forces: list, custom_range: List[int] = ()): @@ -130,6 +135,7 @@ def train(self, output=None, custom_bounds=None, args = (self.training_data, self.training_labels_np, self.kernel_grad, self.cutoffs, output, self.par) + res = None if self.algo == 'L-BFGS-B': @@ -168,7 +174,8 @@ def train(self, output=None, custom_bounds=None, options={'disp': False, 'maxiter': self.maxiter, 'xtol': x_tol}) - + if res is None: + raise RuntimeError("Optimization failed for some reason.") self.hyps = res.x self.set_L_alpha() self.likelihood = -res.fun @@ -340,7 +347,7 @@ def __str__(self): if self.hyp_labels is None: # Put unlabeled hyperparameters on one line thestr = thestr[:-1] - thestr += str(self.hyps)+'\n' + thestr += str(self.hyps) + '\n' else: for hyp, label in zip(self.hyps, self.hyp_labels): thestr += "{}: {}\n".format(label, hyp) @@ -352,7 +359,7 @@ def as_dict(self): out_dict = dict(vars(self)) out_dict['training_data'] = [env.as_dict() for env in - self.training_data] + self.training_data] # Remove the callables del out_dict['kernel'] del out_dict['kernel_grad'] @@ -363,7 +370,7 @@ def as_dict(self): def from_dict(dictionary): force_kernel, grad = str_to_kernel(dictionary['kernel_name'], - include_grad=True) + include_grad=True) if dictionary['energy_kernel'] is not None: energy_kernel = str_to_kernel(dictionary['energy_kernel']) @@ -372,7 +379,7 @@ def from_dict(dictionary): if dictionary['energy_force_kernel'] is not None: energy_force_kernel = str_to_kernel(dictionary[ - 'energy_force_kernel']) + 'energy_force_kernel']) else: energy_force_kernel = None @@ -395,10 +402,10 @@ def from_dict(dictionary): new_gp.ky_mat_inv = np.array(dictionary.get('ky_mat_inv', None)) new_gp.training_data = [AtomicEnvironment.from_dict(env) for env in - dictionary['training_data']] + dictionary['training_data']] new_gp.training_labels = dictionary['training_labels'] new_gp.likelihood = dictionary['likelihood'] new_gp.likelihood_gradient = dictionary['likelihood_gradient'] - return new_gp \ No newline at end of file + return new_gp diff --git a/flare/util.py b/flare/util.py index 5ffd64b5f..b6e26ce6d 100644 --- a/flare/util.py +++ b/flare/util.py @@ -128,7 +128,8 @@ _Z_to_element = {z: elt for elt, z in _element_to_Z.items()} -def element_to_Z(element:str)->int: + +def element_to_Z(element: str)->int: """ Returns the atomic number Z associated with an elements 1-2 letter name. Returns the same integer if an integer is passed in. 
diff --git a/tests/test_gp.py b/tests/test_gp.py index f201cbdee..b014a74ff 100644 --- a/tests/test_gp.py +++ b/tests/test_gp.py @@ -1,6 +1,5 @@ import pytest import numpy as np -import sys from flare.gp import GaussianProcess from flare.env import AtomicEnvironment from flare.struc import Structure @@ -35,14 +34,13 @@ def get_random_structure(cell, unique_species, noa): # set the scope to module so it will only be setup once @pytest.fixture(scope='module') -def two_body_gp()->GaussianProcess: +def two_body_gp() -> GaussianProcess: """Returns a GP instance with a two-body numba-based kernel""" print("\nSetting up...\n") # params cell = np.eye(3) unique_species = [2, 1] - cutoff = 0.8 cutoffs = np.array([0.8, 0.8]) noa = 5 @@ -80,7 +78,7 @@ def params(): @pytest.fixture(scope='module') -def test_point()->AtomicEnvironment: +def test_point() -> AtomicEnvironment: """Create test point for kernel to compare against""" # params cell = np.eye(3) @@ -149,8 +147,6 @@ def test_set_L_alpha(two_body_gp, params): # params cell = np.eye(3) unique_species = [2, 1] - cutoff = 0.8 - cutoffs = np.array([0.8, 0.8]) noa = 2 # create test structure @@ -160,8 +156,8 @@ def test_set_L_alpha(two_body_gp, params): # set gp model kernel = en.two_plus_three_body kernel_grad = en.two_plus_three_body_grad - hyps = np.array([2.23751151e-01, 8.19990316e-01, 1.28421842e-04, - 1.07467158e+00, 5.50677932e-02]) + hyps = np.array([2.23751151e-01, 8.19990316e-01, 1.28421842e-04, + 1.07467158e+00, 5.50677932e-02]) cutoffs = np.array([5.4, 5.4]) hyp_labels = ['sig2', 'ls2', 'sig3', 'ls3', 'noise'] energy_force_kernel = en.two_plus_three_force_en @@ -184,19 +180,19 @@ def test_update_L_alpha(): kernel_grad = mc_simple.two_plus_three_body_mc_grad cutoffs = [6.0, 5.0] hyps = np.array([0.001770, 0.183868, -0.001415, 0.372588, 0.026315]) - + # get an otf traj from file for training data old_otf = OtfAnalysis('test_files/AgI_snippet.out') - call_no = 1 + call_no = 1 cell = old_otf.header['cell'] gp_model = old_otf.make_gp(kernel=kernel, kernel_grad=kernel_grad, - call_no=call_no, - cutoffs=cutoffs, + call_no=call_no, + cutoffs=cutoffs, hyps=hyps) - + # update database & use update_L_alpha to get ky_mat - for n in range(call_no, call_no+1): + for n in range(call_no, call_no + 1): positions = old_otf.gp_position_list[n] species = old_otf.gp_species_list[n] atoms = old_otf.gp_atom_list[n] @@ -211,11 +207,11 @@ def test_update_L_alpha(): # use set_L_alpha to get ky_mat gp_model.set_L_alpha() ky_mat_from_set = np.copy(gp_model.ky_mat) - - assert (np.all(np.absolute(ky_mat_from_update-ky_mat_from_set)) < 1e-6) -def test_representation_method(two_body_gp): + assert (np.all(np.absolute(ky_mat_from_update - ky_mat_from_set)) < 1e-6) + +def test_representation_method(two_body_gp): the_str = str(two_body_gp) assert 'GaussianProcess Object' in the_str assert 'Kernel: three_body' in the_str @@ -226,7 +222,7 @@ def test_representation_method(two_body_gp): assert "Noise Var.: " in the_str -def test_serialization_method(two_body_gp,test_point): +def test_serialization_method(two_body_gp, test_point): """ Serialize and then un-serialize a GP and ensure that no info was lost. Compare one calculation to ensure predictions work correctly. 
@@ -259,4 +255,4 @@ def test_serialization_method(two_body_gp,test_point): for d in [0, 1, 2]: assert np.all(two_body_gp.predict(x_t=test_point, d=d) == - new_gp.predict(x_t=test_point, d=d)) \ No newline at end of file + new_gp.predict(x_t=test_point, d=d)) diff --git a/tests/test_util.py b/tests/test_util.py index 0b149e4fe..a1f8e3126 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -23,7 +23,7 @@ def test_elt_warning(): def test_Z_to_element(): for i in range(1,118): - assert isinstance(Z_to_element(i),str) + assert isinstance(Z_to_element(i), str) for pair in zip([1, 6, '8', '118'], ['H', 'C', 'O', 'Og']): assert Z_to_element(pair[0]) == pair[1] From 3cc23551e4fd337af98b9b28f1026e264d10abd1 Mon Sep 17 00:00:00 2001 From: Steven T Date: Sun, 22 Sep 2019 19:33:40 -0400 Subject: [PATCH 13/15] Fixed like attribuet being used in different places --- flare/gp_from_aimd.py | 2 +- flare/otf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flare/gp_from_aimd.py b/flare/gp_from_aimd.py index 747e10faf..48be45157 100644 --- a/flare/gp_from_aimd.py +++ b/flare/gp_from_aimd.py @@ -245,7 +245,7 @@ def train_gp(self): self.output.write_hyps(self.gp.hyp_labels, self.gp.hyps, self.start_time, - self.gp.like, self.gp.like_grad) + self.gp.likelihood, self.gp.likelihood_gradient) self.train_count += 1 def is_std_in_bound(self, frame: Structure)->(bool, List[int]): diff --git a/flare/otf.py b/flare/otf.py index 68723de34..dec6a3094 100644 --- a/flare/otf.py +++ b/flare/otf.py @@ -259,7 +259,7 @@ def train_gp(self): self.gp.train(self.output) self.output.write_hyps(self.gp.hyp_labels, self.gp.hyps, self.start_time, - self.gp.like, self.gp.like_grad) + self.gp.likelihood, self.gp.likelihood_gradient) def is_std_in_bound(self): # set uncertainty threshold From 9a4cc93b75603bb53b47a51078960975da3038e6 Mon Sep 17 00:00:00 2001 From: Steven T Date: Tue, 24 Sep 2019 17:02:13 -0400 Subject: [PATCH 14/15] Added multicomponent kernels --- flare/gp.py | 9 +++++++-- flare/mc_simple.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/flare/gp.py b/flare/gp.py index 0399942a6..89393f362 100644 --- a/flare/gp.py +++ b/flare/gp.py @@ -9,6 +9,7 @@ get_like_from_ky_mat, get_like_grad_from_mats, get_neg_likelihood, \ get_neg_like_grad, get_ky_and_hyp_par from flare.kernels import str_to_kernel +from flare.mc_simple import str_to_mc_kernel class GaussianProcess: @@ -369,8 +370,12 @@ def as_dict(self): @staticmethod def from_dict(dictionary): - force_kernel, grad = str_to_kernel(dictionary['kernel_name'], - include_grad=True) + if 'mc' in dictionary['kernel_name']: + force_kernel, grad = str_to_mc_kernel(dictionary['kernel_name'], + include_grad=True) + else: + force_kernel, grad = str_to_kernel(dictionary['kernel_name'], + include_grad=True) if dictionary['energy_kernel'] is not None: energy_kernel = str_to_kernel(dictionary['energy_kernel']) diff --git a/flare/mc_simple.py b/flare/mc_simple.py index 2be7ead55..49cbc8fb9 100644 --- a/flare/mc_simple.py +++ b/flare/mc_simple.py @@ -822,3 +822,35 @@ def two_body_mc_en_jit(bond_array_1, c1, etypes1, kern += fi * fj * sig2 * exp(-r11 * r11 * ls1) return kern + + +_str_to_kernel = {'two_body_mc': two_body_mc, + 'two_body_en_mc': two_body_mc_en, + 'two_body_mc_force_en': two_body_mc_force_en, + 'three_body_mc': three_body_mc, + 'three_body_mc_en': three_body_mc_en, + 'three_body_mc_force_en': three_body_mc_force_en, + 'two_plus_three_body_mc': two_plus_three_body_mc, + 'two_plus_three_mc_en': 
two_plus_three_mc_en, + 'two_plus_three_mc_force_en': two_plus_three_mc_force_en + } + + +def str_to_mc_kernel(string: str, include_grad: bool=False): + + if string not in _str_to_kernel.keys(): + raise ValueError("Kernel {} not found in list of available " + "kernels{}:".format(string, _str_to_kernel.keys())) + + if not include_grad: + return _str_to_kernel[string] + else: + if 'two' in string and 'three' in string: + return _str_to_kernel[string], two_plus_three_body_mc_grad + elif 'two' in string and 'three' not in string: + return _str_to_kernel[string], two_body_mc_grad + elif 'two' not in string and 'three' in string: + return _str_to_kernel[string], three_body_mc_grad + else: + raise ValueError("Gradient callable for {} not found".format( + string)) From f32495764a2b9d8e670d16ba4f4f60fe861d79f8 Mon Sep 17 00:00:00 2001 From: Jonathan Vandermause Date: Thu, 26 Sep 2019 10:29:11 -0400 Subject: [PATCH 15/15] some linting --- flare/gp.py | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/flare/gp.py b/flare/gp.py index 89393f362..358f04ff7 100644 --- a/flare/gp.py +++ b/flare/gp.py @@ -1,13 +1,13 @@ +"""Gaussian process model of the Born Oppenheimer potential energy surface.""" import math +from typing import List, Callable import numpy as np from scipy.linalg import solve_triangular from scipy.optimize import minimize -from typing import List, Callable from flare.env import AtomicEnvironment from flare.struc import Structure -from flare.gp_algebra import get_ky_mat, get_ky_and_hyp, \ - get_like_from_ky_mat, get_like_grad_from_mats, get_neg_likelihood, \ - get_neg_like_grad, get_ky_and_hyp_par +from flare.gp_algebra import get_ky_and_hyp, get_like_grad_from_mats, \ + get_neg_likelihood, get_neg_like_grad, get_ky_and_hyp_par from flare.kernels import str_to_kernel from flare.mc_simple import str_to_mc_kernel @@ -112,9 +112,9 @@ def force_list_to_np(forces: list) -> np.ndarray: """ forces_np = [] - for m in range(len(forces)): - for n in range(3): - forces_np.append(forces[m][n]) + for force in forces: + for force_comp in force: + forces_np.append(force_comp) forces_np = np.array(forces_np) @@ -124,12 +124,9 @@ def train(self, output=None, custom_bounds=None, grad_tol: float = 1e-4, x_tol: float = 1e-5, line_steps: int = 20): - """ - Train Gaussian Process model on training data. - Tunes the hyperparameters to maximize the Bayesian likelihood, - then computes L and alpha (related to the covariance matrix of the - training set). - """ + """Train Gaussian Process model on training data. Tunes the \ +hyperparameters to maximize the likelihood, then computes L and alpha \ +(related to the covariance matrix of the training set).""" x_0 = self.hyps @@ -183,6 +180,9 @@ def train(self, output=None, custom_bounds=None, self.likelihood_gradient = -res.jac def predict(self, x_t: AtomicEnvironment, d: int) -> [float, float]: + """Predict force component of an atomic environment and its \ +uncertainty.""" + # Kernel vector allows for evaluation of At. Env. 
k_v = self.get_kernel_vector(x_t, d) @@ -193,7 +193,7 @@ def predict(self, x_t: AtomicEnvironment, d: int) -> [float, float]: self_kern = self.kernel(x_t, x_t, d, d, self.hyps, self.cutoffs) pred_var = self_kern - \ - np.matmul(np.matmul(k_v, self.ky_mat_inv), k_v) + np.matmul(np.matmul(k_v, self.ky_mat_inv), k_v) return pred_mean, pred_var @@ -212,6 +212,9 @@ def predict_local_energy(self, x_t: AtomicEnvironment) -> float: return pred_mean def predict_local_energy_and_var(self, x_t: AtomicEnvironment): + """Predict the local energy of an atomic environment and its \ +uncertainty.""" + # get kernel vector k_v = self.en_kern_vec(x_t) @@ -252,6 +255,9 @@ def get_kernel_vector(self, x: AtomicEnvironment, return k_v def en_kern_vec(self, x: AtomicEnvironment) -> np.ndarray: + """Compute the vector of energy/force kernels between an atomic \ +environment and the environments in the training set.""" + ds = [1, 2, 3] size = len(self.training_data) * 3 k_v = np.zeros(size, ) @@ -337,6 +343,7 @@ def update_L_alpha(self): self.l_mat_inv = l_mat_inv def __str__(self): + """String representation of the GP model.""" thestr = "GaussianProcess Object\n" thestr += 'Kernel: {}\n'.format(self.kernel_name) @@ -356,6 +363,7 @@ def __str__(self): return thestr def as_dict(self): + """Dictionary representation of the GP model.""" out_dict = dict(vars(self)) @@ -369,10 +377,11 @@ def as_dict(self): @staticmethod def from_dict(dictionary): + """Create GP object from dictionary representation.""" if 'mc' in dictionary['kernel_name']: - force_kernel, grad = str_to_mc_kernel(dictionary['kernel_name'], - include_grad=True) + force_kernel, grad = \ + str_to_mc_kernel(dictionary['kernel_name'], include_grad=True) else: force_kernel, grad = str_to_kernel(dictionary['kernel_name'], include_grad=True) @@ -383,8 +392,8 @@ def from_dict(dictionary): energy_kernel = None if dictionary['energy_force_kernel'] is not None: - energy_force_kernel = str_to_kernel(dictionary[ - 'energy_force_kernel']) + energy_force_kernel = \ + str_to_kernel(dictionary['energy_force_kernel']) else: energy_force_kernel = None
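A brief usage sketch of the serialization path added in this series (illustrative only; it mirrors the two_body_gp fixture in tests/test_gp.py, and the json/NumpyEncoder step is an assumption based on the flare.util import referenced in PATCH 01, not something exercised by the tests):

    import json
    import numpy as np
    import flare.kernels as en
    from flare.gp import GaussianProcess
    from flare.util import NumpyEncoder  # assumed to live in flare.util, as imported in PATCH 01

    gp = GaussianProcess(kernel=en.three_body,
                         kernel_grad=en.three_body_grad,
                         hyps=np.array([1., 1., 1.]),
                         hyp_labels=['Length', 'Signal Var.', 'Noise Var.'],
                         cutoffs=np.array([0.8, 0.8]))

    # ... call update_db / train as usual, then round-trip through a dictionary:
    gp_dict = gp.as_dict()
    gp_copy = GaussianProcess.from_dict(gp_dict)
    print(gp_copy)  # __str__ reports kernel name, cutoffs, likelihood and hyperparameters

    # Assumed route to disk: NumpyEncoder is expected to handle the ndarray attributes.
    with open('gp_model.json', 'w') as f:
        json.dump(gp_dict, f, cls=NumpyEncoder)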