diff --git a/tests/test_acsf.py b/tests/test_acsf.py deleted file mode 100644 index c8904d6b..00000000 --- a/tests/test_acsf.py +++ /dev/null @@ -1,125 +0,0 @@ -""" -This file contains tests for the tensorflow atom centred symmetry function module. It uses the numpy implementation -as a comparison. -""" - -import os - -import numpy as np -import tensorflow as tf - -from qmllib.aglaia import np_symm_funct, symm_funct - - -def test_acsf_1(): - """ - This test compares the atom centred symmetry functions generated with tensorflow and numpy. - The test system consists of 5 configurations of CH4 + CN radical. - :return: None - """ - - test_dir = os.path.dirname(os.path.realpath(__file__)) - - nRs2 = 3 - nRs3 = 3 - nTs = 3 - rcut = 5 - acut = 5 - zeta = 220.127 - eta = 30.8065 - bin_min = 0.0 - - input_data = test_dir + "/data/data_test_acsf.npz" - data = np.load(input_data) - - xyzs = data["arr_0"] - zs = data["arr_1"] - elements = data["arr_2"] - element_pairs = data["arr_3"] - - n_samples = xyzs.shape[0] - n_atoms = zs.shape[1] - - with tf.name_scope("Inputs"): - zs_tf = tf.placeholder(shape=[n_samples, n_atoms], dtype=tf.int32, name="zs") - xyz_tf = tf.placeholder(shape=[n_samples, n_atoms, 3], dtype=tf.float32, name="xyz") - - acsf_tf_t = symm_funct.generate_acsf_tf( - xyz_tf, zs_tf, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta, bin_min - ) - - sess = tf.Session() - sess.run(tf.global_variables_initializer()) - acsf_tf = sess.run(acsf_tf_t, feed_dict={xyz_tf: xyzs, zs_tf: zs}) - - acsf_np = np_symm_funct.generate_acsf_np( - xyzs, zs, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta, bin_min - ) - - n_samples = xyzs.shape[0] - n_atoms = xyzs.shape[1] - - for i in range(n_samples): - for j in range(n_atoms): - acsf_np_sort = np.sort(acsf_np[i][j]) - acsf_tf_sort = np.sort(acsf_tf[i][j]) - np.testing.assert_array_almost_equal(acsf_np_sort, acsf_tf_sort, decimal=4) - - -def test_acsf_2(): - """ - This test compares the atom centred symmetry functions generated with tensorflow and numpy. - The test system consists of 10 molecules from the QM7 data set. - :return: None - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - nRs2 = 3 - nRs3 = 3 - nTs = 3 - rcut = 5 - acut = 5 - zeta = 220.127 - eta = 30.8065 - bin_min = 0.0 - - input_data = test_dir + "/data/qm7_testdata.npz" - data = np.load(input_data) - - xyzs = data["arr_0"] - zs = data["arr_1"] - elements = data["arr_2"] - element_pairs = data["arr_3"] - - n_samples = xyzs.shape[0] - max_n_atoms = zs.shape[1] - - with tf.name_scope("Inputs"): - zs_tf = tf.placeholder(shape=[n_samples, max_n_atoms], dtype=tf.int32, name="zs") - xyz_tf = tf.placeholder(shape=[n_samples, max_n_atoms, 3], dtype=tf.float32, name="xyz") - - acsf_tf_t = symm_funct.generate_acsf_tf( - xyz_tf, zs_tf, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta, bin_min - ) - - sess = tf.Session() - sess.run(tf.global_variables_initializer()) - acsf_tf = sess.run(acsf_tf_t, feed_dict={xyz_tf: xyzs, zs_tf: zs}) - - acsf_np = np_symm_funct.generate_acsf_np( - xyzs, zs, elements, element_pairs, rcut, acut, nRs2, nRs3, nTs, zeta, eta, bin_min - ) - - for i in range(n_samples): - for j in range(max_n_atoms): - if zs[i][j] == 0: - continue - else: - acsf_np_sort = np.sort(acsf_np[i][j]) - acsf_tf_sort = np.sort(acsf_tf[i][j]) - np.testing.assert_array_almost_equal(acsf_np_sort, acsf_tf_sort, decimal=4) - - -if __name__ == "__main__": - test_acsf_1() - test_acsf_2() diff --git a/tests/test_acsf_linear_angles.py b/tests/test_acsf_linear_angles.py deleted file mode 100644 index 9098c0c5..00000000 --- a/tests/test_acsf_linear_angles.py +++ /dev/null @@ -1,173 +0,0 @@ -""" -This file contains tests for the atom centred symmetry function module. -""" -from __future__ import print_function - -import os -from copy import deepcopy - -import numpy as np - -np.set_printoptions(linewidth=666, edgeitems=100000000000000000) -from qmllib import Compound -from qmllib.representations import generate_acsf, generate_fchl_acsf - -REP_PARAMS = dict() -REP_PARAMS["elements"] = [1, 6, 7] -# REP_PARAMS["pad"] = -# REP_PARAMS["nRs2"] = 30 -# REP_PARAMS["nRs3"] = 3 - - -def get_fchl_acsf_numgrad(mol, dx=1e-5): - - true_coords = deepcopy(mol.coordinates) - - true_rep = generate_fchl_acsf( - mol.nuclear_charges, mol.coordinates, gradients=False, **REP_PARAMS - ) - - gradient = np.zeros((3, mol.natoms, true_rep.shape[0], true_rep.shape[1])) - - for n, coord in enumerate(true_coords): - for xyz, x in enumerate(coord): - - temp_coords = deepcopy(true_coords) - temp_coords[n, xyz] = x + 2.0 * dx - - (rep, grad) = generate_fchl_acsf( - mol.nuclear_charges, temp_coords, gradients=True, **REP_PARAMS - ) - gradient[xyz, n] -= rep - - temp_coords[n, xyz] = x + dx - (rep, grad) = generate_fchl_acsf( - mol.nuclear_charges, temp_coords, gradients=True, **REP_PARAMS - ) - gradient[xyz, n] += 8.0 * rep - - temp_coords[n, xyz] = x - dx - (rep, grad) = generate_fchl_acsf( - mol.nuclear_charges, temp_coords, gradients=True, **REP_PARAMS - ) - gradient[xyz, n] -= 8.0 * rep - - temp_coords[n, xyz] = x - 2.0 * dx - (rep, grad) = generate_fchl_acsf( - mol.nuclear_charges, temp_coords, gradients=True, **REP_PARAMS - ) - gradient[xyz, n] += rep - - gradient /= 12 * dx - - gradient = np.swapaxes(gradient, 0, 1) - gradient = np.swapaxes(gradient, 2, 0) - gradient = np.swapaxes(gradient, 3, 1) - - return gradient - - -def get_acsf_numgrad(mol, dx=1e-5): - - true_coords = deepcopy(mol.coordinates) - - true_rep = generate_acsf(mol.nuclear_charges, mol.coordinates, gradients=False, **REP_PARAMS) - - gradient = np.zeros((3, mol.natoms, true_rep.shape[0], true_rep.shape[1])) - - for n, coord in enumerate(true_coords): - for xyz, x in enumerate(coord): - - temp_coords = deepcopy(true_coords) - temp_coords[n, xyz] = x + 2.0 * dx - - (rep, grad) = generate_acsf( - mol.nuclear_charges, temp_coords, gradients=True, **REP_PARAMS - ) - gradient[xyz, n] -= rep - - temp_coords[n, xyz] = x + dx - (rep, grad) = generate_acsf( - mol.nuclear_charges, temp_coords, gradients=True, **REP_PARAMS - ) - gradient[xyz, n] += 8.0 * rep - - temp_coords[n, xyz] = x - dx - (rep, grad) = generate_acsf( - mol.nuclear_charges, temp_coords, gradients=True, **REP_PARAMS - ) - gradient[xyz, n] -= 8.0 * rep - - temp_coords[n, xyz] = x - 2.0 * dx - (rep, grad) = generate_acsf( - mol.nuclear_charges, temp_coords, gradients=True, **REP_PARAMS - ) - gradient[xyz, n] += rep - - gradient /= 12 * dx - - gradient = np.swapaxes(gradient, 0, 1) - gradient = np.swapaxes(gradient, 2, 0) - gradient = np.swapaxes(gradient, 3, 1) - - return gradient - - -def test_fchl_acsf(): - - test_dir = os.path.dirname(os.path.realpath(__file__)) - - # mol = Compound(xyz=test_dir+ "/qm7/0101.xyz") - mol = Compound(xyz=test_dir + "/data/hcn.xyz") - - (repa, anal_grad) = generate_fchl_acsf( - mol.nuclear_charges, mol.coordinates, gradients=True, **REP_PARAMS - ) - - # help(generate_fchl_acsf) - print("ANALYTICAL") - print(anal_grad[0]) - - repb = generate_fchl_acsf(mol.nuclear_charges, mol.coordinates, gradients=False, **REP_PARAMS) - - assert np.allclose(repa, repb), "Error in FCHL-ACSF representation implementation" - - num_grad = get_fchl_acsf_numgrad(mol) - - print("NUMERICAL") - print(num_grad[0]) - - assert np.allclose(anal_grad, num_grad), "Error in FCHL-ACSF gradient implementation" - - -def test_acsf(): - - test_dir = os.path.dirname(os.path.realpath(__file__)) - - # mol = Compound(xyz=test_dir+ "/qm7/0101.xyz") - mol = Compound(xyz=test_dir + "/data/hcn.xyz") - - (repa, anal_grad) = generate_acsf( - mol.nuclear_charges, mol.coordinates, gradients=True, **REP_PARAMS - ) - - # help(generate_fchl_acsf) - print("ANALYTICAL") - # print(anal_grad[0]) - - repb = generate_acsf(mol.nuclear_charges, mol.coordinates, gradients=False, **REP_PARAMS) - - assert np.allclose(repa, repb), "Error in FCHL-ACSF representation implementation" - - num_grad = get_acsf_numgrad(mol) - - print("NUMERICAL") - # print(num_grad[0]) - - assert np.allclose(anal_grad, num_grad), "Error in FCHL-ACSF gradient implementation" - - -if __name__ == "__main__": - - test_fchl_acsf() - test_acsf() diff --git a/tests/test_armp.py b/tests/test_armp.py deleted file mode 100644 index 4a37f887..00000000 --- a/tests/test_armp.py +++ /dev/null @@ -1,372 +0,0 @@ -""" -This test checks if all the ways of setting up the estimator ARMP work. -""" - - -import glob -import os -import shutil - -import numpy as np - -from qmllib.aglaia.aglaia import ARMP -from qmllib.utils import InputError - - -def test_set_representation(): - """ - This function tests the function _set_representation. - """ - try: - ARMP(representation_name="slatm", representation_params={"slatm_sigma12": 0.05}) - raise Exception - except InputError: - pass - - try: - ARMP(representation_name="coulomb_matrix") - raise Exception - except InputError: - pass - - try: - ARMP(representation_name="slatm", representation_params={"slatm_alchemy": 0.05}) - raise Exception - except InputError: - pass - - parameters = { - "slatm_sigma1": 0.07, - "slatm_sigma2": 0.04, - "slatm_dgrid1": 0.02, - "slatm_dgrid2": 0.06, - "slatm_rcut": 5.0, - "slatm_rpower": 7, - "slatm_alchemy": True, - } - - estimator = ARMP(representation_name="slatm", representation_params=parameters) - - assert estimator.representation_name == "slatm" - assert estimator.slatm_parameters == parameters - - -def test_set_properties(): - """ - This test checks that the set_properties function sets the correct properties. - :return: - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - energies = np.loadtxt(test_dir + "/CN_isobutane/prop_kjmol_training.txt", usecols=[1]) - - estimator = ARMP(representation_name="slatm") - - assert estimator.properties == None - - estimator.set_properties(energies) - - assert np.all(estimator.properties == energies) - - -def test_set_descriptor(): - """ - This test checks that the set_descriptor function works as expected. - :return: - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - data_incorrect = np.load(test_dir + "/data/CN_isopent_light_UCM.npz") - data_correct = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz") - descriptor_correct = data_correct["arr_0"] - descriptor_incorrect = data_incorrect["arr_0"] - - estimator = ARMP() - - assert estimator.representation == None - - estimator.set_representations(representations=descriptor_correct) - - assert np.all(estimator.representation == descriptor_correct) - - # Pass a descriptor with the wrong shape - try: - estimator.set_representations(representations=descriptor_incorrect) - raise Exception - except InputError: - pass - - -def test_fit_1(): - """ - This function tests the first way of fitting the descriptor: the data is passed by first creating compounds and then - the descriptors are created from the compounds. - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - filenames = glob.glob(test_dir + "/CN_isobutane/*.xyz") - energies = np.loadtxt(test_dir + "/CN_isobutane/prop_kjmol_training.txt", usecols=[1]) - filenames.sort() - - estimator = ARMP(representation_name="acsf") - estimator.generate_compounds(filenames[:50]) - estimator.set_properties(energies[:50]) - estimator.generate_representation() - - idx = np.arange(0, 50) - estimator.fit(idx) - - -def test_fit_2(): - """ - This function tests the second way of fitting the descriptor: the data is passed by storing the compounds in the - class. - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz") - descriptor = data["arr_0"] - classes = data["arr_1"] - energies = data["arr_2"] - - estimator = ARMP() - estimator.set_representations(representations=descriptor) - estimator.set_classes(classes=classes) - estimator.set_properties(energies) - - idx = np.arange(0, 100) - estimator.fit(idx) - - -def test_fit_3(): - """ - This function tests the thrid way of fitting the descriptor: the data is passed directly to the fit function. - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz") - descriptor = data["arr_0"] - classes = data["arr_1"] - energies = data["arr_2"] - - estimator = ARMP() - estimator.fit(x=descriptor, y=energies, classes=classes) - - -def test_fit_4(): - """ - This function tests the second way of fitting the descriptor: the data is passed by storing the compounds in the - class. - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz") - descriptor = data["arr_0"] - classes = data["arr_1"] - energies = data["arr_2"] - - estimator = ARMP(tensorboard=True, tensorboard_subdir="./tb_test_4") - estimator.set_representations(representations=descriptor) - estimator.set_classes(classes=classes) - estimator.set_properties(energies) - - idx = np.arange(0, 100) - estimator.fit(idx) - - shutil.rmtree("./tb_test_4") - - -def test_score_3(): - """ - This function tests that all the scoring functions work. - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz") - descriptor = data["arr_0"] - classes = data["arr_1"] - energies = data["arr_2"] - - estimator_1 = ARMP(scoring_function="mae") - estimator_1.fit(x=descriptor, y=energies, classes=classes) - estimator_1.score(x=descriptor, y=energies, classes=classes) - - estimator_2 = ARMP(scoring_function="r2") - estimator_2.fit(x=descriptor, y=energies, classes=classes) - estimator_2.score(x=descriptor, y=energies, classes=classes) - - estimator_3 = ARMP(scoring_function="rmse") - estimator_3.fit(x=descriptor, y=energies, classes=classes) - estimator_3.score(x=descriptor, y=energies, classes=classes) - - -def test_predict_3(): - test_dir = os.path.dirname(os.path.realpath(__file__)) - - data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz") - descriptor = data["arr_0"] - classes = data["arr_1"] - energies = data["arr_2"] - - estimator = ARMP() - estimator.fit(x=descriptor, y=energies, classes=classes) - energies_pred = estimator.predict(x=descriptor, classes=classes) - - assert energies.shape == energies_pred.shape - - -def test_predict_fromxyz(): - """ - This test checks that the predictions from the "predict" and the "predict_from_xyz" functions are the same. - It also checks that if the model is saved, when the model is reloaded the predictions are still the same. - """ - - xyz = np.array( - [ - [[0, 1, 0], [0, 1, 1], [1, 0, 1]], - [[1, 2, 2], [3, 1, 2], [1, 3, 4]], - [[4, 1, 2], [0.5, 5, 6], [-1, 2, 3]], - ] - ) - zs = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) - - ene_true = np.array([0.5, 0.9, 1.0]) - - acsf_param = { - "nRs2": 5, - "nRs3": 5, - "nTs": 5, - "rcut": 5, - "acut": 5, - "zeta": 220.127, - "eta": 30.8065, - } - estimator = ARMP( - iterations=10, - l1_reg=0.0001, - l2_reg=0.005, - learning_rate=0.0005, - representation_name="acsf", - representation_params=acsf_param, - ) - - estimator.set_properties(ene_true) - estimator.generate_representation(xyz, zs) - - idx = list(range(xyz.shape[0])) - - estimator.fit(idx) - - pred1 = estimator.predict(idx) - pred2 = estimator.predict_from_xyz(xyz, zs) - - assert np.all(np.isclose(pred1, pred2, rtol=1.0e-5)) - - estimator.save_nn(save_dir="temp") - - new_estimator = ARMP( - iterations=10, - l1_reg=0.0001, - l2_reg=0.005, - learning_rate=0.0005, - representation_name="acsf", - representation_params=acsf_param, - ) - - new_estimator.load_nn(save_dir="temp") - - new_estimator.set_properties(ene_true) - new_estimator.generate_representation(xyz, zs) - - pred3 = new_estimator.predict(idx) - pred4 = new_estimator.predict_from_xyz(xyz, zs) - - shutil.rmtree("temp") - - assert np.all(np.isclose(pred3, pred4, rtol=1.0e-5)) - assert np.all(np.isclose(pred1, pred3, rtol=1.0e-5)) - - -def test_retraining(): - xyz = np.array( - [ - [[0, 1, 0], [0, 1, 1], [1, 0, 1]], - [[1, 2, 2], [3, 1, 2], [1, 3, 4]], - [[4, 1, 2], [0.5, 5, 6], [-1, 2, 3]], - ] - ) - zs = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) - - ene_true = np.array([0.5, 0.9, 1.0]) - - acsf_param = { - "nRs2": 5, - "nRs3": 5, - "nTs": 5, - "rcut": 5, - "acut": 5, - "zeta": 220.127, - "eta": 30.8065, - } - estimator = ARMP( - iterations=10, - l1_reg=0.0001, - l2_reg=0.005, - learning_rate=0.0005, - representation_name="acsf", - representation_params=acsf_param, - ) - - estimator.set_properties(ene_true) - estimator.generate_representation(xyz, zs) - - idx = list(range(xyz.shape[0])) - - estimator.fit(idx) - estimator.save_nn(save_dir="temp") - - pred1 = estimator.predict(idx) - - estimator.loaded_model = True - - estimator.fit(idx) - - pred2 = estimator.predict(idx) - - new_estimator = ARMP( - iterations=10, - l1_reg=0.0001, - l2_reg=0.005, - learning_rate=0.0005, - representation_name="acsf", - representation_params=acsf_param, - ) - new_estimator.set_properties(ene_true) - new_estimator.generate_representation(xyz, zs) - - new_estimator.load_nn("temp") - - pred3 = new_estimator.predict(idx) - - new_estimator.fit(idx) - - pred4 = new_estimator.predict(idx) - - assert np.all(np.isclose(pred1, pred3, rtol=1.0e-5)) - assert np.all(np.isclose(pred2, pred4, rtol=1.0e-5)) - - shutil.rmtree("temp") - - -if __name__ == "__main__": - test_set_representation() - test_set_properties() - test_set_descriptor() - test_fit_1() - test_fit_2() - test_fit_3() - test_fit_4() - test_score_3() - test_predict_3() - test_predict_fromxyz() - test_retraining() diff --git a/tests/test_compound.py b/tests/test_compound.py deleted file mode 100644 index 2938f7b9..00000000 --- a/tests/test_compound.py +++ /dev/null @@ -1,45 +0,0 @@ -from __future__ import print_function - -import os - -from qmllib import Compound - - -def compare_lists(a, b): - for pair in zip(a, b): - if pair[0] != pair[1]: - return False - return True - - -def test_compound(): - - test_dir = os.path.dirname(os.path.realpath(__file__)) - c = Compound(xyz=test_dir + "/data/compound_test.xyz") - - ref_atomtypes = ["C", "Cl", "Br", "H", "H"] - ref_charges = [6, 17, 35, 1, 1] - - assert compare_lists(ref_atomtypes, c.atomtypes), "Failed parsing atomtypes" - assert compare_lists(ref_charges, c.nuclear_charges), "Failed parsing nuclear_charges" - - # Test extended xyz - c2 = Compound(xyz=test_dir + "/data/compound_test.exyz") - - ref_atomtypes = ["C", "Cl", "Br", "H", "H"] - ref_charges = [6, 17, 35, 1, 1] - - assert compare_lists(ref_atomtypes, c2.atomtypes), "Failed parsing atomtypes" - assert compare_lists(ref_charges, c2.nuclear_charges), "Failed parsing nuclear_charges" - - # Test extended xyz from a file pointer rather than a filename - with open(test_dir + "/data/compound_test.exyz") as fp: - c3 = Compound(xyz=fp) - - assert compare_lists(ref_atomtypes, c3.atomtypes), "Failed parsing atomtypes" - assert compare_lists(ref_charges, c3.nuclear_charges), "Failed parsing nuclear_charges" - - -if __name__ == "__main__": - - test_compound() diff --git a/tests/test_mrmp.py b/tests/test_mrmp.py deleted file mode 100644 index 554a6935..00000000 --- a/tests/test_mrmp.py +++ /dev/null @@ -1,290 +0,0 @@ -""" -This test checks if all the ways of setting up the estimator MRMP work. -""" - -import glob -import os -import shutil - -import numpy as np - -from qmllib.aglaia.aglaia import MRMP -from qmllib.utils import InputError - -try: - import tensorflow as tf -except ImportError: - print("Tensorflow not found but is needed for mrmp class.") - raise SystemExit - - -def test_set_representation(): - """ - This function tests the method MRMP._set_representation. - """ - try: - MRMP( - representation_name="unsorted_coulomb_matrix", - representation_params={"slatm_sigma1": 0.05}, - ) - raise Exception - except InputError: - pass - - try: - MRMP(representation_name="coulomb_matrix") - raise Exception - except InputError: - pass - - try: - MRMP(representation_name="slatm", representation_params={"slatm_alchemy": 0.05}) - raise Exception - except InputError: - pass - - parameters = { - "slatm_sigma1": 0.07, - "slatm_sigma2": 0.04, - "slatm_dgrid1": 0.02, - "slatm_dgrid2": 0.06, - "slatm_rcut": 5.0, - "slatm_rpower": 7, - "slatm_alchemy": True, - } - - estimator = MRMP(representation_name="slatm", representation_params=parameters) - - assert estimator.representation_name == "slatm" - assert estimator.slatm_parameters == parameters - - -def test_set_properties(): - """ - This test checks that the MRMP.set_properties method works. - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - energies = np.loadtxt(test_dir + "/CN_isobutane/prop_kjmol_training.txt", usecols=[1]) - - estimator = MRMP(representation_name="unsorted_coulomb_matrix") - - assert estimator.properties == None - - estimator.set_properties(energies) - - assert np.all(estimator.properties == energies) - - -def test_set_descriptor(): - """ - This test checks that the set_descriptor function works as expected. - """ - - test_dir = os.path.dirname(os.path.realpath(__file__)) - - data_correct = np.load(test_dir + "/data/CN_isopent_light_UCM.npz") - data_incorrect = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz") - descriptor_correct = data_correct["arr_0"] - descriptor_incorrect = data_incorrect["arr_0"] - - estimator = MRMP() - - assert estimator.representation == None - - estimator.set_representations(representations=descriptor_correct) - - assert np.all(estimator.representation == descriptor_correct) - - # Pass a descriptor with the wrong shape - try: - estimator.set_representations(representations=descriptor_incorrect) - raise Exception - except InputError: - pass - - -def test_fit_1(): - """ - This function tests the first way of preparing for fitting the neural network: - Compounds are created from xyz files and the energies are stored in the estimator. - The fit method is called with the indices of the molecules we want to fit. - """ - - test_dir = os.path.dirname(os.path.realpath(__file__)) - - filenames = glob.glob(test_dir + "/CN_isobutane/*.xyz") - energies = np.loadtxt(test_dir + "/CN_isobutane/prop_kjmol_training.txt", usecols=[1]) - filenames.sort() - - available_representations = [ - "sorted_coulomb_matrix", - "unsorted_coulomb_matrix", - "bag_of_bonds", - "slatm", - ] - - for rep in available_representations: - estimator = MRMP(representation_name=rep) - estimator.generate_compounds(filenames[:100]) - estimator.set_properties(energies[:100]) - estimator.generate_representation() - - idx = np.arange(0, 100) - estimator.fit(idx) - - -def test_fit_2(): - """ - This function tests a second way of fitting the descriptor: - The premade descriptors are stored in the estimator together with the energies. - The fit method is called with the indices of the molecules we want to fit. - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - data = np.load(test_dir + "/data/CN_isopent_light_UCM.npz") - descriptor = data["arr_0"] - energies = data["arr_1"] - - estimator = MRMP() - estimator.set_representations(representations=descriptor) - estimator.set_properties(energies) - - idx = np.arange(0, 100) - estimator.fit(idx) - - -def test_fit_3(): - """ - This function tests a third way of fitting the descriptor: - The data is passed directly to the fit function. - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - data = np.load(test_dir + "/data/CN_isopent_light_UCM.npz") - descriptor = data["arr_0"] - energies = data["arr_1"] - - estimator = MRMP() - estimator.fit(descriptor, energies) - - -def test_fit_4(): - """ - This function tests a third way of fitting the descriptor: - The data is passed directly to the fit function. - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - data = np.load(test_dir + "/data/CN_isopent_light_UCM.npz") - descriptor = data["arr_0"] - energies = data["arr_1"] - - estimator = MRMP(tensorboard=True, tensorboard_subdir="./tb_test_4") - estimator.fit(descriptor, energies) - - shutil.rmtree("./tb_test_4") - - -def test_score(): - """ - This function tests that all the scoring functions work. - """ - test_dir = os.path.dirname(os.path.realpath(__file__)) - - data = np.load(test_dir + "/data/CN_isopent_light_UCM.npz") - descriptor = data["arr_0"] - energies = data["arr_1"] - - estimator_1 = MRMP(scoring_function="mae") - estimator_1.fit(descriptor, energies) - estimator_1.score(descriptor, energies) - - estimator_2 = MRMP(scoring_function="r2") - estimator_2.fit(descriptor, energies) - estimator_2.score(descriptor, energies) - - estimator_3 = MRMP(scoring_function="rmse") - estimator_3.fit(descriptor, energies) - estimator_3.score(descriptor, energies) - - -def test_save_local(): - """ - This function tests the saving and the loading of a trained model. - """ - - x = np.linspace(-10.0, 10.0, 2000) - y = x**2 - - x = np.reshape(x, (x.shape[0], 1)) - - estimator = MRMP() - estimator.fit(x=x, y=y) - - score_after_training = estimator.score(x, y) - estimator.save_nn(save_dir="saved_test_model") - - estimator.load_nn(save_dir="saved_test_model") - score_after_loading = estimator.score(x, y) - - assert score_after_loading == score_after_training - - shutil.rmtree("./saved_test_model") - - -def test_load_external(): - """ - This function tests if a model that has been trained on a different computer can be loaded and used on a different - computer. - """ - tf.reset_default_graph() - - test_dir = os.path.dirname(os.path.realpath(__file__)) - - x = np.linspace(-10.0, 10.0, 2000) - y = x**2 - x = np.reshape(x, (x.shape[0], 1)) - - estimator = MRMP() - estimator.load_nn(test_dir + "/saved_model") - - score_after_loading = estimator.score(x, y) - score_on_other_machine = -24.101043 - - assert np.isclose(score_after_loading, score_on_other_machine) - - -# def test_get_params(): -# """ -# This test checks whether the function get_params inherited by BaseEstimator works properly. -# """ - -# slatm_params = {'slatm_sigma1': 0.1, 'slatm_sigma2': 0.2} - -# estimator = MRMP(l1_reg=0.1, l2_reg=0.3, representation_params=slatm_params, representation='slatm') - -# parameters = estimator.get_params() - -# assert parameters["l1_reg"] == 0.1 -# assert parameters["l2_reg"] == 0.3 - -# if not type(parameters["representation_params"]) is dict: -# raise InputError("The descriptor parameters should be a dictionary.") - -# for key, value in slatm_params.items(): -# params_in_estimator = parameters["representation_params"] -# assert value == params_in_estimator[key] - -if __name__ == "__main__": - - test_set_properties() - test_set_descriptor() - test_set_representation() - test_fit_1() - test_fit_2() - test_fit_3() - test_fit_4() - test_score() - test_load_external() - # test_get_params() diff --git a/tests/test_neural_network.py b/tests/test_neural_network.py deleted file mode 100644 index ec173142..00000000 --- a/tests/test_neural_network.py +++ /dev/null @@ -1,386 +0,0 @@ -""" -Tests directly related to the class _NN and it's children. - -""" -import numpy as np -import tensorflow as tf - -# TODO relative imports -from qmllib.aglaia.aglaia import MRMP -from qmllib.utils import InputError - -# ------------ ** All functions to test the inputs to the classes ** --------------- - - -def hidden_layer_sizes(C): - # Exceptions that are supposed to be caught - def catch(s): - try: - C(hidden_layer_sizes=s) - raise Exception - except InputError: - pass - - # This should not raise an exception - C(hidden_layer_sizes=[4, 5]) - C(hidden_layer_sizes=(4, 5)) - C(hidden_layer_sizes=[4.0]) - - # This should be caught - catch([]) - catch([0, 4]) - catch([4.2]) - catch(["x"]) - catch([None]) - catch(None) - catch(4) - catch([0]) - - -def l1_reg(C): - # Exceptions that are supposed to be caught - def catch(s): - try: - C(l1_reg=s) - raise Exception - except InputError: - pass - - # This should not raise an exception - C(l1_reg=0.1) - C(l1_reg=0.0) - - # This should be caught - catch(-0.1) - catch("x") - catch(None) - catch([0]) - - -def l2_reg(C): - # Exceptions that are supposed to be caught - def catch(s): - try: - C(l2_reg=s) - raise Exception - except InputError: - pass - - # This should not raise an exception - C(l2_reg=0.1) - C(l2_reg=0.0) - - # This should be caught - catch(-0.1) - catch("x") - catch(None) - catch([0]) - - -def batch_size(C): - # Exceptions that are supposed to be caught - def catch(s): - try: - C(batch_size=s) - raise Exception - except InputError: - pass - - # This should not raise an exception - C(batch_size=2) - C(batch_size="auto") - - # This should be caught - catch(1) - catch(-2) - catch("x") - catch(4.2) - catch(None) - catch(2.0) - - -def learning_rate(C): - # Exceptions that are supposed to be caught - def catch(s): - try: - C(learning_rate=s) - raise Exception - except InputError: - pass - - # This should not raise an exception - C(learning_rate=0.1) - - # This should be caught - catch(0.0) - catch(-0.1) - catch("x") - catch(None) - - -def iterations(C): - # Exceptions that are supposed to be caught - def catch(s): - try: - C(iterations=s) - raise Exception - except InputError: - pass - - # This should not raise an exception - C(iterations=1) - - # This should be caught - catch(-2) - catch("x") - catch(4.2) - catch(None) - catch(1.0) - - -def tf_dtype(C): - # Exceptions that are supposed to be caught - def catch(s): - try: - C(tf_dtype=s) - raise Exception - except InputError: - pass - - # This should not raise an exception - C(tf_dtype="64") - C(tf_dtype=64) - C(tf_dtype="float64") - C(tf_dtype=tf.float64) - C(tf_dtype="32") - C(tf_dtype=32) - C(tf_dtype="float32") - C(tf_dtype=tf.float32) - C(tf_dtype="16") - C(tf_dtype=16) - C(tf_dtype="float16") - C(tf_dtype=tf.float16) - - # This should be caught - catch(8) - catch("x") - catch(None) - - -def hl1(C): - # Exceptions that are supposed to be caught - def catch(s): - try: - C(hl1=s) - raise Exception - except InputError: - pass - - # This should not raise an exception - C(hl1=1) - - # This should be caught - catch(0) - catch("x") - catch(4.2) - catch(None) - catch(-1) - catch(1.0) - - -def hl2(C): - # Exceptions that are supposed to be caught - def catch(s): - try: - C(hl2=s) - raise Exception - except InputError: - pass - - # This should not raise an exception - C(hl2=1) - C(hl2=0) - - # This should be caught - catch("x") - catch(4.2) - catch(None) - catch(-1) - catch(1.0) - - -def hl3(C): - # Exceptions that are supposed to be caught - def catch(s): - try: - C(hl2=2, hl3=s) - raise Exception - except InputError: - pass - - # This should not raise an exception - C(hl2=2, hl3=1) - C(hl2=2, hl3=0) - - # This should be caught - catch("x") - catch(4.2) - catch(None) - catch(-1) - catch(1.0) - - -def representation(C): - # Exceptions that are supposed to be caught - def catch(s): - try: - C(representation=s) - raise Exception - except InputError: - pass - - # This should not raise an exception - C(representation="unsorted_couLomb_matrix") - C(representation="sorted_couLomb_matrix") - C(representation="bag_of_bOnds") - C(representation="slAtm") - - # This should be caught - catch("none") - catch(4.2) - catch(None) - catch(-1) - - -def scoringfunction(C): - """ - This function checks that the function _set_scoring_function accepts only mae, rmsd and r2 as scoring functions. - """ - - def catch(s): - try: - C(scoring_function=s) - raise Exception - except InputError: - pass - - accepted_inputs = ["mae", "rmse", "r2"] - unaccepted_inputs = [0, "none", True, None] - - # This should not raise an exception - for item in accepted_inputs: - C(scoring_function=item) - - # This should be caught - for item in unaccepted_inputs: - catch(item) - - -def test_input(): - # Additional test that inheritance is ok - - C = MRMP - - hidden_layer_sizes(C) - l1_reg(C) - l2_reg(C) - batch_size(C) - learning_rate(C) - iterations(C) - tf_dtype(C) - scoringfunction(C) - - -# --------------------- ** tests for regularisation terms ** ----------------- - - -def test_l2_loss(): - """ - This tests the evaluation of the l2 regularisation term on the weights of the neural net. - :return: None - """ - - # Some example weights - weights = [tf.constant([2.0, 4.0], dtype=tf.float32)] - - # Creating object with known l2_reg parameter - obj = MRMP(l2_reg=0.1) - expected_result = [2.0] - - # Evaluating l2 term - l2_loss_tf = obj._l2_loss(weights=weights) - sess = tf.Session() - l2_loss = sess.run(l2_loss_tf) - - # Testing - assert np.isclose(l2_loss, expected_result) - - -def test_l1_loss(): - """ - This tests the evaluation of the l1 regularisation term on the weights of the neural net. - :return: None - """ - - # Some example weights - weights = [tf.constant([2.0, 4.0], dtype=tf.float32)] - - # Creating object with known l1_reg parameter - obj = MRMP(l1_reg=0.1) - expected_result = [0.6] - - # Evaluating l1 term - l1_loss_tf = obj._l1_loss(weights=weights) - sess = tf.Session() - l1_loss = sess.run(l1_loss_tf) - - # Testing - assert np.isclose(l1_loss, expected_result) - - -def test_get_batch_size(): - """ - This tests the get_batch_size function - :return: - """ - - example_data = [200, 50, 50] - possible_cases = ["auto", 100, 20] - expected_batch_sizes = [100, 50, 17] - - actual_batch_sizes = [] - for i, case in enumerate(possible_cases): - obj = MRMP(batch_size=case) - obj.n_samples = example_data[i] - actual_batch = obj._get_batch_size() - actual_batch_sizes.append(actual_batch) - - for i in range(len(expected_batch_sizes)): - assert actual_batch_sizes[i] == expected_batch_sizes[i] - - -def test_fit1(): - """This tests that the neural net can overfit a cubic function.""" - - x = np.linspace(-2.0, 2.0, 200) - X = np.reshape(x, (len(x), 1)) - y = x**3 - - estimator = MRMP(hidden_layer_sizes=(5, 5, 5), learning_rate=0.01, iterations=35000) - estimator.fit(X, y) - - x_test = np.linspace(-1.5, 1.5, 15) - X_test = np.reshape(x_test, (len(x_test), 1)) - y_test = x_test**3 - y_pred = estimator.predict(X_test) - - y_pred_row = np.reshape(y_pred, (y_pred.shape[0],)) - np.testing.assert_array_almost_equal(y_test, y_pred_row, decimal=1) - - -if __name__ == "__main__": - test_input() - test_l2_loss() - test_l1_loss() - test_get_batch_size() - test_fit1() diff --git a/tests/test_symm_funct.py b/tests/test_symm_funct.py deleted file mode 100644 index 2f1188ba..00000000 --- a/tests/test_symm_funct.py +++ /dev/null @@ -1,194 +0,0 @@ -""" -This file contains tests for the atom centred symmetry function module. -""" -from __future__ import print_function - -import os - -import numpy as np -import tensorflow as tf - -import qmllib -from qmllib.aglaia import symm_funct -from qmllib.representations import generate_acsf - - -def pad(size, coordinates, nuclear_charges): - - n_samples = len(coordinates) - - padded_coordinates = np.zeros((n_samples, size, 3)) - padded_nuclear_charges = np.zeros((n_samples, size), dtype=int) - - for i in range(n_samples): - natoms = coordinates[i].shape[0] - if natoms > size: - print("natoms larger than padded size") - quit() - padded_coordinates[i, :natoms] = coordinates[i] - padded_nuclear_charges[i, :natoms] = nuclear_charges[i] - - return padded_coordinates, padded_nuclear_charges - - -def test_acsf(): - files = [ - "qm7/0101.xyz", - "qm7/0102.xyz", - "qm7/0103.xyz", - "qm7/0104.xyz", - "qm7/0105.xyz", - "qm7/0106.xyz", - "qm7/0107.xyz", - "qm7/0108.xyz", - "qm7/0109.xyz", - "qm7/0110.xyz", - ] - - path = os.path.dirname(os.path.realpath(__file__)) - - mols = [] - for xyz_file in files: - mol = qmllib.Compound(xyz=path + "/" + xyz_file) - mols.append(mol) - - elements = set() - for mol in mols: - elements = elements.union(mol.nuclear_charges) - - elements = list(elements) - - fort_acsf_gradients(mols, path, elements) - tf_acsf(mols, path, elements) - fort_acsf(mols, path, elements) - - -def fort_acsf(mols, path, elements): - - # Generate atom centered symmetry functions representation - # using the Compound class - for i, mol in enumerate(mols): - mol.generate_acsf(elements=elements, bin_min=0.0) - - X_test = np.concatenate([mol.representation for mol in mols]) - X_ref = np.loadtxt(path + "/data/acsf_representation.txt") - assert np.allclose(X_test, X_ref), "Error in ACSF representation" - - # Generate atom centered symmetry functions representation - # directly from the representations module - rep = [] - for i, mol in enumerate(mols): - rep.append( - generate_acsf( - coordinates=mol.coordinates, - nuclear_charges=mol.nuclear_charges, - elements=elements, - bin_min=0.0, - ) - ) - - X_test = np.concatenate(rep) - X_ref = np.loadtxt(path + "/data/acsf_representation.txt") - assert np.allclose(X_test, X_ref), "Error in ACSF representation" - - -def tf_acsf(mols, path, elements): - radial_cutoff = 5 - angular_cutoff = 5 - n_radial_rs = 3 - n_angular_rs = 3 - n_theta_s = 3 - zeta = 1.0 - eta = 1.0 - bin_min = 0.0 - - element_pairs = [] - for i, ei in enumerate(elements): - for ej in elements[i:]: - element_pairs.append([ej, ei]) - - max_atoms = max(mol.natoms for mol in mols) - xyzs, zs = pad( - max_atoms, [mol.coordinates for mol in mols], [mol.nuclear_charges for mol in mols] - ) - - n_samples = xyzs.shape[0] - max_n_atoms = zs.shape[1] - - with tf.name_scope("Inputs"): - zs_tf = tf.placeholder(shape=[n_samples, max_n_atoms], dtype=tf.int32, name="zs") - xyz_tf = tf.placeholder(shape=[n_samples, max_n_atoms, 3], dtype=tf.float32, name="xyz") - - acsf_tf_t = symm_funct.generate_acsf_tf( - xyz_tf, - zs_tf, - elements, - element_pairs, - radial_cutoff, - angular_cutoff, - n_radial_rs, - n_angular_rs, - n_theta_s, - zeta, - eta, - bin_min, - ) - - sess = tf.Session() - sess.run(tf.global_variables_initializer()) - - X_test = sess.run(acsf_tf_t, feed_dict={xyz_tf: xyzs, zs_tf: zs})[zs > 0] - X_ref = np.loadtxt(path + "/data/acsf_representation.txt") - assert np.allclose(X_test, X_ref, atol=1e-4), "Error in ACSF representation" - - -def fort_acsf_gradients(mols, path, elements): - - # Generate atom centered symmetry functions representation - # and gradients using the Compound class - for i, mol in enumerate(mols): - mol.generate_acsf(elements=elements, gradients=True, bin_min=0.0) - - X_test = np.concatenate([mol.representation for mol in mols]) - X_ref = np.loadtxt(path + "/data/acsf_representation.txt") - assert np.allclose(X_test, X_ref), "Error in ACSF representation" - - Xgrad_test = np.concatenate( - [mol.gradients.reshape(mol.natoms**2, mol.gradients.shape[1] * 3) for mol in mols] - ) - Xgrad_ref = np.loadtxt(path + "/data/acsf_gradients.txt") - assert np.allclose(Xgrad_test, Xgrad_ref), "Error in ACSF gradients" - - # Generate atom centered symmetry functions representation - # and gradients directly from the representations module - rep = [] - grad = [] - for i, mol in enumerate(mols): - r, g = generate_acsf( - coordinates=mol.coordinates, - nuclear_charges=mol.nuclear_charges, - elements=elements, - gradients=True, - bin_min=0.0, - ) - rep.append(r) - grad.append(g) - - # Reshape the gradients to fit the test format - for i, mol in enumerate(mols): - g = grad[i] - natoms = mol.natoms - repsize = g.shape[1] - grad[i] = g.reshape(natoms**2, repsize * 3) - - X_test = np.concatenate(rep) - X_ref = np.loadtxt(path + "/data/acsf_representation.txt") - assert np.allclose(X_test, X_ref), "Error in ACSF representation" - - Xgrad_test = np.concatenate(grad, axis=0) - Xgrad_ref = np.loadtxt(path + "/data/acsf_gradients.txt") - assert np.allclose(Xgrad_test, Xgrad_ref), "Error in ACSF gradients" - - -if __name__ == "__main__": - test_acsf()