diff --git a/.gitignore b/.gitignore index 6893c946..2b7170ce 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ /training_core/results/*/*/scaler*.npz /training_core/results/*/*/*.keras /training_core/results/*/*/*.pickle +/training_core/results/*/*/scaler_*.json /training_core/results/*/*/*.png /training_core/results/*/*/*.weights.h5 /develop/ diff --git a/training_core/hyper/hyper_esol.py b/training_core/hyper/hyper_esol.py index 713596bf..71a2168e 100644 --- a/training_core/hyper/hyper_esol.py +++ b/training_core/hyper/hyper_esol.py @@ -38,10 +38,6 @@ "compile": { "optimizer": {"class_name": "Adam", "config": {"learning_rate": 1e-03}}, "loss": "mean_absolute_error", - "metrics": [{"class_name": "kgcnn>ScaledMeanAbsoluteError", - "config": {"name": "mean_absolute_error"}}, - {"class_name": "kgcnn>ScaledRootMeanSquaredError", - "config": {"name": "root_mean_squared_error"}}] }, "scaler": {"class_name": "StandardLabelScaler", "module_name": "kgcnn.data.transform.scaler.standard", "config": {"with_std": True, "with_mean": True, "copy": True}}, @@ -110,9 +106,13 @@ }, "compile": { "optimizer": {"class_name": "Adam", "config": {"learning_rate": 0.0005}}, - "loss": "mean_absolute_error", - "metrics": [{"class_name": "MeanAbsoluteError", "config": {"dtype": "float64"}}, - {"class_name": "RootMeanSquaredError", "config": {"dtype": "float64"}}] + "loss": {"class_name": "kgcnn>MeanAbsoluteError", "config": {"dtype": "float64"}}, + "metrics": [ + {"class_name": "MeanAbsoluteError", + "config": {"dtype": "float64", "name": "scaled_mean_absolute_error"}}, + {"class_name": "RootMeanSquaredError", + "config": {"dtype": "float64", "name": "scaled_root_mean_squared_error"}} + ] } }, "data": {}, @@ -173,9 +173,13 @@ }, "compile": { "optimizer": {"class_name": "Adam", "config": {"learning_rate": 5e-03}}, - "loss": "mean_absolute_error", - "metrics": [{"class_name": "MeanAbsoluteError", "config": {"dtype": "float64"}}, - {"class_name": "RootMeanSquaredError", "config": 
{"dtype": "float64"}}] + "loss": {"class_name": "kgcnn>MeanAbsoluteError", "config": {"dtype": "float64"}}, + "metrics": [ + {"class_name": "MeanAbsoluteError", + "config": {"dtype": "float64", "name": "scaled_mean_absolute_error"}}, + {"class_name": "RootMeanSquaredError", + "config": {"dtype": "float64", "name": "scaled_root_mean_squared_error"}} + ] }, "scaler": {"class_name": "StandardLabelScaler", "module_name": "kgcnn.data.transform.scaler.standard", "config": {"with_std": True, "with_mean": True, "copy": True}}, diff --git a/training_core/results/ESOLDataset/GCN/GCN_ESOLDataset_score.yaml b/training_core/results/ESOLDataset/GCN/GCN_ESOLDataset_score.yaml index d6be02f3..5131a480 100644 --- a/training_core/results/ESOLDataset/GCN/GCN_ESOLDataset_score.yaml +++ b/training_core/results/ESOLDataset/GCN/GCN_ESOLDataset_score.yaml @@ -1,11 +1,11 @@ OS: nt_win32 -backend: jax +backend: tensorflow cuda_available: 'False' data_unit: mol/L -date_time: '2023-09-17 23:13:16' -device_id: '[0]' -device_memory: '[None]' -device_name: '[CpuDevice(id=0)]' +date_time: '2023-09-21 15:19:49' +device_id: '[LogicalDevice(name=''/device:CPU:0'', device_type=''CPU'')]' +device_memory: '[]' +device_name: '[{}]' epochs: - 800 - 800 @@ -21,11 +21,11 @@ learning_rate: - 5.1727271056734025e-05 - 5.1727271056734025e-05 loss: -- 0.015133836306631565 -- 0.014123351313173771 -- 0.012902331538498402 -- 0.011374394409358501 -- 0.009070469997823238 +- 0.014700263738632202 +- 0.013396141119301319 +- 0.011277776211500168 +- 0.014349170960485935 +- 0.01143904123455286 max_learning_rate: - 0.0010000000474974513 - 0.0010000000474974513 @@ -33,47 +33,41 @@ max_learning_rate: - 0.0010000000474974513 - 0.0010000000474974513 max_loss: -- 1.51412832736969 -- 1.5016895532608032 -- 1.621337652206421 -- 1.6178152561187744 -- 1.5621287822723389 -max_mean_absolute_error: -- 1.5357917547225952 -- 1.5254226922988892 -- 1.6039842367172241 -- 1.6273863315582275 -- 1.5761184692382812 -max_root_mean_squared_error: 
-- 1.918033480644226 -- 1.8930559158325195 -- 2.0253753662109375 -- 2.0398333072662354 -- 1.9330493211746216 +- 0.7641133069992065 +- 0.7424471378326416 +- 0.745108962059021 +- 0.7430174350738525 +- 0.7755040526390076 +max_scaled_mean_absolute_error: +- 1.588536024093628 +- 1.561904788017273 +- 1.5957443714141846 +- 1.5703060626983643 +- 1.6362876892089844 +max_scaled_root_mean_squared_error: +- 1.9688690900802612 +- 1.9602442979812622 +- 1.9943281412124634 +- 1.9636659622192383 +- 2.007398843765259 max_val_loss: -- 0.6922479271888733 -- 0.656682014465332 -- 0.9420702457427979 -- 0.9071367979049683 -- 0.566788375377655 -max_val_mean_absolute_error: -- 0.6621466875076294 -- 0.6777573823928833 -- 0.8895420432090759 -- 0.8363233208656311 -- 0.566788375377655 -max_val_root_mean_squared_error: -- 0.9364566802978516 -- 0.8861027359962463 -- 1.095849871635437 -- 1.0854592323303223 -- 0.7563754916191101 -mean_absolute_error: -- 0.015225407667458057 -- 0.01431678980588913 -- 0.012784791179001331 -- 0.01137979980558157 -- 0.009164807386696339 +- 0.37646061182022095 +- 0.3912653625011444 +- 0.3479542136192322 +- 0.36888691782951355 +- 0.3552423119544983 +max_val_scaled_mean_absolute_error: +- 0.7251210808753967 +- 0.8881072998046875 +- 0.7157034873962402 +- 0.7646072506904602 +- 0.745976448059082 +max_val_scaled_root_mean_squared_error: +- 1.0473613739013672 +- 1.0891436338424683 +- 0.9292598366737366 +- 0.9997423887252808 +- 0.9514607191085815 min_learning_rate: - 5.1727271056734025e-05 - 5.1727271056734025e-05 @@ -81,74 +75,80 @@ min_learning_rate: - 5.1727271056734025e-05 - 5.1727271056734025e-05 min_loss: -- 0.015133836306631565 -- 0.014123351313173771 -- 0.012902331538498402 -- 0.011374394409358501 -- 0.009070469997823238 -min_mean_absolute_error: -- 0.015225407667458057 -- 0.01431678980588913 -- 0.012784791179001331 -- 0.01137979980558157 -- 0.009164807386696339 -min_root_mean_squared_error: -- 0.06683880090713501 -- 0.06312859803438187 -- 0.05362653359770775 -- 
0.04734046384692192 -- 0.036896370351314545 +- 0.012154624797403812 +- 0.011357437819242477 +- 0.011277776211500168 +- 0.010577977634966373 +- 0.011433130130171776 +min_scaled_mean_absolute_error: +- 0.025809645652770996 +- 0.02442621998488903 +- 0.024177635088562965 +- 0.022636910900473595 +- 0.02436179481446743 +min_scaled_root_mean_squared_error: +- 0.06934428960084915 +- 0.08029545098543167 +- 0.07034015655517578 +- 0.06601940840482712 +- 0.047117479145526886 min_val_loss: -- 0.48386263847351074 -- 0.4261545240879059 -- 0.4654020071029663 -- 0.4218893349170685 -- 0.41208165884017944 -min_val_mean_absolute_error: -- 0.4786438047885895 -- 0.4198172688484192 -- 0.43209898471832275 -- 0.4470202922821045 -- 0.41208165884017944 -min_val_root_mean_squared_error: -- 0.7219273447990417 -- 0.5574941635131836 -- 0.6329845786094666 -- 0.6092503666877747 -- 0.5708439946174622 +- 0.24208006262779236 +- 0.20231576263904572 +- 0.23847146332263947 +- 0.2084009349346161 +- 0.21117988228797913 +min_val_scaled_mean_absolute_error: +- 0.5018375515937805 +- 0.4533664286136627 +- 0.45695292949676514 +- 0.4631824493408203 +- 0.44345852732658386 +min_val_scaled_root_mean_squared_error: +- 0.7365378737449646 +- 0.601824939250946 +- 0.6605672240257263 +- 0.6202259659767151 +- 0.6119416356086731 model_class: make_model model_name: GCN -model_version: 2023.09.30 +model_version: '2023-09-30' multi_target_indices: null number_histories: 5 -root_mean_squared_error: -- 0.0669766366481781 -- 0.06312859803438187 -- 0.05363968387246132 -- 0.04734046384692192 -- 0.036896370351314545 +scaled_mean_absolute_error: +- 0.030948080122470856 +- 0.028739066794514656 +- 0.024177635088562965 +- 0.03059293143451214 +- 0.02436179481446743 +scaled_root_mean_squared_error: +- 0.07311341911554337 +- 0.0803653746843338 +- 0.0703764334321022 +- 0.0688292384147644 +- 0.04794161394238472 seed: 42 time_list: -- '0:08:57.131873' -- '0:08:51.857452' -- '0:08:53.591406' -- '0:08:58.488845' -- '0:08:53.481242' +- 
'0:04:41.873803' +- '0:05:37.398287' +- '0:09:14.968248' +- '0:13:34.497785' +- '0:17:04.563790' val_loss: -- 0.5129508972167969 -- 0.46785056591033936 -- 0.5138030052185059 -- 0.47180789709091187 -- 0.4505428373813629 -val_mean_absolute_error: -- 0.5046505331993103 -- 0.4409920573234558 -- 0.4513247311115265 -- 0.46418580412864685 -- 0.4505428373813629 -val_root_mean_squared_error: -- 0.732132613658905 -- 0.5994848012924194 -- 0.6798126697540283 -- 0.6416217684745789 -- 0.6304311752319336 +- 0.2544833719730377 +- 0.20889292657375336 +- 0.27367258071899414 +- 0.2199447751045227 +- 0.2312503159046173 +val_scaled_mean_absolute_error: +- 0.5288447737693787 +- 0.4802064001560211 +- 0.5416200160980225 +- 0.48129501938819885 +- 0.48560455441474915 +val_scaled_root_mean_squared_error: +- 0.7877314686775208 +- 0.6519739627838135 +- 0.7435009479522705 +- 0.6704958081245422 +- 0.6541447639465332 diff --git a/training_core/results/ESOLDataset/GCN/GCN_hyper.json b/training_core/results/ESOLDataset/GCN/GCN_hyper.json index d10920df..20cc9732 100644 --- a/training_core/results/ESOLDataset/GCN/GCN_hyper.json +++ b/training_core/results/ESOLDataset/GCN/GCN_hyper.json @@ -1 +1 @@ -{"model": {"class_name": "make_model", "module_name": "kgcnn.literature_core.GCN", "config": {"name": "GCN", "inputs": [{"shape": [null, 41], "name": "node_attributes", "dtype": "float32"}, {"shape": [null, 1], "name": "edge_weights", "dtype": "float32"}, {"shape": [null, 2], "name": "edge_indices", "dtype": "int64"}, {"shape": [], "name": "total_nodes", "dtype": "int64"}, {"shape": [], "name": "total_edges", "dtype": "int64"}], "cast_disjoint_kwargs": {"padded_disjoint": true}, "input_node_embedding": {"input_dim": 95, "output_dim": 64}, "input_edge_embedding": {"input_dim": 25, "output_dim": 1}, "gcn_args": {"units": 140, "use_bias": true, "activation": "relu"}, "depth": 5, "verbose": 10, "output_embedding": "graph", "output_mlp": {"use_bias": [true, true, false], "units": [140, 70, 1], "activation": 
["relu", "relu", "linear"]}, "output_scaling": {"name": "StandardLabelScaler"}}}, "training": {"fit": {"batch_size": 32, "epochs": 800, "validation_freq": 10, "verbose": 2, "callbacks": [{"class_name": "kgcnn>LinearLearningRateScheduler", "config": {"learning_rate_start": 0.001, "learning_rate_stop": 5e-05, "epo_min": 250, "epo": 800, "verbose": 0}}]}, "compile": {"optimizer": {"class_name": "Adam", "config": {"learning_rate": 0.001}}, "loss": "mean_absolute_error", "metrics": [{"class_name": "MeanAbsoluteError", "config": {"dtype": "float64"}}, {"class_name": "RootMeanSquaredError", "config": {"dtype": "float64"}}]}, "scaler": {"class_name": "StandardLabelScaler", "module_name": "kgcnn.data.transform.scaler.standard", "config": {"with_std": true, "with_mean": true, "copy": true}}}, "dataset": {"class_name": "ESOLDataset", "module_name": "kgcnn.data.datasets.ESOLDataset", "config": {}, "methods": [{"set_attributes": {}}, {"set_train_test_indices_k_fold": {"n_splits": 5, "random_state": 42, "shuffle": true}}, {"map_list": {"method": "normalize_edge_weights_sym"}}, {"map_list": {"method": "count_nodes_and_edges"}}]}, "data": {"data_unit": "mol/L"}, "info": {"postfix": "", "postfix_file": "", "kgcnn_version": "4.0.0"}} \ No newline at end of file +{"model": {"class_name": "make_model", "module_name": "kgcnn.literature_core.GCN", "config": {"name": "GCN", "inputs": [{"shape": [null, 41], "name": "node_attributes", "dtype": "float32"}, {"shape": [null, 1], "name": "edge_weights", "dtype": "float32"}, {"shape": [null, 2], "name": "edge_indices", "dtype": "int64"}, {"shape": [], "name": "total_nodes", "dtype": "int64"}, {"shape": [], "name": "total_edges", "dtype": "int64"}], "cast_disjoint_kwargs": {"padded_disjoint": false}, "input_node_embedding": {"input_dim": 95, "output_dim": 64}, "input_edge_embedding": {"input_dim": 25, "output_dim": 1}, "gcn_args": {"units": 140, "use_bias": true, "activation": "relu"}, "depth": 5, "verbose": 10, "output_embedding": "graph", 
"output_mlp": {"use_bias": [true, true, false], "units": [140, 70, 1], "activation": ["relu", "relu", "linear"]}}}, "training": {"fit": {"batch_size": 32, "epochs": 800, "validation_freq": 10, "verbose": 2, "callbacks": [{"class_name": "kgcnn>LinearLearningRateScheduler", "config": {"learning_rate_start": 0.001, "learning_rate_stop": 5e-05, "epo_min": 250, "epo": 800, "verbose": 0}}]}, "compile": {"optimizer": {"class_name": "Adam", "config": {"learning_rate": 0.001}}, "loss": "mean_absolute_error"}, "scaler": {"class_name": "StandardLabelScaler", "module_name": "kgcnn.data.transform.scaler.standard", "config": {"with_std": true, "with_mean": true, "copy": true}}}, "dataset": {"class_name": "ESOLDataset", "module_name": "kgcnn.data.datasets.ESOLDataset", "config": {}, "methods": [{"set_attributes": {}}, {"set_train_test_indices_k_fold": {"n_splits": 5, "random_state": 42, "shuffle": true}}, {"map_list": {"method": "normalize_edge_weights_sym"}}, {"map_list": {"method": "count_nodes_and_edges"}}]}, "data": {"data_unit": "mol/L"}, "info": {"postfix": "", "postfix_file": "", "kgcnn_version": "4.0.0"}} \ No newline at end of file diff --git a/training_core/train_force.py b/training_core/train_force.py index e69de29b..11eaf02a 100644 --- a/training_core/train_force.py +++ b/training_core/train_force.py @@ -0,0 +1,209 @@ +import numpy as np +import time +import os +import argparse +import keras_core as ks +from datetime import timedelta +import kgcnn.training_core.schedule +import kgcnn.training_core.scheduler +from kgcnn.data.transform.scaler.serial import deserialize as deserialize_scaler +from kgcnn.utils_core.devices import check_device +from kgcnn.training_core.history import save_history_score +from kgcnn.utils.plots import plot_train_test_loss, plot_predict_true +from kgcnn.models_core.serial import deserialize as deserialize_model +from kgcnn.data.serial import deserialize as deserialize_dataset +from kgcnn.training_core.hyper import HyperParameter +from 
kgcnn.metrics_core.metrics import ScaledMeanAbsoluteError +from kgcnn.data.transform.scaler.force import EnergyForceExtensiveLabelScaler + +# Input arguments from command line. +parser = argparse.ArgumentParser(description='Train a GNN on an Energy-Force Dataset.') +parser.add_argument("--hyper", required=False, help="Filepath to hyper-parameter config file (.py or .json).", + default="hyper/hyper_md17_revised.py") +parser.add_argument("--category", required=False, help="Graph model to train.", default="Schnet.EnergyForceModel") +parser.add_argument("--model", required=False, help="Graph model to train.", default=None) +parser.add_argument("--dataset", required=False, help="Name of the dataset.", default=None) +parser.add_argument("--make", required=False, help="Name of the class for model.", default=None) +parser.add_argument("--module", required=False, help="Name of the module for model.", default=None) +parser.add_argument("--gpu", required=False, help="GPU index used for training.", default=None, nargs="+", type=int) +parser.add_argument("--fold", required=False, help="Split or fold indices to run.", default=None, nargs="+", type=int) +parser.add_argument("--seed", required=False, help="Set random seed.", default=42, type=int) +args = vars(parser.parse_args()) +print("Input of argparse:", args) + +# Check for gpu +check_device() + +# Set seed. +np.random.seed(args["seed"]) +ks.utils.set_random_seed(args["seed"]) + +# HyperParameter is used to store and verify hyperparameter. +hyper = HyperParameter( + hyper_info=args["hyper"], hyper_category=args["category"], + model_name=args["model"], model_class=args["make"], dataset_class=args["dataset"], model_module=args["module"]) +hyper.verify() + +# Loading a specific per-defined dataset from a module in kgcnn.data.datasets. +# However, the construction must be fully defined in the data section of the hyperparameter, +# including all methods to run on the dataset. 
Information required in the hyperparameter is, for example, 'file_path', +# 'data_directory' etc. +# Making a custom training script rather than configuring the dataset via hyperparameter can be +# more convenient. +dataset = deserialize_dataset(hyper["dataset"]) + +# Check if dataset has the required properties for model input. This includes a quick shape comparison. +# The name of the keras `Input` layer of the model is directly connected to a property of the dataset. +# Example 'edge_indices' or 'node_attributes'. This couples the keras model to the dataset. +dataset.assert_valid_model_input(hyper["model"]["config"]["inputs"]) + +# Filter the dataset for invalid graphs. At the moment invalid graphs are graphs which do not have the property set, +# which is required by the model's input layers, or if a tensor-like property has zero length. +dataset.clean(hyper["model"]["config"]["inputs"]) +data_length = len(dataset) # Length of the cleaned dataset. + +# Always train on `energy`. +# Just making sure that the target is of shape `(N, #labels)`. This means output embedding is on graph level. +label_names, label_units = dataset.set_multi_target_labels( + "energy", + hyper["training"]["multi_target_indices"] if "multi_target_indices" in hyper["training"] else None, + data_unit=hyper["data"]["data_unit"] if "data_unit" in hyper["data"] else None +) + +# Make output directory +filepath = hyper.results_file_path() +postfix_file = hyper["info"]["postfix_file"] + +# Training on splits. Since training on Force datasets can be expensive, there is an 'execute_folds' parameter to not +# train on all splits for testing. Can be set via command line (`--fold`) or hyperparameter; an 'execute_folds' hyperparameter entry takes precedence. 
+execute_folds = args["fold"] if "execute_folds" not in hyper["training"] else hyper["training"]["execute_folds"] +splits_done = 0 +history_list, test_indices_list = [], [] +train_indices_all, test_indices_all = [], [] +model, hist, x_test, scaler = None, None, None, None +for current_split, (train_index, test_index) in enumerate(dataset.get_train_test_indices(train="train", test="test")): + + # Keep list of train/test indices of every split, including splits that are skipped below. + test_indices_all.append(test_index) + train_indices_all.append(train_index) + + # Only run the splits listed in `execute_folds` out of the k-folds of cross-validation (all splits if it is empty). + if execute_folds: + if current_split not in execute_folds: + continue + print("Running training on split: '%s'." % current_split) + + # Make the model for current split using model kwargs from hyperparameter. + model = deserialize_model(hyper["model"]) + + # First select training and test graphs from indices, then convert them into tensorflow tensor + # representation. Which property of the dataset and whether the tensor will be ragged is retrieved from the model 'inputs' config (presumably; the original sentence was cut off here - TODO confirm). + dataset_train, dataset_test = dataset[train_index], dataset[test_index] + + # Normalize training and test targets. + # For Force datasets this training script uses the `EnergyForceExtensiveLabelScaler` class (NOTE(review): the scaler is deserialized from the 'scaler' hyperparameter entry, so the class is whatever is configured there - confirm). + # Note that `EnergyForceExtensiveLabelScaler` uses both energy and forces for scaling. + # Adapt output-scale via a transform. + # Scaler is applied to target if 'scaler' appears in hyperparameter. Only use for regression. 
+ scaled_metrics = None + if "scaler" in hyper["training"]: + print("Using Scaler to adjust output scale of model.") + scaler = deserialize_scaler(hyper["training"]["scaler"]) + scaler.fit_dataset(dataset_train) + if hasattr(model, "set_scale"): + print("Setting scale at model.") + model.set_scale(scaler) + else: + print("Transforming dataset.") + dataset_train = scaler.transform_dataset(dataset_train, copy_dataset=True, copy=True) + dataset_test = scaler.transform_dataset(dataset_test, copy_dataset=True, copy=True) + # If scaler was used we add rescaled standard metrics to compile, since otherwise the keras history will not + # directly log the original target values, but the scaled ones. + scaler_scale = scaler.get_scaling() + mae_metric_energy = ScaledMeanAbsoluteError((1, 1), name="scaled_mean_absolute_error") + mae_metric_force = ScaledMeanAbsoluteError((1, 1), name="scaled_mean_absolute_error") + if scaler_scale is not None: + mae_metric_energy.set_scale(scaler_scale) + mae_metric_force.set_scale(scaler_scale) + scaled_metrics = {"energy": [mae_metric_energy], "force": [mae_metric_force]} + + # Save scaler to file + scaler.save(os.path.join(filepath, f"scaler{postfix_file}_fold_{current_split}")) + + # Convert dataset to tensor information for model. The input specs are read from the hyperparameter, since `model` is the deserialized model object and not the config dict. + x_train = dataset_train.tensor(hyper["model"]["config"]["inputs"]) + x_test = dataset_test.tensor(hyper["model"]["config"]["inputs"]) + + # Compile model with optimizer and loss; `scaled_metrics` stays None if no scaler is configured. + model.compile(**hyper.compile( + loss={"energy": "mean_absolute_error", "force": "mean_absolute_error"}, + metrics=scaled_metrics)) + + model.predict(x_test) + model.summary() + + # Convert targets into tensors. 
+ labels_in_dataset = { + "energy": {"name": "energy"}, + "force": {"name": "force", "shape": (None, 3)} + } + y_train = dataset_train.tensor(labels_in_dataset) + y_test = dataset_test.tensor(labels_in_dataset) + + # Start and time training + start = time.time() + hist = model.fit( + x_train, y_train, + validation_data=(x_test, y_test), + **hyper.fit() + ) + stop = time.time() + print("Print Time for training: ", str(timedelta(seconds=stop - start))) + + # Keep the keras history object and the train/test indices of this split; count the split as done. + history_list.append(hist) + test_indices_list.append([train_index, test_index]) + splits_done = splits_done + 1 + + # Plot prediction. NOTE(review): file names below use `splits_done` (already incremented above), whereas the scaler file is named by `current_split` - confirm this is intended. + predicted_y = model.predict(x_test, verbose=0) + true_y = y_test + + plot_predict_true(np.array(predicted_y[0]), np.array(true_y["energy"]), + filepath=filepath, data_unit=label_units, + model_name=hyper.model_name, dataset_name=hyper.dataset_class, target_names=label_names, + file_name=f"predict_energy{postfix_file}_fold_{splits_done}.png") + + plot_predict_true(np.concatenate([np.array(f) for f in predicted_y[1]], axis=0), + np.concatenate([np.array(f) for f in true_y["force"]], axis=0), + filepath=filepath, data_unit=label_units, + model_name=hyper.model_name, dataset_name=hyper.dataset_class, target_names=label_names, + file_name=f"predict_force{postfix_file}_fold_{splits_done}.png") + + # Save keras-model to output-folder. + model.save(os.path.join(filepath, f"model{postfix_file}_fold_{splits_done}")) + +# Save original data indices of the splits. +np.savez(os.path.join(filepath, f"{hyper.model_name}_test_indices_{postfix_file}.npz"), *test_indices_all) +np.savez(os.path.join(filepath, f"{hyper.model_name}_train_indices_{postfix_file}.npz"), *train_indices_all) + +# Plot training- and test-loss vs epochs for all splits. 
+data_unit = hyper["data"]["data_unit"] if "data_unit" in hyper["data"] else "" +plot_train_test_loss(history_list, loss_name=None, val_loss_name=None, + model_name=hyper.model_name, data_unit=data_unit, dataset_name=hyper.dataset_class, + filepath=filepath, file_name=f"loss{postfix_file}.png") + +# Save hyperparameter again, which were used for this fit. +hyper.save(os.path.join(filepath, f"{hyper.model_name}_hyper{postfix_file}.json")) + +# Save score of fit result for as text file. +save_history_score( + history_list, loss_name=None, val_loss_name=None, + model_name=hyper.model_name, data_unit=data_unit, dataset_name=hyper.dataset_class, + model_class=hyper.model_class, + multi_target_indices=hyper["training"]["multi_target_indices"] if "multi_target_indices" in hyper[ + "training"] else None, + execute_folds=execute_folds, seed=args["seed"], + filepath=filepath, file_name=f"score{postfix_file}.yaml", + trajectory_name=(dataset.trajectory_name if hasattr(dataset, "trajectory_name") else None) +) diff --git a/training_core/train_graph.py b/training_core/train_graph.py index 63d40985..f14a76f7 100644 --- a/training_core/train_graph.py +++ b/training_core/train_graph.py @@ -8,7 +8,7 @@ import kgcnn.losses_core.losses import kgcnn.metrics_core.metrics from kgcnn.training_core.history import save_history_score, load_history_list, load_time_list -from kgcnn.data.transform.scaler.serial import deserialize +from kgcnn.data.transform.scaler.serial import deserialize as deserialize_scaler from kgcnn.utils_core.plots import plot_train_test_loss, plot_predict_true from kgcnn.model.serial import deserialize as deserialize_model from kgcnn.data.serial import deserialize as deserialize_dataset @@ -94,36 +94,44 @@ # Make the model for current split using model kwargs from hyperparameter. model = deserialize_model(hyper["model"]) - # Compile model with optimizer and loss from hyperparameter. - # The metrics from this script is added to the hyperparameter entry for metrics. 
- model.compile(**hyper.compile()) - model.summary() - print(" Compiled with jit: %s" % model._jit_compile) # noqa - # Adapt output-scale via a transform. # Scaler is applied to target if 'scaler' appears in hyperparameter. Only use for regression. + scaled_metrics = None if "scaler" in hyper["training"]: print("Using Scaler to adjust output scale of model.") - scaler = deserialize(hyper["training"]["scaler"]) + scaler = deserialize_scaler(hyper["training"]["scaler"]) scaler.fit_dataset(dataset_train) if hasattr(model, "set_scale"): + print("Setting scale at model.") model.set_scale(scaler) else: - scaler.transform(dataset_train, copy_dataset=True, copy=True) - scaler.transform(dataset_test, copy_dataset=True, copy=True) + print("Transforming dataset.") + dataset_train = scaler.transform_dataset(dataset_train, copy_dataset=True, copy=True) + dataset_test = scaler.transform_dataset(dataset_test, copy_dataset=True, copy=True) # If scaler was used we add rescaled standard metrics to compile, since otherwise the keras history will not # directly log the original target values, but the scaled ones. scaler_scale = scaler.get_scaling() - for metric in model.metrics: - print(metric) - # Don't use scaled metrics if model has scale already. - if scaler_scale is not None: - if hasattr(metric, "set_scale"): - metric.set_scale(scaler_scale) + mae_metric = kgcnn.metrics_core.metrics.ScaledMeanAbsoluteError( + scaler_scale.shape, name="scaled_mean_absolute_error") + rms_metric = kgcnn.metrics_core.metrics.ScaledRootMeanSquaredError( + scaler_scale.shape, name="scaled_root_mean_squared_error") + if scaler_scale is not None: + mae_metric.set_scale(scaler_scale) + rms_metric.set_scale(scaler_scale) + scaled_metrics = [mae_metric, rms_metric] # Save scaler to file scaler.save(os.path.join(filepath, f"scaler{postfix_file}_fold_{current_split}")) + # Compile model with optimizer and loss from hyperparameter. 
+ # The metrics from this script is added to the hyperparameter entry for metrics. + model.compile(**hyper.compile(metrics=scaled_metrics)) + + # Model summary + model.summary() + print(" Compiled with jit: %s" % model._jit_compile) # noqa + + # Pick train/test data. x_train = dataset_train.tensor(hyper["model"]["config"]["inputs"]) y_train = np.array(dataset_train.get("graph_labels")) x_test = dataset_test.tensor(hyper["model"]["config"]["inputs"]) @@ -162,9 +170,8 @@ # Save last keras-model to output-folder. model.save_weights(os.path.join(filepath, f"model{postfix_file}_fold_{current_split}.weights.h5")) - # Plot training- and test-loss vs epochs for all splits. -history_list = load_history_list(os.path.join(filepath, f"history{postfix_file}_fold_(i).pickle"), current_split+1) +history_list = load_history_list(os.path.join(filepath, f"history{postfix_file}_fold_(i).pickle"), current_split + 1) plot_train_test_loss(history_list, loss_name=None, val_loss_name=None, model_name=hyper.model_name, data_unit=label_units, dataset_name=hyper.dataset_class, filepath=filepath, file_name=f"loss{postfix_file}.png") @@ -178,10 +185,14 @@ hyper.save(os.path.join(filepath, f"{hyper.model_name}_hyper{postfix_file}.json")) # Save score of fit result for as text file. 
-time_list = load_time_list(os.path.join(filepath, f"time{postfix_file}_fold_(i).pickle"), current_split+1) -save_history_score(history_list, loss_name=None, val_loss_name=None, - model_name=hyper.model_name, data_unit=label_units, dataset_name=hyper.dataset_class, - model_class=hyper.model_class, - model_version=model.__kgcnn_model_version__ if hasattr(model, "__kgcnn_model_version__") else "", - filepath=filepath, file_name=f"score{postfix_file}.yaml", time_list=time_list, - seed=args["seed"]) +time_list = load_time_list(os.path.join(filepath, f"time{postfix_file}_fold_(i).pickle"), current_split + 1) +save_history_score( + history_list, loss_name=None, val_loss_name=None, + model_name=hyper.model_name, data_unit=label_units, dataset_name=hyper.dataset_class, + model_class=hyper.model_class, + multi_target_indices=hyper["training"]["multi_target_indices"] if "multi_target_indices" in hyper[ + "training"] else None, + model_version=model.__kgcnn_model_version__ if hasattr(model, "__kgcnn_model_version__") else "", + filepath=filepath, file_name=f"score{postfix_file}.yaml", time_list=time_list, + seed=args["seed"] +)