From d7cf04a160670bcdb70677d7ebfb8e735775dadd Mon Sep 17 00:00:00 2001
From: Florian Huber
Date: Wed, 6 Mar 2024 13:39:02 +0100
Subject: [PATCH 1/6] add tensorboard option

---
 ms2deepscore/models/SiameseSpectralModel.py | 52 ++++++++++++++++-----
 1 file changed, 41 insertions(+), 11 deletions(-)

diff --git a/ms2deepscore/models/SiameseSpectralModel.py b/ms2deepscore/models/SiameseSpectralModel.py
index 6b89e442..5b1433d6 100644
--- a/ms2deepscore/models/SiameseSpectralModel.py
+++ b/ms2deepscore/models/SiameseSpectralModel.py
@@ -1,7 +1,9 @@
+import os
 import numpy as np
 import torch
 import torch.nn.functional as F
 from torch import nn, optim
+from torch.utils.tensorboard import SummaryWriter
 from tqdm import tqdm
 from ms2deepscore.__version__ import __version__
 from ms2deepscore.models.helper_functions import (initialize_device,
@@ -147,6 +149,23 @@ def forward(self, spectra_tensors, metadata_tensors):
         return x
 
 
+def initialize_training(model, learning_rate, use_tensorboard):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+    if use_tensorboard:
+        # TensorBoard writer
+        log_dir ="runs"
+        if not os.path.exists(log_dir):
+            os.makedirs(log_dir)
+
+        writer = SummaryWriter(log_dir)
+    else:
+        writer = None
+    return device, optimizer, writer
+
+
 def train(model: SiameseSpectralModel,
           data_generator,
           num_epochs: int,
@@ -157,11 +176,12 @@ def train(model: SiameseSpectralModel,
           checkpoint_filename: str = None,
           loss_function="MSE",
           weighting_factor=0,
-          monitor_rmse: bool = True,
           collect_all_targets: bool = False,
           lambda_l1: float = 0,
           lambda_l2: float = 0,
-          progress_bar: bool = True):
+          progress_bar: bool = True,
+          use_tensorboard: bool = True,
+          ):
     """Train a model with given parameters.
 
     Parameters
@@ -186,8 +206,6 @@ def train(model: SiameseSpectralModel,
         Pass a loss function (e.g. a pytorch default or a custom function).
     weighting_factor
         Default is set to 0, set to value between 0 and 1 to shift attention to higher target scores.
-    monitor_rmse
-        If True rmse will be monitored turing training.
     collect_all_targets
         If True, all training targets will be collected (e.g. for later statistics).
     lambda_l1
@@ -196,15 +214,12 @@ def train(model: SiameseSpectralModel,
         L2 regularization strength.
     """
     # pylint: disable=too-many-arguments, too-many-locals
-    device = initialize_device()
-    model.to(device)
+    device, optimizer, writer = initialize_training(model, learning_rate, use_tensorboard)
 
     if loss_function.lower() not in LOSS_FUNCTIONS:
         raise ValueError(f"Unknown loss function. Must be one of: {LOSS_FUNCTIONS.keys()}")
     criterion = LOSS_FUNCTIONS[loss_function.lower()]
 
-    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
-
     history = {
         "losses": [],
         "val_losses": [],
@@ -237,8 +252,8 @@ def train(model: SiameseSpectralModel,
                 loss += l1_regularization(model, lambda_l1) + l2_regularization(model, lambda_l2)
             batch_losses.append(float(loss))
 
-            if monitor_rmse:
-                batch_rmse.append(rmse_loss(outputs, targets.to(device)).cpu().detach().numpy())
+            #batch_rmse.append(rmse_loss(outputs, targets.to(device)).cpu().detach().numpy())
+            batch_rmse.append(rmse_loss(outputs, targets).cpu().detach().numpy())
 
             # Backward pass and optimize
             loss.backward()
@@ -249,6 +264,13 @@ def train(model: SiameseSpectralModel,
                     loss=float(loss),
                     rmse=np.mean(batch_rmse),
                 )
+        # Monitor
+        avg_loss = np.mean(batch_losses)
+        avg_rmse = np.mean(batch_rmse)
+        if use_tensorboard:
+            writer.add_scalar('LOSS/train', avg_loss, epoch)
+            writer.add_scalar('RMSE/train', avg_rmse, epoch)
+
         history["losses"].append(np.mean(batch_losses))
         history["rmse"].append(np.mean(batch_rmse))
 
@@ -256,8 +278,13 @@ def train(model: SiameseSpectralModel,
             val_losses = validation_loss_calculator.compute_binned_validation_loss(model,
                                                                                    loss_types=(loss_function, "rmse"))
             val_loss = val_losses[loss_function]
+
+            # Monitor
             history["val_losses"].append(val_loss)
             history["val_rmse"].append(val_losses["rmse"])
+            if use_tensorboard:
+                writer.add_scalar('LOSS/val', avg_loss, epoch)
+                writer.add_scalar('RMSE/val', avg_rmse, epoch)
             if val_loss < min_val_loss:
                 if checkpoint_filename:
                     print("Saving checkpoint model.")
@@ -271,9 +298,12 @@ def train(model: SiameseSpectralModel,
                 break
 
         # Print statistics
-        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {np.mean(batch_losses):.4f}")
+        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")
         if validation_loss_calculator is not None:
             print(f"Validation Loss: {val_loss:.4f} (RMSE: {val_losses['rmse']:.4f}).")
+
+    if use_tensorboard:
+        writer.close()
 
     return history
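For orientation, the snippet below isolates the SummaryWriter pattern that PATCH 1 wires into the training loop: create one writer per run, log scalars under a tag once per epoch, and close the writer at the end. It is a minimal standalone sketch for readers unfamiliar with the API, not code taken from the patch; the loop and the fake_loss values are invented placeholders.

# Minimal sketch of the TensorBoard logging pattern used in PATCH 1.
# The training loop and loss values are placeholders, not ms2deepscore code.
import os

from torch.utils.tensorboard import SummaryWriter

log_dir = "runs"
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

writer = SummaryWriter(log_dir)
for epoch in range(5):
    fake_loss = 1.0 / (epoch + 1)  # stand-in for the real epoch-averaged loss
    writer.add_scalar("LOSS/train", fake_loss, epoch)  # tag, value, global step
writer.close()

Once a few scalars have been written, running `tensorboard --logdir runs` serves the curves in a browser.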
From eb985a350fb999932cca15db441868c5481c1fa2 Mon Sep 17 00:00:00 2001
From: Florian Huber
Date: Wed, 6 Mar 2024 13:39:24 +0100
Subject: [PATCH 2/6] updates to run with/without tensorboard

---
 .gitignore                                          | 1 +
 ms2deepscore/SettingsMS2Deepscore.py                | 1 +
 ms2deepscore/train_new_model/train_ms2deepscore.py  | 4 +++-
 tests/test_training_wrapper_function.py             | 3 ++-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 7ee56018..d4d453f0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ build
 dist
 .cache
 __pycache__
+runs
 
 htmlcov
 .coverage
diff --git a/ms2deepscore/SettingsMS2Deepscore.py b/ms2deepscore/SettingsMS2Deepscore.py
index f7bfd1b6..3ac2f108 100644
--- a/ms2deepscore/SettingsMS2Deepscore.py
+++ b/ms2deepscore/SettingsMS2Deepscore.py
@@ -104,6 +104,7 @@ def __init__(self, **settings):
         self.patience = 30
         self.loss_function = "mse"
         self.weighting_factor = 0
+        self.use_tensorboard = True
 
         # Folder names for storing
         self.model_file_name = "ms2deepscore_model.pt"
diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py
index 507243b2..023dc34e 100644
--- a/ms2deepscore/train_new_model/train_ms2deepscore.py
+++ b/ms2deepscore/train_new_model/train_ms2deepscore.py
@@ -50,7 +50,9 @@ def train_ms2ds_model(
         validation_loss_calculator=validation_loss_calculator,
         patience=settings.patience,
         loss_function=settings.loss_function,
-        checkpoint_filename=output_model_file_name, lambda_l1=0, lambda_l2=0)
+        checkpoint_filename=output_model_file_name, lambda_l1=0, lambda_l2=0,
+        use_tensorboard=settings.use_tensorboard,
+        )
 
     # Save plot of history
     plot_history(history["losses"], history["val_losses"], ms2ds_history_plot_file_name)
diff --git a/tests/test_training_wrapper_function.py b/tests/test_training_wrapper_function.py
index 9690f910..05bd39f1 100644
--- a/tests/test_training_wrapper_function.py
+++ b/tests/test_training_wrapper_function.py
@@ -23,7 +23,8 @@ def test_train_wrapper_ms2ds_model(tmp_path):
         "same_prob_bins": np.array([(0, 0.2), (0.2, 1.0)]),
         "average_pairs_per_bin": 2,
         "batch_size": 2,  # to speed up tests --> usually larger
-        "random_seed": 42
+        "random_seed": 42,
+        "use_tensorboard": False
         })
 
     model_directory_name = train_ms2deepscore_wrapper(spectra_file_name, settings, validation_split_fraction=5)

From 679c46c847b2b2d3200ca5b2e747e37854e7451e Mon Sep 17 00:00:00 2001
From: Florian Huber
Date: Wed, 6 Mar 2024 13:53:04 +0100
Subject: [PATCH 3/6] add tensorboard

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 8593fb67..96ef467d 100644
--- a/setup.py
+++ b/setup.py
@@ -33,6 +33,7 @@
         "numpy>=1.20.3",
         "pandas",
         "scikit-learn",
+        "tensorboard",
         "torch",
         "tqdm",
         "matplotlib==3.7.2"

From e8a86658fe53a912fb6c754b87244bb276aa7b03 Mon Sep 17 00:00:00 2001
From: Florian Huber
Date: Wed, 6 Mar 2024 14:06:15 +0100
Subject: [PATCH 4/6] linting

---
 ms2deepscore/models/SiameseSpectralModel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ms2deepscore/models/SiameseSpectralModel.py b/ms2deepscore/models/SiameseSpectralModel.py
index 5b1433d6..ffca82fb 100644
--- a/ms2deepscore/models/SiameseSpectralModel.py
+++ b/ms2deepscore/models/SiameseSpectralModel.py
@@ -150,7 +150,7 @@ def forward(self, spectra_tensors, metadata_tensors):
 
 
 def initialize_training(model, learning_rate, use_tensorboard):
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    device = initialize_device()
     model.to(device)
     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
 
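Before the next commit makes the log directory configurable, it may help to see the contract that initialize_training now gives train(): a (device, optimizer, writer) triple, with writer left as None when TensorBoard is disabled. The sketch below imitates that contract with a throwaway TinyNet model; it is an illustration under assumptions, not the ms2deepscore implementation.

# Illustration of the (device, optimizer, writer) triple returned by
# initialize_training. TinyNet and this simplified function are placeholders.
import torch
from torch import nn, optim
from torch.utils.tensorboard import SummaryWriter


class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 1)

    def forward(self, x):
        return self.linear(x)


def initialize_training_sketch(model, learning_rate, use_tensorboard, log_dir="runs"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    writer = SummaryWriter(log_dir) if use_tensorboard else None
    return device, optimizer, writer


device, optimizer, writer = initialize_training_sketch(TinyNet(), 1e-3, use_tensorboard=False)
print(device, type(optimizer).__name__, writer)  # e.g. "cpu Adam None"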
From dfcc973a4ed992aa366bf1ce45718cc34e5216ad Mon Sep 17 00:00:00 2001
From: Florian Huber
Date: Wed, 13 Mar 2024 18:02:08 +0100
Subject: [PATCH 5/6] make log_dir parameter + edits

---
 ms2deepscore/SettingsMS2Deepscore.py                | 1 +
 ms2deepscore/models/SiameseSpectralModel.py         | 9 ++++++---
 ms2deepscore/train_new_model/train_ms2deepscore.py  | 1 +
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/ms2deepscore/SettingsMS2Deepscore.py b/ms2deepscore/SettingsMS2Deepscore.py
index 3ac2f108..37dd83d3 100644
--- a/ms2deepscore/SettingsMS2Deepscore.py
+++ b/ms2deepscore/SettingsMS2Deepscore.py
@@ -105,6 +105,7 @@ def __init__(self, **settings):
         self.loss_function = "mse"
         self.weighting_factor = 0
         self.use_tensorboard = True
+        self.log_dir = "runs"
 
         # Folder names for storing
         self.model_file_name = "ms2deepscore_model.pt"
diff --git a/ms2deepscore/models/SiameseSpectralModel.py b/ms2deepscore/models/SiameseSpectralModel.py
index ffca82fb..ba799d89 100644
--- a/ms2deepscore/models/SiameseSpectralModel.py
+++ b/ms2deepscore/models/SiameseSpectralModel.py
@@ -149,14 +149,15 @@ def forward(self, spectra_tensors, metadata_tensors):
         return x
 
 
-def initialize_training(model, learning_rate, use_tensorboard):
+def initialize_training(model, learning_rate, use_tensorboard, log_dir="runs"):
+    """Initializes device (cpu or gpu) as well as the optimizer and Tensorboard writer.
+    """
     device = initialize_device()
     model.to(device)
     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
 
     if use_tensorboard:
         # TensorBoard writer
-        log_dir ="runs"
         if not os.path.exists(log_dir):
             os.makedirs(log_dir)
 
@@ -181,6 +182,7 @@ def train(model: SiameseSpectralModel,
           lambda_l2: float = 0,
           progress_bar: bool = True,
           use_tensorboard: bool = True,
+          log_dir: str = "runs",
           ):
     """Train a model with given parameters.
 
@@ -252,7 +254,6 @@ def train(model: SiameseSpectralModel,
                 loss += l1_regularization(model, lambda_l1) + l2_regularization(model, lambda_l2)
             batch_losses.append(float(loss))
 
-            #batch_rmse.append(rmse_loss(outputs, targets.to(device)).cpu().detach().numpy())
             batch_rmse.append(rmse_loss(outputs, targets).cpu().detach().numpy())
 
             # Backward pass and optimize
@@ -270,6 +271,7 @@ def train(model: SiameseSpectralModel,
         if use_tensorboard:
             writer.add_scalar('LOSS/train', avg_loss, epoch)
             writer.add_scalar('RMSE/train', avg_rmse, epoch)
+            writer.flush()
 
         history["losses"].append(np.mean(batch_losses))
         history["rmse"].append(np.mean(batch_rmse))
@@ -285,6 +287,7 @@ def train(model: SiameseSpectralModel,
             if use_tensorboard:
                 writer.add_scalar('LOSS/val', avg_loss, epoch)
                 writer.add_scalar('RMSE/val', avg_rmse, epoch)
+                writer.flush()
             if val_loss < min_val_loss:
                 if checkpoint_filename:
                     print("Saving checkpoint model.")
diff --git a/ms2deepscore/train_new_model/train_ms2deepscore.py b/ms2deepscore/train_new_model/train_ms2deepscore.py
index 023dc34e..d2583e39 100644
--- a/ms2deepscore/train_new_model/train_ms2deepscore.py
+++ b/ms2deepscore/train_new_model/train_ms2deepscore.py
@@ -52,6 +52,7 @@ def train_ms2ds_model(
         loss_function=settings.loss_function,
         checkpoint_filename=output_model_file_name, lambda_l1=0, lambda_l2=0,
         use_tensorboard=settings.use_tensorboard,
+        log_dir=settings.log_dir,
         )
 
     # Save plot of history

From bffa1301d348b26b4ef31695f6baefbe25ce05d0 Mon Sep 17 00:00:00 2001
From: Florian Huber
Date: Wed, 13 Mar 2024 18:29:45 +0100
Subject: [PATCH 6/6] fix parameter passing

---
 ms2deepscore/models/SiameseSpectralModel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ms2deepscore/models/SiameseSpectralModel.py b/ms2deepscore/models/SiameseSpectralModel.py
index ba799d89..02413930 100644
--- a/ms2deepscore/models/SiameseSpectralModel.py
+++ b/ms2deepscore/models/SiameseSpectralModel.py
@@ -216,7 +218,7 @@ def train(model: SiameseSpectralModel,
         L2 regularization strength.
     """
     # pylint: disable=too-many-arguments, too-many-locals
-    device, optimizer, writer = initialize_training(model, learning_rate, use_tensorboard)
+    device, optimizer, writer = initialize_training(model, learning_rate, use_tensorboard, log_dir=log_dir)
 
     if loss_function.lower() not in LOSS_FUNCTIONS:
         raise ValueError(f"Unknown loss function. Must be one of: {LOSS_FUNCTIONS.keys()}")
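After the whole series is applied, TensorBoard logging is switched on and off through the settings object (use_tensorboard, log_dir) rather than through hard-coded values. The check below is a hedged, generic way to confirm that a training run actually produced event files; it relies only on the default directory name introduced in PATCH 5 plus standard Python and the standard TensorBoard CLI, not on any ms2deepscore API.

# Generic post-run check: did a training with use_tensorboard=True write
# TensorBoard event files into the configured log_dir ("runs" by default)?
import glob
import os

log_dir = "runs"  # matches the default added in PATCH 5 (settings.log_dir)
event_files = glob.glob(os.path.join(log_dir, "**", "events.out.tfevents.*"), recursive=True)

if event_files:
    print(f"Found {len(event_files)} event file(s); inspect them with: tensorboard --logdir {log_dir}")
else:
    print(f"No event files under '{log_dir}' yet; run a training with use_tensorboard=True first.")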