From fdf0c8b10a00404b51bdd62bf62231c0dbf4e50f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 16 Nov 2023 23:40:21 +0100 Subject: [PATCH 1/5] chore(encoder): remove unused code --- TTS/encoder/utils/generic_utils.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/TTS/encoder/utils/generic_utils.py b/TTS/encoder/utils/generic_utils.py index 1da029611b..bbce6a8abb 100644 --- a/TTS/encoder/utils/generic_utils.py +++ b/TTS/encoder/utils/generic_utils.py @@ -2,7 +2,6 @@ import glob import os import random -import re import numpy as np from scipy import signal @@ -118,11 +117,6 @@ def apply_one(self, audio): return self.additive_noise(noise_type, audio) -def to_camel(text): - text = text.capitalize() - return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), text) - - def setup_encoder_model(config: "Coqpit"): if config.model_params["model_name"].lower() == "lstm": model = LSTMSpeakerEncoder( From 39fe38bda4d6937336255d32e542d4f84dd0fe15 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 16 Nov 2023 23:46:26 +0100 Subject: [PATCH 2/5] refactor: use save_fsspec() from Trainer --- TTS/encoder/utils/generic_utils.py | 2 +- TTS/encoder/utils/io.py | 2 +- TTS/utils/io.py | 13 +------------ 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/TTS/encoder/utils/generic_utils.py b/TTS/encoder/utils/generic_utils.py index bbce6a8abb..2b003ac8b6 100644 --- a/TTS/encoder/utils/generic_utils.py +++ b/TTS/encoder/utils/generic_utils.py @@ -5,10 +5,10 @@ import numpy as np from scipy import signal +from trainer.io import save_fsspec from TTS.encoder.models.lstm import LSTMSpeakerEncoder from TTS.encoder.models.resnet import ResNetSpeakerEncoder -from TTS.utils.io import save_fsspec class AugmentWAV(object): diff --git a/TTS/encoder/utils/io.py b/TTS/encoder/utils/io.py index d1dad3e24d..a8359be11a 100644 --- a/TTS/encoder/utils/io.py +++ b/TTS/encoder/utils/io.py @@ -1,7 +1,7 @@ import datetime import os -from TTS.utils.io import save_fsspec +from trainer.io import save_fsspec def save_checkpoint(model, optimizer, model_loss, out_path, current_step): diff --git a/TTS/utils/io.py b/TTS/utils/io.py index e9bdf3e686..9ab1075c6d 100644 --- a/TTS/utils/io.py +++ b/TTS/utils/io.py @@ -8,6 +8,7 @@ import fsspec import torch from coqpit import Coqpit +from trainer.io import save_fsspec from TTS.utils.generic_utils import get_user_data_dir @@ -102,18 +103,6 @@ def load_checkpoint( return model, state -def save_fsspec(state: Any, path: str, **kwargs): - """Like torch.save but can save to other locations (e.g. s3:// , gs://). - - Args: - state: State object to save - path: Any path or url supported by fsspec. - **kwargs: Keyword arguments forwarded to torch.save. - """ - with fsspec.open(path, "wb") as f: - torch.save(state, f, **kwargs) - - def save_model(config, model, optimizer, scaler, current_step, epoch, output_path, **kwargs): if hasattr(model, "module"): model_state = model.module.state_dict() From 5119e651a1dbccdc4e5fdb47dc386d33f378e621 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 16 Nov 2023 23:52:28 +0100 Subject: [PATCH 3/5] chore(utils.io): remove unused code These are all available in Trainer. 
--- TTS/utils/io.py | 104 ------------------------------------------------ 1 file changed, 104 deletions(-) diff --git a/TTS/utils/io.py b/TTS/utils/io.py index 9ab1075c6d..7aaedbe2ab 100644 --- a/TTS/utils/io.py +++ b/TTS/utils/io.py @@ -1,4 +1,3 @@ -import datetime import json import os import pickle as pickle_tts @@ -8,7 +7,6 @@ import fsspec import torch from coqpit import Coqpit -from trainer.io import save_fsspec from TTS.utils.generic_utils import get_user_data_dir @@ -101,105 +99,3 @@ def load_checkpoint( if eval: model.eval() return model, state - - -def save_model(config, model, optimizer, scaler, current_step, epoch, output_path, **kwargs): - if hasattr(model, "module"): - model_state = model.module.state_dict() - else: - model_state = model.state_dict() - if isinstance(optimizer, list): - optimizer_state = [optim.state_dict() for optim in optimizer] - elif optimizer.__class__.__name__ == "CapacitronOptimizer": - optimizer_state = [optimizer.primary_optimizer.state_dict(), optimizer.secondary_optimizer.state_dict()] - else: - optimizer_state = optimizer.state_dict() if optimizer is not None else None - - if isinstance(scaler, list): - scaler_state = [s.state_dict() for s in scaler] - else: - scaler_state = scaler.state_dict() if scaler is not None else None - - if isinstance(config, Coqpit): - config = config.to_dict() - - state = { - "config": config, - "model": model_state, - "optimizer": optimizer_state, - "scaler": scaler_state, - "step": current_step, - "epoch": epoch, - "date": datetime.date.today().strftime("%B %d, %Y"), - } - state.update(kwargs) - save_fsspec(state, output_path) - - -def save_checkpoint( - config, - model, - optimizer, - scaler, - current_step, - epoch, - output_folder, - **kwargs, -): - file_name = "checkpoint_{}.pth".format(current_step) - checkpoint_path = os.path.join(output_folder, file_name) - print("\n > CHECKPOINT : {}".format(checkpoint_path)) - save_model( - config, - model, - optimizer, - scaler, - current_step, - epoch, - checkpoint_path, - **kwargs, - ) - - -def save_best_model( - current_loss, - best_loss, - config, - model, - optimizer, - scaler, - current_step, - epoch, - out_path, - keep_all_best=False, - keep_after=10000, - **kwargs, -): - if current_loss < best_loss: - best_model_name = f"best_model_{current_step}.pth" - checkpoint_path = os.path.join(out_path, best_model_name) - print(" > BEST MODEL : {}".format(checkpoint_path)) - save_model( - config, - model, - optimizer, - scaler, - current_step, - epoch, - checkpoint_path, - model_loss=current_loss, - **kwargs, - ) - fs = fsspec.get_mapper(out_path).fs - # only delete previous if current is saved successfully - if not keep_all_best or (current_step < keep_after): - model_names = fs.glob(os.path.join(out_path, "best_model*.pth")) - for model_name in model_names: - if os.path.basename(model_name) != best_model_name: - fs.rm(model_name) - # create a shortcut which always points to the currently best model - shortcut_name = "best_model.pth" - shortcut_path = os.path.join(out_path, shortcut_name) - fs.copy(checkpoint_path, shortcut_path) - best_loss = current_loss - return best_loss From 96678c7ba227871d0929f2366d083219ccfa9262 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 17 Nov 2023 00:12:09 +0100 Subject: [PATCH 4/5] refactor: use copy_model_files() from Trainer --- TTS/bin/train_encoder.py | 4 ++-- TTS/encoder/utils/training.py | 2 +- TTS/utils/io.py | 31 ------------------------------- 3 files changed, 3 insertions(+), 34 deletions(-) diff --git 
a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index f2e7779c0c..c4fb920faf 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -8,6 +8,7 @@ import torch from torch.utils.data import DataLoader +from trainer.io import copy_model_files from trainer.torch import NoamLR from trainer.trainer_utils import get_optimizer @@ -18,7 +19,6 @@ from TTS.tts.datasets import load_tts_samples from TTS.utils.audio import AudioProcessor from TTS.utils.generic_utils import count_parameters, remove_experiment_folder -from TTS.utils.io import copy_model_files from TTS.utils.samplers import PerfectBatchSampler from TTS.utils.training import check_update @@ -276,7 +276,7 @@ def main(args): # pylint: disable=redefined-outer-name if c.loss == "softmaxproto" and c.model != "speaker_encoder": c.map_classid_to_classname = map_classid_to_classname - copy_model_files(c, OUT_PATH) + copy_model_files(c, OUT_PATH, new_fields={}) if args.restore_path: criterion, args.restore_step = model.load_checkpoint( diff --git a/TTS/encoder/utils/training.py b/TTS/encoder/utils/training.py index 7c58a232e7..ff8f271d80 100644 --- a/TTS/encoder/utils/training.py +++ b/TTS/encoder/utils/training.py @@ -3,13 +3,13 @@ from coqpit import Coqpit from trainer import TrainerArgs, get_last_checkpoint +from trainer.io import copy_model_files from trainer.logging import logger_factory from trainer.logging.console_logger import ConsoleLogger from TTS.config import load_config, register_config from TTS.tts.utils.text.characters import parse_symbols from TTS.utils.generic_utils import get_experiment_folder_path, get_git_branch -from TTS.utils.io import copy_model_files @dataclass diff --git a/TTS/utils/io.py b/TTS/utils/io.py index 7aaedbe2ab..3107ba661b 100644 --- a/TTS/utils/io.py +++ b/TTS/utils/io.py @@ -1,12 +1,9 @@ -import json import os import pickle as pickle_tts -import shutil from typing import Any, Callable, Dict, Union import fsspec import torch -from coqpit import Coqpit from TTS.utils.generic_utils import get_user_data_dir @@ -27,34 +24,6 @@ def __init__(self, *args, **kwargs): self.__dict__ = self -def copy_model_files(config: Coqpit, out_path, new_fields=None): - """Copy config.json and other model files to training folder and add - new fields. - - Args: - config (Coqpit): Coqpit config defining the training run. - out_path (str): output path to copy the file. - new_fields (dict): new fileds to be added or edited - in the config file. - """ - copy_config_path = os.path.join(out_path, "config.json") - # add extra information fields - if new_fields: - config.update(new_fields, allow_new=True) - # TODO: Revert to config.save_json() once Coqpit supports arbitrary paths. 
- with fsspec.open(copy_config_path, "w", encoding="utf8") as f: - json.dump(config.to_dict(), f, indent=4) - - # copy model stats file if available - if config.audio.stats_path is not None: - copy_stats_path = os.path.join(out_path, "scale_stats.npy") - filesystem = fsspec.get_mapper(copy_stats_path).fs - if not filesystem.exists(copy_stats_path): - with fsspec.open(config.audio.stats_path, "rb") as source_file: - with fsspec.open(copy_stats_path, "wb") as target_file: - shutil.copyfileobj(source_file, target_file) - - def load_fsspec( path: str, map_location: Union[str, Callable, torch.device, Dict[Union[str, torch.device], Union[str, torch.device]]] = None, From 0fb0d67de7bd05ef4afd80f05e242217e9800c80 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 17 Nov 2023 00:39:11 +0100 Subject: [PATCH 5/5] refactor: use save_checkpoint()/save_best_model() from Trainer --- TTS/bin/train_encoder.py | 21 +++++++++--- TTS/encoder/utils/generic_utils.py | 40 ----------------------- TTS/encoder/utils/io.py | 38 --------------------- tests/aux_tests/test_embedding_manager.py | 4 +-- tests/aux_tests/test_speaker_manager.py | 4 +-- tests/inference_tests/test_synthesizer.py | 3 +- 6 files changed, 23 insertions(+), 87 deletions(-) delete mode 100644 TTS/encoder/utils/io.py diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index c4fb920faf..448fefc712 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -8,12 +8,12 @@ import torch from torch.utils.data import DataLoader -from trainer.io import copy_model_files +from trainer.io import copy_model_files, save_best_model, save_checkpoint from trainer.torch import NoamLR from trainer.trainer_utils import get_optimizer from TTS.encoder.dataset import EncoderDataset -from TTS.encoder.utils.generic_utils import save_best_model, save_checkpoint, setup_encoder_model +from TTS.encoder.utils.generic_utils import setup_encoder_model from TTS.encoder.utils.training import init_training from TTS.encoder.utils.visual import plot_embeddings from TTS.tts.datasets import load_tts_samples @@ -222,7 +222,9 @@ def train(model, optimizer, scheduler, criterion, data_loader, eval_data_loader, if global_step % c.save_step == 0: # save model - save_checkpoint(model, optimizer, criterion, loss.item(), OUT_PATH, global_step, epoch) + save_checkpoint( + c, model, optimizer, None, global_step, epoch, OUT_PATH, criterion=criterion.state_dict() + ) end_time = time.time() @@ -245,7 +247,18 @@ def train(model, optimizer, scheduler, criterion, data_loader, eval_data_loader, flush=True, ) # save the best checkpoint - best_loss = save_best_model(model, optimizer, criterion, eval_loss, best_loss, OUT_PATH, global_step, epoch) + best_loss = save_best_model( + eval_loss, + best_loss, + c, + model, + optimizer, + None, + global_step, + epoch, + OUT_PATH, + criterion=criterion.state_dict(), + ) model.train() return best_loss, global_step diff --git a/TTS/encoder/utils/generic_utils.py b/TTS/encoder/utils/generic_utils.py index 2b003ac8b6..236d6fe937 100644 --- a/TTS/encoder/utils/generic_utils.py +++ b/TTS/encoder/utils/generic_utils.py @@ -1,11 +1,9 @@ -import datetime import glob import os import random import numpy as np from scipy import signal -from trainer.io import save_fsspec from TTS.encoder.models.lstm import LSTMSpeakerEncoder from TTS.encoder.models.resnet import ResNetSpeakerEncoder @@ -136,41 +134,3 @@ def setup_encoder_model(config: "Coqpit"): audio_config=config.audio, ) return model - - -def save_checkpoint(model, optimizer, criterion, 
model_loss, out_path, current_step, epoch): - checkpoint_path = "checkpoint_{}.pth".format(current_step) - checkpoint_path = os.path.join(out_path, checkpoint_path) - print(" | | > Checkpoint saving : {}".format(checkpoint_path)) - - new_state_dict = model.state_dict() - state = { - "model": new_state_dict, - "optimizer": optimizer.state_dict() if optimizer is not None else None, - "criterion": criterion.state_dict(), - "step": current_step, - "epoch": epoch, - "loss": model_loss, - "date": datetime.date.today().strftime("%B %d, %Y"), - } - save_fsspec(state, checkpoint_path) - - -def save_best_model(model, optimizer, criterion, model_loss, best_loss, out_path, current_step, epoch): - if model_loss < best_loss: - new_state_dict = model.state_dict() - state = { - "model": new_state_dict, - "optimizer": optimizer.state_dict(), - "criterion": criterion.state_dict(), - "step": current_step, - "epoch": epoch, - "loss": model_loss, - "date": datetime.date.today().strftime("%B %d, %Y"), - } - best_loss = model_loss - bestmodel_path = "best_model.pth" - bestmodel_path = os.path.join(out_path, bestmodel_path) - print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path)) - save_fsspec(state, bestmodel_path) - return best_loss diff --git a/TTS/encoder/utils/io.py b/TTS/encoder/utils/io.py deleted file mode 100644 index a8359be11a..0000000000 --- a/TTS/encoder/utils/io.py +++ /dev/null @@ -1,38 +0,0 @@ -import datetime -import os - -from trainer.io import save_fsspec - - -def save_checkpoint(model, optimizer, model_loss, out_path, current_step): - checkpoint_path = "checkpoint_{}.pth".format(current_step) - checkpoint_path = os.path.join(out_path, checkpoint_path) - print(" | | > Checkpoint saving : {}".format(checkpoint_path)) - - new_state_dict = model.state_dict() - state = { - "model": new_state_dict, - "optimizer": optimizer.state_dict() if optimizer is not None else None, - "step": current_step, - "loss": model_loss, - "date": datetime.date.today().strftime("%B %d, %Y"), - } - save_fsspec(state, checkpoint_path) - - -def save_best_model(model, optimizer, model_loss, best_loss, out_path, current_step): - if model_loss < best_loss: - new_state_dict = model.state_dict() - state = { - "model": new_state_dict, - "optimizer": optimizer.state_dict(), - "step": current_step, - "loss": model_loss, - "date": datetime.date.today().strftime("%B %d, %Y"), - } - best_loss = model_loss - bestmodel_path = "best_model.pth" - bestmodel_path = os.path.join(out_path, bestmodel_path) - print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path)) - save_fsspec(state, bestmodel_path) - return best_loss diff --git a/tests/aux_tests/test_embedding_manager.py b/tests/aux_tests/test_embedding_manager.py index 7392150163..e3acd62bee 100644 --- a/tests/aux_tests/test_embedding_manager.py +++ b/tests/aux_tests/test_embedding_manager.py @@ -3,11 +3,11 @@ import numpy as np import torch +from trainer.io import save_checkpoint from tests import get_tests_input_path from TTS.config import load_config from TTS.encoder.utils.generic_utils import setup_encoder_model -from TTS.encoder.utils.io import save_checkpoint from TTS.tts.utils.managers import EmbeddingManager from TTS.utils.audio import AudioProcessor @@ -31,7 +31,7 @@ def test_speaker_embedding(): # create a dummy speaker encoder model = setup_encoder_model(config) - save_checkpoint(model, None, None, get_tests_input_path(), 0) + save_checkpoint(config, model, None, None, 0, 0, get_tests_input_path()) # load audio processor and speaker 
encoder manager = EmbeddingManager(encoder_model_path=encoder_model_path, encoder_config_path=encoder_config_path) diff --git a/tests/aux_tests/test_speaker_manager.py b/tests/aux_tests/test_speaker_manager.py index 397f9c81f6..402fbca459 100644 --- a/tests/aux_tests/test_speaker_manager.py +++ b/tests/aux_tests/test_speaker_manager.py @@ -3,11 +3,11 @@ import numpy as np import torch +from trainer.io import save_checkpoint from tests import get_tests_input_path from TTS.config import load_config from TTS.encoder.utils.generic_utils import setup_encoder_model -from TTS.encoder.utils.io import save_checkpoint from TTS.tts.utils.speakers import SpeakerManager from TTS.utils.audio import AudioProcessor @@ -30,7 +30,7 @@ def test_speaker_embedding(): # create a dummy speaker encoder model = setup_encoder_model(config) - save_checkpoint(model, None, None, get_tests_input_path(), 0) + save_checkpoint(config, model, None, None, 0, 0, get_tests_input_path()) # load audio processor and speaker encoder ap = AudioProcessor(**config.audio) diff --git a/tests/inference_tests/test_synthesizer.py b/tests/inference_tests/test_synthesizer.py index 40e830178c..ce4fc751c2 100644 --- a/tests/inference_tests/test_synthesizer.py +++ b/tests/inference_tests/test_synthesizer.py @@ -1,10 +1,11 @@ import os import unittest +from trainer.io import save_checkpoint + from tests import get_tests_input_path from TTS.config import load_config from TTS.tts.models import setup_model -from TTS.utils.io import save_checkpoint from TTS.utils.synthesizer import Synthesizer
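
Editorial note (not part of the patch series): taken together, these five commits drop the checkpoint and config helpers that were duplicated across TTS/utils/io.py, TTS/encoder/utils/io.py, and TTS/encoder/utils/generic_utils.py, and call the equivalents from trainer.io instead. The sketch below is a minimal illustration of the adopted API; the signatures are inferred from the calls shown in TTS/bin/train_encoder.py and the updated tests, and the model, optimizer, criterion, config, and paths are placeholders rather than code from the repository.

    import os

    import torch
    from trainer.io import save_best_model, save_checkpoint, save_fsspec

    # Placeholder objects standing in for the speaker-encoder training setup.
    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    criterion = torch.nn.CrossEntropyLoss()
    config = {"model": "lstm"}  # a full Coqpit config object in the real training script
    out_path = "/tmp/encoder_run"
    os.makedirs(out_path, exist_ok=True)
    global_step, epoch = 100, 1

    # Periodic checkpoint (patch 5/5): positional arguments are
    # (config, model, optimizer, scaler, step, epoch, output_folder); extra keyword
    # arguments such as the criterion state are stored in the checkpoint dict.
    save_checkpoint(config, model, optimizer, None, global_step, epoch, out_path,
                    criterion=criterion.state_dict())

    # Best-model tracking (patch 5/5): writes best_model_<step>.pth plus a
    # best_model.pth shortcut only when the new loss improves on best_loss,
    # and returns the (possibly updated) best loss.
    best_loss = save_best_model(0.42, float("inf"), config, model, optimizer, None,
                                global_step, epoch, out_path,
                                criterion=criterion.state_dict())

    # save_fsspec() (patch 2/5) behaves like torch.save() but accepts any path or
    # URL supported by fsspec, e.g. s3:// or gs://, in addition to local files.
    save_fsspec({"model": model.state_dict()}, os.path.join(out_path, "manual_dump.pth"))

The tests touched in patch 5/5 use the same save_checkpoint() call with optimizer and scaler set to None, which is enough to produce a loadable dummy encoder checkpoint. copy_model_files() from patch 4/5 is not sketched here because it expects a complete Coqpit config with an audio section, which the placeholder dict above does not provide.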