From 132ac71696010ed46a51d1e5b8fc72dd2e3c0254 Mon Sep 17 00:00:00 2001
From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com>
Date: Tue, 27 Aug 2024 08:43:14 -0300
Subject: [PATCH 1/6] Add more information to the model

---
 assets/i18n/languages/en_US.json       |  5 +++-
 core.py                                | 10 +++++++
 rvc/train/process/extract_model.py     | 19 +++++++++++++
 rvc/train/process/model_information.py |  8 ++++++
 rvc/train/train.py                     | 36 ++++++++++++++++++++++++++
 tabs/extra/extra.py                    |  5 ++--
 tabs/extra/processing/processing.py    |  2 +-
 tabs/train/train.py                    |  9 +++++++
 8 files changed, 88 insertions(+), 6 deletions(-)

diff --git a/assets/i18n/languages/en_US.json b/assets/i18n/languages/en_US.json
index 8b3bb321..2545b488 100644
--- a/assets/i18n/languages/en_US.json
+++ b/assets/i18n/languages/en_US.json
@@ -301,5 +301,8 @@
     "Folder Name": "Folder Name",
     "Upload .bin": "Upload .bin",
     "Upload .json": "Upload .json",
-    "Move files to custom embedder folder": "Move files to custom embedder folder"
+    "Move files to custom embedder folder": "Move files to custom embedder folder",
+    "model information": "model information",
+    "Model Creator": "Model Creator",
+    "Name of the model creator.": "Name of the model creator."
 }
diff --git a/core.py b/core.py
index bca001c5..af33f44b 100644
--- a/core.py
+++ b/core.py
@@ -443,6 +443,7 @@ def run_train_script(
     custom_pretrained: bool = False,
     g_pretrained_path: str = None,
     d_pretrained_path: str = None,
+    model_creator: str = None,
 ):
 
     if pretrained == True:
@@ -484,6 +485,7 @@ def run_train_script(
                 overtraining_detector,
                 overtraining_threshold,
                 sync_graph,
+                model_creator,
             ],
         ),
     ]
@@ -526,6 +528,7 @@ def run_model_extract_script(
 # Model information
 def run_model_information_script(pth_path: str):
     print(model_information(pth_path))
+    return model_information(pth_path)
 
 
 # Model blender
@@ -1351,6 +1354,12 @@ def parse_arguments():
         help="Enable graph synchronization for distributed training.",
         default=False,
     )
+    train_parser.add_argument(
+        "--model_creator",
+        type=str,
+        help="Model creator name.",
+        default=None,
+    )
     train_parser.add_argument(
         "--cache_data_in_gpu",
         type=lambda x: bool(strtobool(x)),
@@ -1655,6 +1664,7 @@ def main():
             pretrained=args.pretrained,
             custom_pretrained=args.custom_pretrained,
             sync_graph=args.sync_graph,
+            model_creator=args.model_creator,
             index_algorithm=args.index_algorithm,
             cache_data_in_gpu=args.cache_data_in_gpu,
             g_pretrained_path=args.g_pretrained_path,
diff --git a/rvc/train/process/extract_model.py b/rvc/train/process/extract_model.py
index b0940e34..23536d8a 100644
--- a/rvc/train/process/extract_model.py
+++ b/rvc/train/process/extract_model.py
@@ -18,7 +18,20 @@ def replace_keys_in_dict(d, old_key_part, new_key_part):
     return updated_dict
 
 
-def extract_model(ckpt, sr, pitch_guidance, name, model_dir, epoch, step, version, hps):
+def extract_model(
+    ckpt,
+    sr,
+    pitch_guidance,
+    name,
+    model_dir,
+    epoch,
+    step,
+    version,
+    hps,
+    model_creator,
+    overtrain_info,
+    dataset_lenght,
+):
     try:
         print(f"Saved model '{model_dir}' (epoch {epoch} and step {step})")
 
@@ -70,6 +83,10 @@
         hash_input = f"{str(ckpt)} {epoch} {step} {datetime.datetime.now().isoformat()}"
         model_hash = hashlib.sha256(hash_input.encode()).hexdigest()
         opt["model_hash"] = model_hash
+        opt["model_name"] = name
+        opt["model_creator"] = model_creator
+        opt["overtrain_info"] = overtrain_info
+        opt["dataset_lenght"] = dataset_lenght
 
         torch.save(opt, os.path.join(model_dir_path, pth_file))
 
diff --git a/rvc/train/process/model_information.py b/rvc/train/process/model_information.py
index be74269e..6c150b22 100644
--- a/rvc/train/process/model_information.py
+++ b/rvc/train/process/model_information.py
@@ -17,24 +17,32 @@ def model_information(path):
 
     print(f"Loaded model from {path}")
 
+    model_name = model_data.get("model_name", "None")
+    model_creator = model_data.get("model_creator", "None")
     epochs = model_data.get("epoch", "None")
     steps = model_data.get("step", "None")
     sr = model_data.get("sr", "None")
     f0 = model_data.get("f0", "None")
+    dataset_lenght = model_data.get("dataset_lenght", "None")
     version = model_data.get("version", "None")
     creation_date = model_data.get("creation_date", "None")
     model_hash = model_data.get("model_hash", None)
+    overtrain_info = model_data.get("overtrain_info", "None")
 
     pitch_guidance = "True" if f0 == 1 else "False"
 
     creation_date_str = prettify_date(creation_date) if creation_date else "None"
 
     return (
+        f"Model Name: {model_name}\n"
+        f"Model Creator: {model_creator}\n"
         f"Epochs: {epochs}\n"
         f"Steps: {steps}\n"
         f"RVC Version: {version}\n"
         f"Sampling Rate: {sr}\n"
         f"Pitch Guidance: {pitch_guidance}\n"
+        f"Dataset Length: {dataset_lenght}\n"
         f"Creation Date: {creation_date_str}\n"
         f"Hash (ID): {model_hash}"
+        f"\nOvertrain Info: {overtrain_info}"
     )
diff --git a/rvc/train/train.py b/rvc/train/train.py
index 2adbcee7..a3e06d68 100644
--- a/rvc/train/train.py
+++ b/rvc/train/train.py
@@ -28,6 +28,7 @@
 from torch.utils.tensorboard import SummaryWriter
 import torch.distributed as dist
 import torch.multiprocessing as mp
+from pydub import AudioSegment
 
 now_dir = os.getcwd()
 sys.path.append(os.path.join(now_dir))
@@ -72,10 +73,12 @@
 overtraining_detector = strtobool(sys.argv[14])
 overtraining_threshold = int(sys.argv[15])
 sync_graph = strtobool(sys.argv[16])
+model_creator = sys.argv[17]
 
 current_dir = os.getcwd()
 experiment_dir = os.path.join(current_dir, "logs", model_name)
 config_save_path = os.path.join(experiment_dir, "config.json")
+dataset_path = os.path.join(experiment_dir, "sliced_audios")
 
 with open(config_save_path, "r") as f:
     config = json.load(f)
@@ -97,6 +100,8 @@
 smoothed_loss_disc_history = []
 lowest_value = {"step": 0, "value": float("inf"), "epoch": 0}
 training_file_path = os.path.join(experiment_dir, "training_data.json")
+dataset_duration = 0
+overtrain_info = None
 
 import logging
 
@@ -124,6 +129,24 @@ def record(self):
         return f"time={current_time} | training_speed={elapsed_time_str}"
 
 
+def ms_to_min_sec(ms):
+    seconds = ms // 1000
+    minutes = seconds // 60
+    seconds = seconds % 60
+    return f"{minutes}:{seconds:02}"
+
+
+def get_audio_durations(dataset_path):
+    durations = []
+    for filename in os.listdir(dataset_path):
+        if filename.endswith(".wav"):  # Assuming the audio files are .wav
+            audio_path = os.path.join(dataset_path, filename)
+            audio = AudioSegment.from_wav(audio_path)
+            duration_ms = len(audio)
+            durations.append(ms_to_min_sec(duration_ms))
+    return durations
+
+
 def main():
     """
     Main function to start the training process.
@@ -203,6 +226,8 @@ def continue_overtrain_detector(training_file_path):
         print("GPU not detected, reverting to CPU (not recommended)")
         n_gpus = 1
 
+    dataset_duration = get_audio_durations(dataset_path)
+
     if sync_graph == True:
         print(
             "Sync graph is now activated! With sync graph enabled, the model undergoes a single epoch of training. Once the graphs are synchronized, training proceeds for the previously specified number of epochs."
@@ -821,6 +846,9 @@ def train_and_evaluate(
                 step=global_step,
                 version=version,
                 hps=hps,
+                model_creator=model_creator,
+                overtrain_info=overtrain_info,
+                dataset_lenght=dataset_duration,
             )
 
     def check_overtraining(smoothed_loss_history, threshold, epsilon=0.004):
@@ -917,6 +945,8 @@ def save_to_json(
             consecutive_increases_gen += 1
         else:
            consecutive_increases_gen = 0
+
+        overtrain_info = f"Smoothed loss_g {smoothed_value_gen:.3f} and loss_d {smoothed_value_disc:.3f}"
         # Save the data in the JSON file if the epoch is divisible by save_every_epoch
         if epoch % save_every_epoch == 0:
             save_to_json(
@@ -965,6 +995,9 @@ def save_to_json(
                 step=global_step,
                 version=version,
                 hps=hps,
+                model_creator=model_creator,
+                overtrain_info=overtrain_info,
+                dataset_lenght=dataset_duration,
             )
 
     # Print training progress
@@ -1025,6 +1058,9 @@ def save_to_json(
                 step=global_step,
                 version=version,
                 hps=hps,
+                model_creator=model_creator,
+                overtrain_info=overtrain_info,
+                dataset_lenght=dataset_duration,
             )
             sleep(1)
             os._exit(2333333)
diff --git a/tabs/extra/extra.py b/tabs/extra/extra.py
index 40bf778a..eeb25649 100644
--- a/tabs/extra/extra.py
+++ b/tabs/extra/extra.py
@@ -21,12 +21,11 @@ def extra_tab():
             "This section contains some extra utilities that often may be in experimental phases."
         )
     )
+    with gr.TabItem(i18n("Model information")):
+        processing_tab()
 
     with gr.TabItem(i18n("F0 Curve")):
         f0_extractor_tab()
 
-    with gr.TabItem(i18n("Processing")):
-        processing_tab()
-
     with gr.TabItem(i18n("Audio Analyzer")):
         analyzer_tab()
diff --git a/tabs/extra/processing/processing.py b/tabs/extra/processing/processing.py
index dbc9e7cb..1eb037ac 100644
--- a/tabs/extra/processing/processing.py
+++ b/tabs/extra/processing/processing.py
@@ -27,7 +27,7 @@ def processing_tab():
         label=i18n("Output Information"),
         info=i18n("The output information will be displayed here."),
         value="",
-        max_lines=8,
+        max_lines=11,
     )
     model_view_button = gr.Button(i18n("View"), variant="primary")
     model_view_button.click(
diff --git a/tabs/train/train.py b/tabs/train/train.py
index 211b159b..60eab759 100644
--- a/tabs/train/train.py
+++ b/tabs/train/train.py
@@ -311,6 +311,14 @@ def train_tab():
                 interactive=True,
                 allow_custom_value=True,
             )
+            model_creator = gr.Dropdown(
+                label=i18n("Model Creator"),
+                info=i18n("Name of the model creator."),
+                value="",
+                interactive=True,
+                visible=True,
+                allow_custom_value=True,
+            )
             sampling_rate = gr.Radio(
                 label=i18n("Sampling Rate"),
                 info=i18n("The sampling rate of the audio files."),
@@ -752,6 +760,7 @@ def train_tab():
             custom_pretrained,
             g_pretrained_path,
             d_pretrained_path,
+            model_creator,
         ],
         outputs=[train_output_info],
         api_name="start_training",

From 3da9187bac26c7f831bd7acadac7229524d854ed Mon Sep 17 00:00:00 2001
From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com>
Date: Tue, 27 Aug 2024 08:57:50 -0300
Subject: [PATCH 2/6] Make the author's name compatible with Fumiama's fork

---
 rvc/train/process/extract_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rvc/train/process/extract_model.py b/rvc/train/process/extract_model.py
index 23536d8a..76964fc7 100644
--- a/rvc/train/process/extract_model.py
+++ b/rvc/train/process/extract_model.py
@@ -84,7 +84,7 @@ def extract_model(
         model_hash = hashlib.sha256(hash_input.encode()).hexdigest()
         opt["model_hash"] = model_hash
         opt["model_name"] = name
-        opt["model_creator"] = model_creator
+        opt["author"] = model_creator
         opt["overtrain_info"] = overtrain_info
         opt["dataset_lenght"] = dataset_lenght
 
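The fields written by PATCH 1 land in the opt dictionary that extract_model() hands to torch.save, so they can be read back directly from the exported .pth. A minimal sketch of inspecting them (not part of the patch series; the checkpoint path is a placeholder, and .get() is used because checkpoints exported before this patch do not carry the new keys):

    import torch

    # Placeholder path; point this at a .pth exported after PATCH 1.
    ckpt = torch.load("logs/my_model/my_model.pth", map_location="cpu")

    for key in ("model_name", "model_creator", "overtrain_info",
                "dataset_lenght", "model_hash", "creation_date"):
        # Older checkpoints may lack the new keys, hence the default.
        print(key, "->", ckpt.get(key, "None"))
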
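After PATCH 2 the saved key is "author", which appears to be the key Fumiama's fork reads, while checkpoints exported with only PATCH 1 applied still carry "model_creator". A small compatibility sketch for code that wants to handle both (again not part of the patch series; placeholder path):

    import torch

    ckpt = torch.load("logs/my_model/my_model.pth", map_location="cpu")  # placeholder path

    # Prefer the new "author" key, fall back to the older "model_creator", then to "Unknown".
    author = ckpt.get("author") or ckpt.get("model_creator") or "Unknown"
    print(f"Model author: {author}")
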
From e88a953f2a540df679471d7b15c66cf29d9b73ec Mon Sep 17 00:00:00 2001
From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com>
Date: Tue, 27 Aug 2024 08:59:00 -0300
Subject: [PATCH 3/6] oops

---
 rvc/train/process/model_information.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rvc/train/process/model_information.py b/rvc/train/process/model_information.py
index 6c150b22..88214f95 100644
--- a/rvc/train/process/model_information.py
+++ b/rvc/train/process/model_information.py
@@ -18,7 +18,7 @@ def model_information(path):
     print(f"Loaded model from {path}")
 
     model_name = model_data.get("model_name", "None")
-    model_creator = model_data.get("model_creator", "None")
+    model_creator = model_data.get("author", "None")
     epochs = model_data.get("epoch", "None")
     steps = model_data.get("step", "None")
     sr = model_data.get("sr", "None")

From a8d238e3a663f069055f48c143a95d65add3cb40 Mon Sep 17 00:00:00 2001
From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com>
Date: Tue, 27 Aug 2024 09:12:28 -0300
Subject: [PATCH 4/6] Make the creator's name optional

---
 core.py                            | 1 +
 rvc/train/process/extract_model.py | 2 ++
 tabs/extra/model_information.py    | 2 +-
 tabs/train/train.py                | 2 +-
 4 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/core.py b/core.py
index af33f44b..7b4c5d70 100644
--- a/core.py
+++ b/core.py
@@ -1359,6 +1359,7 @@ def parse_arguments():
         type=str,
         help="Model creator name.",
         default=None,
+        required=False,
     )
     train_parser.add_argument(
         "--cache_data_in_gpu",
diff --git a/rvc/train/process/extract_model.py b/rvc/train/process/extract_model.py
index 76964fc7..17f2a544 100644
--- a/rvc/train/process/extract_model.py
+++ b/rvc/train/process/extract_model.py
@@ -84,6 +84,8 @@ def extract_model(
         model_hash = hashlib.sha256(hash_input.encode()).hexdigest()
         opt["model_hash"] = model_hash
         opt["model_name"] = name
+        if model_creator is None:
+            model_creator = "Unknown"
         opt["author"] = model_creator
         opt["overtrain_info"] = overtrain_info
         opt["dataset_lenght"] = dataset_lenght
diff --git a/tabs/extra/model_information.py b/tabs/extra/model_information.py
index 27e8f77e..de0fc73d 100644
--- a/tabs/extra/model_information.py
+++ b/tabs/extra/model_information.py
@@ -18,7 +18,7 @@ def model_information_tab():
         label=i18n("Output Information"),
         info=i18n("The output information will be displayed here."),
         value="",
-        max_lines=8,
+        max_lines=11,
         interactive=False,
     )
     model_information_button = gr.Button(i18n("See Model Information"))
diff --git a/tabs/train/train.py b/tabs/train/train.py
index 60eab759..73056629 100644
--- a/tabs/train/train.py
+++ b/tabs/train/train.py
@@ -314,7 +314,7 @@ def train_tab():
             model_creator = gr.Dropdown(
                 label=i18n("Model Creator"),
                 info=i18n("Name of the model creator."),
-                value="",
+                value=None,
                 interactive=True,
                 visible=True,
                 allow_custom_value=True,

From 9a900845f1425ba8bfb34adc76f8bbaf19df67e8 Mon Sep 17 00:00:00 2001
From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com>
Date: Tue, 27 Aug 2024 10:41:43 -0300
Subject: [PATCH 5/6] Fix TTS

---
 core.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/core.py b/core.py
index 7b4c5d70..cb9ec88f 100644
--- a/core.py
+++ b/core.py
@@ -340,6 +340,21 @@ def run_tts_script(
         f0_file=f0_file,
         embedder_model=embedder_model,
         embedder_model_custom=embedder_model_custom,
+        formant_shifting=None,
+        formant_qfrency=None,
+        formant_timbre=None,
+        post_process=None,
+        reverb=None,
+        pitch_shift=None,
+        limiter=None,
+        gain=None,
+        distortion=None,
+        chorus=None,
+        bitcrush=None,
+        clipping=None,
+        compressor=None,
+        delay=None,
+        sliders=None,
     )
 
     return f"Text {tts_text} synthesized successfully.", output_rvc_path.replace(

From d890d6621d4492291500637b3d40c3913b3ff559 Mon Sep 17 00:00:00 2001
From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com>
Date: Tue, 27 Aug 2024 11:59:57 -0300
Subject: [PATCH 6/6] Add a note

---
 assets/i18n/languages/en_US.json | 2 +-
 tabs/train/train.py              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/assets/i18n/languages/en_US.json b/assets/i18n/languages/en_US.json
index 2545b488..2a7da46a 100644
--- a/assets/i18n/languages/en_US.json
+++ b/assets/i18n/languages/en_US.json
@@ -304,5 +304,5 @@
     "Move files to custom embedder folder": "Move files to custom embedder folder",
     "model information": "model information",
     "Model Creator": "Model Creator",
-    "Name of the model creator.": "Name of the model creator."
+    "Name of the model creator. (Default: Unknown)": "Name of the model creator. (Default: Unknown)"
 }
diff --git a/tabs/train/train.py b/tabs/train/train.py
index 73056629..2d5b4f6f 100644
--- a/tabs/train/train.py
+++ b/tabs/train/train.py
@@ -313,7 +313,7 @@ def train_tab():
             )
             model_creator = gr.Dropdown(
                 label=i18n("Model Creator"),
-                info=i18n("Name of the model creator."),
+                info=i18n("Name of the model creator. (Default: Unknown)"),
                 value=None,
                 interactive=True,
                 visible=True,