Add a "Speaker ID" (sid) dropdown to the TTS tab and forward the selected
value through run_tts_script.

Review notes for this revision of the patch:
  * get_speakers_id always returns a non-empty list and falls back to [0]
    when no model is selected or the checkpoint cannot be read, so the
    dropdown default of 0 is always a valid choice and building the tab
    cannot crash on an unreadable file.
  * change_choices accepts model=None (zero-argument calls keep working)
    and no longer calls sorted() on the integer 0.
  * The speaker update is returned in the third position so that it lines
    up with `sid` in outputs=[model_file, index_file, sid].
  * Line breaks and leading whitespace of context lines were reconstructed;
    verify them against the target tree (or apply with --ignore-whitespace).
    The post-image blob id on the tabs/tts/tts.py index line is the one
    from the original patch and does not reflect these revisions.

diff --git a/core.py b/core.py
index da3aa820..8429e4cc 100644
--- a/core.py
+++ b/core.py
@@ -359,6 +359,7 @@ def run_tts_script(
     f0_file: str,
     embedder_model: str,
     embedder_model_custom: str = None,
+    sid: int = 0,
 ):
     tts_script_path = os.path.join("rvc", "lib", "tools", "tts.py")
 
@@ -402,6 +403,7 @@ def run_tts_script(
         f0_file=f0_file,
         embedder_model=embedder_model,
         embedder_model_custom=embedder_model_custom,
+        sid=sid,
         formant_shifting=None,
         formant_qfrency=None,
         formant_timbre=None,
diff --git a/tabs/tts/tts.py b/tabs/tts/tts.py
index 55a5bb48..86df8343 100644
--- a/tabs/tts/tts.py
+++ b/tabs/tts/tts.py
@@ -4,6 +4,7 @@
 import json
 import random
 import shutil
+import torch
 
 from core import (
     run_tts_script,
@@ -51,7 +52,9 @@
 ]
 
 
-def change_choices():
+def change_choices(model=None):
+    """Return refreshed choice lists, including the speaker IDs of `model`."""
+    speakers = get_speakers_id(model)
     names = [
         os.path.join(root, file)
         for root, _, files in os.walk(model_root_relative, topdown=False)
@@ -80,6 +83,8 @@ def change_choices():
         {"choices": sorted(indexes_list), "__type__": "update"},
+        # Third slot: must line up with `sid` in the refresh button outputs.
+        {"choices": sorted(speakers), "__type__": "update"},
         {"choices": sorted(custom_embedders), "__type__": "update"},
         {"choices": sorted(custom_embedders), "__type__": "update"},
     )
 
 
@@ -136,6 +141,28 @@ def save_drop_custom_embedder(dropbox):
         )
     return None
 
 
+def get_speakers_id(model):
+    """Return the selectable speaker IDs for the checkpoint at `model`.
+
+    Falls back to `[0]` when no model is selected, the file cannot be
+    loaded, or it has no usable "speakers_id" entry, so the Speaker ID
+    dropdown's default value of 0 is always a valid choice.
+    """
+    if not model:
+        return [0]
+    try:
+        # NOTE(security): torch.load unpickles the file; only point this at
+        # checkpoints from sources you trust.
+        model_data = torch.load(model, map_location="cpu")
+        speakers_count = int(model_data.get("speakers_id", 1))
+    except Exception as error:
+        # Runs while the tab is being built and on refresh: a missing or
+        # unreadable file must not take the whole UI down.
+        print(f"Could not read speaker IDs from {model}: {error}")
+        return [0]
+    return list(range(max(speakers_count, 1)))
+
+
 # TTS tab
 def tts_tab():
@@ -235,6 +262,13 @@ def tts_tab():
                 value="WAV",
                 interactive=True,
             )
+            sid = gr.Dropdown(
+                label=i18n("Speaker ID"),
+                info=i18n("Select the speaker ID to use for the conversion."),
+                choices=get_speakers_id(model_file.value),
+                value=0,
+                interactive=True,
+            )
             split_audio = gr.Checkbox(
                 label=i18n("Split Audio"),
                 info=i18n(
@@ -421,8 +455,8 @@ def toggle_visible_embedder_custom(embedder_model):
     )
     refresh_button.click(
         fn=change_choices,
-        inputs=[],
-        outputs=[model_file, index_file],
+        inputs=[model_file],
+        outputs=[model_file, index_file, sid],
     )
     txt_file.upload(
         fn=process_input,
@@ -470,6 +504,7 @@ def toggle_visible_embedder_custom(embedder_model):
             f0_file,
             embedder_model,
             embedder_model_custom,
+            sid,
         ],
         outputs=[vc_output1, vc_output2],
     )