Merge pull request #749 from ShiromiyaG/tts-speaker

TTS speaker
IAHispano · Sep 28, 2024 · 65a5ed6
2 parents cec5696 + 2e02eac
commit 65a5ed6
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 3 deletions.
diff --git a/core.py b/core.py
@@ -359,6 +359,7 @@ def run_tts_script(
     f0_file: str,
     embedder_model: str,
     embedder_model_custom: str = None,
+    sid: int = 0,
 ):
 
     tts_script_path = os.path.join("rvc", "lib", "tools", "tts.py")
@@ -402,6 +403,7 @@ def run_tts_script(
         f0_file=f0_file,
         embedder_model=embedder_model,
         embedder_model_custom=embedder_model_custom,
+        sid=sid,
         formant_shifting=None,
         formant_qfrency=None,
         formant_timbre=None,

diff --git a/tabs/tts/tts.py b/tabs/tts/tts.py
@@ -4,6 +4,7 @@
 import json
 import random
 import shutil
+import torch
 
 from core import (
     run_tts_script,
@@ -51,7 +52,11 @@
 ]
 
 
-def change_choices():
+def change_choices(model):
+    if model:
+        speakers = get_speakers_id(model)
+    else:
+        speakers = 0
     names = [
         os.path.join(root, file)
         for root, _, files in os.walk(model_root_relative, topdown=False)
@@ -80,6 +85,7 @@ def change_choices():
         {"choices": sorted(indexes_list), "__type__": "update"},
         {"choices": sorted(custom_embedders), "__type__": "update"},
         {"choices": sorted(custom_embedders), "__type__": "update"},
+        {"choices": sorted(speakers), "__type__": "update"},
     )
 
 
@@ -136,6 +142,11 @@ def save_drop_custom_embedder(dropbox):
         )
     return None
 
+def get_speakers_id(model):
+    if model:
+        model_data = torch.load(model, map_location="cpu")
+        speakers_id = model_data.get("speakers_id", 0)
+        return list(range(speakers_id))
 
 # TTS tab
 def tts_tab():
@@ -235,6 +246,13 @@ def tts_tab():
                 value="WAV",
                 interactive=True,
             )
+            sid = gr.Dropdown(
+                label=i18n("Speaker ID"),
+                info=i18n("Select the speaker ID to use for the conversion."),
+                choices=get_speakers_id(model_file.value),
+                value=0,
+                interactive=True,
+            )
             split_audio = gr.Checkbox(
                 label=i18n("Split Audio"),
                 info=i18n(
@@ -421,8 +439,8 @@ def toggle_visible_embedder_custom(embedder_model):
     )
     refresh_button.click(
         fn=change_choices,
-        inputs=[],
-        outputs=[model_file, index_file],
+        inputs=[model_file],
+        outputs=[model_file, index_file, sid],
     )
     txt_file.upload(
         fn=process_input,
@@ -470,6 +488,7 @@ def toggle_visible_embedder_custom(embedder_model):
             f0_file,
             embedder_model,
             embedder_model_custom,
+            sid,
         ],
         outputs=[vc_output1, vc_output2],
     )