some improvements to tts

IAHispano · Dec 6, 2024 · 2f37ec4
1 parent eeb2858
commit 2f37ec4
Show file tree

Hide file tree

Showing 3 changed files with 10 additions and 2 deletions.
diff --git a/rvc/infer/infer.py b/rvc/infer/infer.py
@@ -248,7 +248,12 @@ def convert_audio(
             sid (int, optional): Speaker ID. Default is 0.
             **kwargs: Additional keyword arguments.
         """
+        if not model_path:
+            print("No model path provided. Aborting conversion.")
+            return
+
         self.get_vc(model_path, sid)
+
         try:
             start_time = time.time()
             print(f"Converting audio '{audio_input_path}'...")
@@ -432,6 +437,7 @@ def get_vc(self, weight_root, sid):
                 self.setup_vc_instance()
             self.loaded_model = weight_root
 
+
     def cleanup_model(self):
         """
         Cleans up the model and releases resources.
@@ -471,11 +477,13 @@ def setup_network(self):
 
             self.version = self.cpt.get("version", "v1")
             self.text_enc_hidden_dim = 768 if self.version == "v2" else 256
+            self.vocoder = self.cpt.get("vocoder", "HiFi-GAN")
             self.net_g = Synthesizer(
                 *self.cpt["config"],
                 use_f0=self.use_f0,
                 text_enc_hidden_dim=self.text_enc_hidden_dim,
                 is_half=self.config.is_half,
+                vocoder=self.vocoder
             )
             del self.net_g.enc_q
             self.net_g.load_state_dict(self.cpt["weight"], strict=False)

diff --git a/rvc/lib/tools/tts.py b/rvc/lib/tools/tts.py
@@ -22,7 +22,7 @@ async def main():
             with open(tts_file, "r") as file:
                 text = file.read()
     await edge_tts.Communicate(text, voice, rate=rates).save(output_file)
-    print(f"TTS with {voice} completed. Output TTS file: '{output_file}'")
+    # print(f"TTS with {voice} completed. Output TTS file: '{output_file}'")
 
 
 if __name__ == "__main__":

diff --git a/tabs/tts/tts.py b/tabs/tts/tts.py
@@ -85,7 +85,7 @@ def tts_tab():
 
     gr.Markdown(
         i18n(
-            f"Applio is a Speech-to-Speech conversion software, utilizing EdgeTTS as middleware for running the Text-to-Speech (TTS) component. Read more about it [here!](https://docs.applio.org/getting-started/tts#disclaimer)"
+            f"Applio is a Speech-to-Speech conversion software, utilizing EdgeTTS as middleware for running the Text-to-Speech (TTS) component. Read more about it [here!](https://docs.applio.org/applio/getting-started/tts)"
         )
     )
     tts_voice = gr.Dropdown(