Merge pull request #814 from AznamirWoW/tts_from_file

TTS from File now actually reads the text to synthesize from the file
IAHispano · Oct 14, 2024 · f2ad93d
2 parents 892006d + 6d92e93
commit f2ad93d
Show file tree

Hide file tree

Showing 4 changed files with 38 additions and 11 deletions.
diff --git a/assets/i18n/languages/en_US.json b/assets/i18n/languages/en_US.json
@@ -203,6 +203,8 @@
   "Text to Synthesize": "Text to Synthesize",
   "Enter the text to synthesize.": "Enter the text to synthesize.",
   "Upload a .txt file": "Upload a .txt file",
+  "Input path for text file": "Input path for text file",
+  "The path to the text file that contains content for text to speech.": "The path to the text file that contains content for text to speech.",
   "Enter text to synthesize": "Enter text to synthesize",
   "Output Path for TTS Audio": "Output Path for TTS Audio",
   "Output Path for RVC Audio": "Output Path for RVC Audio",

diff --git a/core.py b/core.py
@@ -342,6 +342,7 @@ def run_batch_infer_script(
 
 # TTS
 def run_tts_script(
+    tts_file: str,
     tts_text: str,
     tts_voice: str,
     tts_rate: int,
@@ -380,6 +381,7 @@ def run_tts_script(
             [
                 python,
                 tts_script_path,
+                tts_file,
                 tts_text,
                 tts_voice,
                 tts_rate,
@@ -1711,6 +1713,9 @@ def parse_arguments():
 
     # Parser for 'tts' mode
     tts_parser = subparsers.add_parser("tts", help="Run TTS inference")
+    tts_parser.add_argument(
+        "--tts_file", type=str, help="File with a text to be synthesized", required=True
+    )
     tts_parser.add_argument(
         "--tts_text", type=str, help="Text to be synthesized", required=True
     )
@@ -2458,6 +2463,7 @@ def main():
             )
         elif args.mode == "tts":
             run_tts_script(
+                tts_file=args.tts_file,
                 tts_text=args.tts_text,
                 tts_voice=args.tts_voice,
                 tts_rate=args.tts_rate,

diff --git a/rvc/lib/tools/tts.py b/rvc/lib/tools/tts.py
@@ -1,17 +1,25 @@
 import sys
 import asyncio
 import edge_tts
-
+import os
 
 async def main():
     # Parse command line arguments
-    text = str(sys.argv[1])
-    voice = str(sys.argv[2])
-    rate = int(sys.argv[3])
-    output_file = str(sys.argv[4])
+    tts_file = str(sys.argv[1])
+    text = str(sys.argv[2])
+    voice = str(sys.argv[3])
+    rate = int(sys.argv[4])
+    output_file = str(sys.argv[5])
 
     rates = f"+{rate}%" if rate >= 0 else f"{rate}%"
-
+    if tts_file and os.path.exists(tts_file):
+        text = ""
+        try:
+            with open(tts_file, 'r', encoding="utf-8") as file:
+                text = file.read()
+        except UnicodeDecodeError:
+            with open(tts_file, 'r') as file:
+                text = file.read()
     await edge_tts.Communicate(text, voice, rate=rates).save(output_file)
     print(f"TTS with {voice} completed. Output TTS file: '{output_file}'")
 

diff --git a/tabs/tts/tts.py b/tabs/tts/tts.py
@@ -33,10 +33,14 @@
 
 
 def process_input(file_path):
-    with open(file_path, "r", encoding="utf-8") as file:
-        file_contents = file.read()
-    gr.Info(f"The text from the txt file has been loaded!")
-    return file_contents, None
+    try:
+        with open(file_path, "r", encoding="utf-8") as file:
+            file.read()
+        gr.Info(f"The file has been loaded!")
+        return file_path, file_path
+    except UnicodeDecodeError:
+        gr.Info(f"The file has to be in UTF-8 encoding.")
+        return None, None
 
 
 # TTS tab
@@ -115,6 +119,12 @@ def tts_tab():
                 label=i18n("Upload a .txt file"),
                 type="filepath",
             )
+            input_tts_path = gr.Textbox(
+                label=i18n("Input path for text file"),
+                placeholder=i18n("The path to the text file that contains content for text to speech."),
+                value="",
+                interactive=True,
+            )
 
     with gr.Accordion(i18n("Advanced Settings"), open=False):
         with gr.Column():
@@ -362,7 +372,7 @@ def toggle_visible_embedder_custom(embedder_model):
     txt_file.upload(
         fn=process_input,
         inputs=[txt_file],
-        outputs=[tts_text, txt_file],
+        outputs=[input_tts_path, txt_file],
     )
     embedder_model.change(
         fn=toggle_visible_embedder_custom,
@@ -382,6 +392,7 @@ def toggle_visible_embedder_custom(embedder_model):
     convert_button.click(
         fn=run_tts_script,
         inputs=[
+            input_tts_path,           
             tts_text,
             tts_voice,
             tts_rate,