From 34ffe32e9a69676c26d63a56b690fd5a973cb6e2 Mon Sep 17 00:00:00 2001 From: Alexey Shmelev Date: Sun, 13 Oct 2024 10:24:25 -0400 Subject: [PATCH 1/3] TTS from File now actually does it from file --- core.py | 5 +++++ rvc/lib/tools/tts.py | 20 ++++++++++++++------ tabs/tts/tts.py | 21 ++++++++++++++++----- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/core.py b/core.py index 61d8cefa2..a0659e7c0 100644 --- a/core.py +++ b/core.py @@ -338,6 +338,7 @@ def run_batch_infer_script( # TTS def run_tts_script( + tts_file: str, tts_text: str, tts_voice: str, tts_rate: int, @@ -375,6 +376,7 @@ def run_tts_script( [ python, tts_script_path, + tts_file, tts_text, tts_voice, tts_rate, @@ -1690,6 +1692,9 @@ def parse_arguments(): # Parser for 'tts' mode tts_parser = subparsers.add_parser("tts", help="Run TTS inference") + tts_parser.add_argument( + "--tts_file", type=str, help="File with a text to be synthesized", required=True + ) tts_parser.add_argument( "--tts_text", type=str, help="Text to be synthesized", required=True ) diff --git a/rvc/lib/tools/tts.py b/rvc/lib/tools/tts.py index a9994dbd0..d42691b71 100644 --- a/rvc/lib/tools/tts.py +++ b/rvc/lib/tools/tts.py @@ -1,17 +1,25 @@ import sys import asyncio import edge_tts - +import os async def main(): # Parse command line arguments - text = str(sys.argv[1]) - voice = str(sys.argv[2]) - rate = int(sys.argv[3]) - output_file = str(sys.argv[4]) + tts_file = str(sys.argv[1]) + text = str(sys.argv[2]) + voice = str(sys.argv[3]) + rate = int(sys.argv[4]) + output_file = str(sys.argv[5]) rates = f"+{rate}%" if rate >= 0 else f"{rate}%" - + if tts_file and os.path.exists(tts_file): + text = "" + try: + with open(tts_file, 'r', encoding="utf-8") as file: + text = file.read() + except UnicodeDecodeError: + with open(tts_file, 'r') as file: + text = file.read() await edge_tts.Communicate(text, voice, rate=rates).save(output_file) print(f"TTS with {voice} completed. Output TTS file: '{output_file}'") diff --git a/tabs/tts/tts.py b/tabs/tts/tts.py index de7896320..066331da6 100644 --- a/tabs/tts/tts.py +++ b/tabs/tts/tts.py @@ -33,10 +33,14 @@ def process_input(file_path): - with open(file_path, "r", encoding="utf-8") as file: - file_contents = file.read() - gr.Info(f"The text from the txt file has been loaded!") - return file_contents, None + try: + with open(file_path, "r", encoding="utf-8") as file: + file.read() + gr.Info(f"The file has been loaded!") + return file_path, file_path + except UnicodeDecodeError: + gr.Info(f"The file has to be in UTF-8 encoding.") + return None, None # TTS tab @@ -115,6 +119,12 @@ def tts_tab(): label=i18n("Upload a .txt file"), type="filepath", ) + input_tts_path = gr.Textbox( + label=i18n("Input path for text file"), + placeholder=i18n("The path to the text file that contains content for text to speech."), + value="", + interactive=True, + ) with gr.Accordion(i18n("Advanced Settings"), open=False): with gr.Column(): @@ -346,7 +356,7 @@ def toggle_visible_embedder_custom(embedder_model): txt_file.upload( fn=process_input, inputs=[txt_file], - outputs=[tts_text, txt_file], + outputs=[input_tts_path, txt_file], ) embedder_model.change( fn=toggle_visible_embedder_custom, @@ -366,6 +376,7 @@ def toggle_visible_embedder_custom(embedder_model): convert_button.click( fn=run_tts_script, inputs=[ + input_tts_path, tts_text, tts_voice, tts_rate, From b3d0ffe29c3f002dcc9bec15f2f0f90abc5c0a93 Mon Sep 17 00:00:00 2001 From: Alexey Shmelev Date: Sun, 13 Oct 2024 10:30:26 -0400 Subject: [PATCH 2/3] added i18n --- assets/i18n/languages/en_US.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/assets/i18n/languages/en_US.json b/assets/i18n/languages/en_US.json index a620e2174..dc71e6f77 100644 --- a/assets/i18n/languages/en_US.json +++ b/assets/i18n/languages/en_US.json @@ -203,6 +203,8 @@ "Text to Synthesize": "Text to Synthesize", "Enter the text to synthesize.": "Enter the text to synthesize.", "Upload a .txt file": "Upload a .txt file", + "Input path for text file": "Input path for text file", + "The path to the text file that contains content for text to speech.": "The path to the text file that contains content for text to speech.", "Enter text to synthesize": "Enter text to synthesize", "Output Path for TTS Audio": "Output Path for TTS Audio", "Output Path for RVC Audio": "Output Path for RVC Audio", From 6d92e93ec5e351e09edf8a36203f5f0bf77deba0 Mon Sep 17 00:00:00 2001 From: Alexey Shmelev Date: Sun, 13 Oct 2024 10:37:05 -0400 Subject: [PATCH 3/3] added missing argument --- core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/core.py b/core.py index a0659e7c0..b0e0e87c2 100644 --- a/core.py +++ b/core.py @@ -2433,6 +2433,7 @@ def main(): ) elif args.mode == "tts": run_tts_script( + tts_file=args.tts_file, tts_text=args.tts_text, tts_voice=args.tts_voice, tts_rate=args.tts_rate,