Minor changes to preprocessing (rename `no_filters` to `process_effects`)

IAHispano · Aug 17, 2024 · cd5597f
1 parent d85e980
commit cd5597f
Show file tree

Hide file tree

Showing 4 changed files with 37 additions and 54 deletions.
diff --git a/assets/i18n/languages/en_US.json b/assets/i18n/languages/en_US.json
@@ -3,7 +3,6 @@
   "This section contains some extra utilities that often may be in experimental phases.": "This section contains some extra utilities that often may be in experimental phases.",
   "Output Information": "Output Information",
   "The output information will be displayed here.": "The output information will be displayed here.",
-
   "Inference": "Inference",
   "Train": "Train",
   "Extra": "Extra",
@@ -15,12 +14,10 @@
   "Download": "Download",
   "Report a Bug": "Report a Bug",
   "Settings": "Settings",
-
   "Preprocess": "Preprocess",
   "Audio cutting": "Audio cutting",
   "It's recommended to deactivate this option if your dataset has already been processed.": "It's recommended to deactivate this option if your dataset has already been processed.",
-  "No Filters": "No Filters",
-  "Disables all preprocessing filters.": "Disables all preprocessing filters.",
+  "Process effects": "Process effects",
   "Model Name": "Model Name",
   "Name of the new model.": "Name of the new model.",
   "Enter model name": "Enter model name",
@@ -39,7 +36,6 @@
   "RVC Version": "RVC Version",
   "The RVC version of the model.": "The RVC version of the model.",
   "Preprocess Dataset": "Preprocess Dataset",
-
   "Embedder Model": "Embedder Model",
   "Model used for learning speaker embedding.": "Model used for learning speaker embedding.",
   "Extract": "Extract",
@@ -89,26 +85,22 @@
   "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Set the maximum number of epochs you want your model to stop training if no improvement is detected.",
   "Sync Graph": "Sync Graph",
   "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.": "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.",
-
   "Start Training": "Start Training",
   "Stop Training": "Stop Training",
   "Generate Index": "Generate Index",
-
   "Export Model": "Export Model",
   "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.",
   "Exported Pth file": "Exported Pth file",
   "Exported Index file": "Exported Index file",
   "Select the pth file to be exported": "Select the pth file to be exported",
   "Select the index file to be exported": "Select the index file to be exported",
   "Upload": "Upload",
-
   "Voice Model": "Voice Model",
   "Select the voice model to use for the conversion.": "Select the voice model to use for the conversion.",
   "Index File": "Index File",
   "Select the index file to use for the conversion.": "Select the index file to use for the conversion.",
   "Refresh": "Refresh",
   "Unload Voice": "Unload Voice",
-
   "Single": "Single",
   "Upload Audio": "Upload Audio",
   "Select Audio": "Select Audio",
@@ -147,20 +139,16 @@
   "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.",
   "Pitch extraction algorithm": "Pitch extraction algorithm",
   "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.",
-
   "Convert": "Convert",
   "Export Audio": "Export Audio",
-
   "Batch": "Batch",
   "Input Folder": "Input Folder",
   "Select the folder containing the audios to convert.": "Select the folder containing the audios to convert.",
   "Enter input path": "Enter input path",
   "Output Folder": "Output Folder",
   "Select the folder where the output audios will be saved.": "Select the folder where the output audios will be saved.",
   "Enter output path": "Enter output path",
-
   "Get information about the audio": "Get information about the audio",
-
   "## Voice Blender": "## Voice Blender",
   "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.",
   "Voice Blender": "Voice Blender",
@@ -169,7 +157,6 @@
   "Blend Ratio": "Blend Ratio",
   "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Adjusting the position more towards one side or the other will make the model more similar to the first or second.",
   "Fusion": "Fusion",
-
   "Path to Model": "Path to Model",
   "Enter path to model": "Enter path to model",
   "Model information to be placed": "Model information to be placed",
@@ -182,23 +169,18 @@
   "Model conversion": "Model conversion",
   "Pth file": "Pth file",
   "Output of the pth file": "Output of the pth file",
-
   "Extract F0 Curve": "Extract F0 Curve",
   "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls.": "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls.",
-
   "# How to Report an Issue on GitHub": "# How to Report an Issue on GitHub",
   "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.",
   "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).",
   "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.",
   "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.",
-
   "Record Screen": "Record Screen",
   "Record": "Record",
   "Stop Recording": "Stop Recording",
-
   "Introduce the model .pth path": "Introduce the model .pth path",
   "See Model Information": "See Model Information",
-
   "## Download Model": "## Download Model",
   "Model Link": "Model Link",
   "Introduce the model link": "Introduce the model link",
@@ -212,7 +194,6 @@
   "## Download Pretrained Models": "## Download Pretrained Models",
   "Select the pretrained model you want to download.": "Select the pretrained model you want to download.",
   "And select the sampling rate": "And select the sampling rate.",
-
   "TTS Voices": "TTS Voices",
   "TTS Speed": "TTS Speed",
   "Increase or decrease TTS speed.": "Increase or decrease TTS speed.",
@@ -223,7 +204,6 @@
   "Enter text to synthesize": "Enter text to synthesize",
   "Output Path for TTS Audio": "Output Path for TTS Audio",
   "Output Path for RVC Audio": "Output Path for RVC Audio",
-
   "Enable Applio integration with Discord presence": "Enable Applio integration with Discord presence",
   "It will activate the possibility of displaying the current Applio activity in Discord.": "It will activate the possibility of displaying the current Applio activity in Discord.",
   "Enable Applio integration with applio.org/models using flask": "Enable Applio integration with applio.org/models using flask",
@@ -238,11 +218,9 @@
   "Precision": "Precision",
   "Select the precision you want to use for training and inference.": "Select the precision you want to use for training and inference.",
   "Update precision": "Update precision",
-
   "Plugin Installer": "Plugin Installer",
   "Drag your plugin.zip to install it": "Drag your plugin.zip to install it",
-
   "Version Checker": "Version Checker",
   "Check which version of Applio is the latest to see if you need to update.": "Check which version of Applio is the latest to see if you need to update.",
   "Check for updates": "Check for updates"
-}
+}
diff --git a/core.py b/core.py
@@ -260,7 +260,7 @@ def run_preprocess_script(
     sample_rate: int,
     cpu_cores: int,
     cut_preprocess: bool,
-    no_filters: bool,
+    process_effects: bool,
 ):
     config = get_config()
     per = 3.0 if config.is_half else 3.7
@@ -277,7 +277,7 @@ def run_preprocess_script(
                 per,
                 cpu_cores,
                 cut_preprocess,
-                no_filters,
+                process_effects,
             ],
         ),
     ]
@@ -1051,7 +1051,7 @@ def parse_arguments():
         required=False,
     )
     preprocess_parser.add_argument(
-        "--no_filters",
+        "--process_effects",
         type=lambda x: bool(strtobool(x)),
         choices=[True, False],
         help="Disable all filters during preprocessing.",
@@ -1525,7 +1525,7 @@ def main():
                 sample_rate=args.sample_rate,
                 cpu_cores=args.cpu_cores,
                 cut_preprocess=args.cut_preprocess,
-                no_filters=args.no_filters,
+                process_effects=args.process_effects,
             )
         elif args.mode == "extract":
             run_extract_script(

diff --git a/rvc/train/preprocess/preprocess.py b/rvc/train/preprocess/preprocess.py
@@ -71,9 +71,9 @@ def process_audio_segment(
         audio_segment: torch.Tensor,
         idx0: int,
         idx1: int,
-        no_filters: bool,
+        process_effects: bool,
     ):
-        if no_filters:
+        if process_effects == False:
             normalized_audio = audio_segment
         else:
             normalized_audio = self._normalize_audio(audio_segment)
@@ -92,11 +92,11 @@ def process_audio_segment(
         self._write_audio(audio_16k, wav_16k_path, SAMPLE_RATE_16K)
 
     def process_audio(
-        self, path: str, idx0: int, cut_preprocess: bool, no_filters: bool
+        self, path: str, idx0: int, cut_preprocess: bool, process_effects: bool
     ):
         try:
             audio = load_audio(path, self.sr)
-            if no_filters:
+            if process_effects == False:
                 audio = torch.tensor(audio, device=self.device).float()
             else:
                 audio = torch.tensor(
@@ -117,34 +117,34 @@ def process_audio(
                                 start : start + int(self.per * self.sr)
                             ]
                             self.process_audio_segment(
-                                tmp_audio, idx0, idx1, no_filters
+                                tmp_audio, idx0, idx1, process_effects
                             )
                             idx1 += 1
                         else:
                             tmp_audio = audio_segment[start:]
                             self.process_audio_segment(
-                                tmp_audio, idx0, idx1, no_filters
+                                tmp_audio, idx0, idx1, process_effects
                             )
                             idx1 += 1
                             break
             else:
-                self.process_audio_segment(audio, idx0, idx1, no_filters)
+                self.process_audio_segment(audio, idx0, idx1, process_effects)
         except Exception as error:
             print(f"An error occurred on {path} path: {error}")
 
-    def process_audio_file(self, file_path_idx, cut_preprocess, no_filters):
+    def process_audio_file(self, file_path_idx, cut_preprocess, process_effects):
         file_path, idx0 = file_path_idx
         ext = os.path.splitext(file_path)[1].lower()
         if ext not in [".wav"]:
             audio = AudioSegment.from_file(file_path)
             file_path = os.path.join("/tmp", f"{idx0}.wav")
             audio.export(file_path, format="wav")
-        self.process_audio(file_path, idx0, cut_preprocess, no_filters)
+        self.process_audio(file_path, idx0, cut_preprocess, process_effects)
 
 
 def process_file(args):
-    pp, file, cut_preprocess, no_filters = args
-    pp.process_audio_file(file, cut_preprocess, no_filters)
+    pp, file, cut_preprocess, process_effects = args
+    pp.process_audio_file(file, cut_preprocess, process_effects)
 
 
 def preprocess_training_set(
@@ -154,7 +154,7 @@ def preprocess_training_set(
     exp_dir: str,
     per: float,
     cut_preprocess: bool,
-    no_filters: bool,
+    process_effects: bool,
 ):
     start_time = time.time()
 
@@ -170,7 +170,8 @@ def preprocess_training_set(
     ctx = multiprocessing.get_context("spawn")
     with ctx.Pool(processes=num_processes) as pool:
         pool.map(
-            process_file, [(pp, file, cut_preprocess, no_filters) for file in files]
+            process_file,
+            [(pp, file, cut_preprocess, process_effects) for file in files],
         )
 
     elapsed_time = time.time() - start_time
@@ -186,7 +187,7 @@ def preprocess_training_set(
         int(sys.argv[5]) if len(sys.argv) > 5 else multiprocessing.cpu_count()
     )
     cut_preprocess = strtobool(sys.argv[6])
-    no_filters = strtobool(sys.argv[7])
+    process_effects = strtobool(sys.argv[7])
 
     preprocess_training_set(
         input_root,
@@ -195,5 +196,5 @@ def preprocess_training_set(
         experiment_directory,
         percentage,
         cut_preprocess,
-        no_filters,
+        process_effects,
     )
diff --git a/tabs/train/train.py b/tabs/train/train.py
@@ -1,17 +1,19 @@
 import os
-from multiprocessing import cpu_count
-import sys
 import shutil
+import sys
+from multiprocessing import cpu_count
+
 import gradio as gr
+
 from assets.i18n.i18n import I18nAuto
 from core import (
-    run_preprocess_script,
     run_extract_script,
-    run_train_script,
     run_index_script,
+    run_preprocess_script,
     run_prerequisites_script,
+    run_train_script,
 )
-from rvc.configs.config import max_vram_gpu, get_gpu_info, get_number_of_gpus
+from rvc.configs.config import get_gpu_info, get_number_of_gpus, max_vram_gpu
 from rvc.lib.utils import format_title
 from tabs.settings.restart import stop_train
 
@@ -370,11 +372,13 @@ def train_tab():
                     value=True,
                     interactive=True,
                     visible=True,
-                )                
-                no_filters = gr.Checkbox(
-                    label=i18n("No Filters"),
-                    info=i18n("Disables all preprocessing filters."),
-                    value=False,
+                )
+                process_effects = gr.Checkbox(
+                    label=i18n("Process effects"),
+                    info=i18n(
+                        "It's recommended to deactivate this option if your dataset has already been processed."
+                    ),
+                    value=True,
                     interactive=True,
                     visible=True,
                 )
@@ -396,7 +400,7 @@ def train_tab():
                     sampling_rate,
                     cpu_cores_preprocess,
                     cut_preprocess,
-                    no_filters,
+                    process_effects,
                 ],
                 outputs=[preprocess_output_info],
                 api_name="preprocess_dataset",