From a97d630353fb9a74d8c7ffdcd5e67f062026d04a Mon Sep 17 00:00:00 2001 From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com> Date: Thu, 15 Aug 2024 10:56:56 -0300 Subject: [PATCH 1/4] Add cut audios option --- core.py | 15 ++++++++- rvc/train/preprocess/preprocess.py | 50 +++++++++++++++++------------- tabs/train/train.py | 12 ++++++- 3 files changed, 54 insertions(+), 23 deletions(-) diff --git a/core.py b/core.py index a3d58c54..2e3dc5ca 100644 --- a/core.py +++ b/core.py @@ -243,7 +243,11 @@ def run_tts_script( # Preprocess def run_preprocess_script( - model_name: str, dataset_path: str, sample_rate: int, cpu_cores: int + model_name: str, + dataset_path: str, + sample_rate: int, + cpu_cores: int, + cut_preprocess: bool, ): config = get_config() per = 3.0 if config.is_half else 3.7 @@ -259,6 +263,7 @@ def run_preprocess_script( sample_rate, per, cpu_cores, + cut_preprocess, ], ), ] @@ -991,6 +996,13 @@ def parse_arguments(): help="Number of CPU cores to use for preprocessing.", choices=range(1, 65), ) + preprocess_parser.add_argument( + "--cut_preprocess", + type=lambda x: bool(strtobool(x)), + choices=[True, False], + help="Cut the dataset into smaller segments for faster preprocessing.", + default=True, + ) # Parser for 'extract' mode extract_parser = subparsers.add_parser( @@ -1449,6 +1461,7 @@ def main(): dataset_path=args.dataset_path, sample_rate=args.sample_rate, cpu_cores=args.cpu_cores, + cut_preprocess=args.cut_preprocess, ) elif args.mode == "extract": run_extract_script( diff --git a/rvc/train/preprocess/preprocess.py b/rvc/train/preprocess/preprocess.py index ad5a0631..c07ee9f8 100644 --- a/rvc/train/preprocess/preprocess.py +++ b/rvc/train/preprocess/preprocess.py @@ -74,7 +74,7 @@ def process_audio_segment(self, audio_segment: torch.Tensor, idx0: int, idx1: in wav_16k_path = os.path.join(self.wavs16k_dir, f"{idx0}_{idx1}.wav") self._write_audio(audio_16k, wav_16k_path, SAMPLE_RATE_16K) - def process_audio(self, path: str, idx0: int): + def process_audio(self, path: str, idx0: int, cut_preprocess: bool): try: audio = load_audio(path, self.sr) audio = torch.tensor( @@ -82,34 +82,39 @@ def process_audio(self, path: str, idx0: int): ).float() idx1 = 0 - for audio_segment in self.slicer.slice(audio.cpu().numpy()): - audio_segment = torch.tensor(audio_segment, device=self.device).float() - i = 0 - while True: - start = int(self.sr * (self.per - OVERLAP) * i) - i += 1 - if len(audio_segment[start:]) > (self.per + OVERLAP) * self.sr: - tmp_audio = audio_segment[ - start : start + int(self.per * self.sr) - ] - self.process_audio_segment(tmp_audio, idx0, idx1) - idx1 += 1 - else: - tmp_audio = audio_segment[start:] - self.process_audio_segment(tmp_audio, idx0, idx1) - idx1 += 1 - break + if cut_preprocess: + for audio_segment in self.slicer.slice(audio.cpu().numpy()): + audio_segment = torch.tensor( + audio_segment, device=self.device + ).float() + i = 0 + while True: + start = int(self.sr * (self.per - OVERLAP) * i) + i += 1 + if len(audio_segment[start:]) > (self.per + OVERLAP) * self.sr: + tmp_audio = audio_segment[ + start : start + int(self.per * self.sr) + ] + self.process_audio_segment(tmp_audio, idx0, idx1) + idx1 += 1 + else: + tmp_audio = audio_segment[start:] + self.process_audio_segment(tmp_audio, idx0, idx1) + idx1 += 1 + break + else: + self.process_audio_segment(audio, idx0, idx1) except Exception as error: print(f"An error occurred on {path} path: {error}") - def process_audio_file(self, file_path_idx): + def process_audio_file(self, file_path_idx, 
cut_preprocess): file_path, idx0 = file_path_idx ext = os.path.splitext(file_path)[1].lower() if ext not in [".wav"]: audio = AudioSegment.from_file(file_path) file_path = os.path.join("/tmp", f"{idx0}.wav") audio.export(file_path, format="wav") - self.process_audio(file_path, idx0) + self.process_audio(file_path, idx0, cut_preprocess) def preprocess_training_set( @@ -118,6 +123,7 @@ def preprocess_training_set( num_processes: int, exp_dir: str, per: float, + cut_preprocess: bool, ): start_time = time.time() @@ -132,7 +138,7 @@ def preprocess_training_set( ctx = multiprocessing.get_context("spawn") with ctx.Pool(processes=num_processes) as pool: - pool.map(pp.process_audio_file, files) + pool.starmap(pp.process_audio_file, [(file, cut_preprocess) for file in files]) elapsed_time = time.time() - start_time print(f"Preprocess completed in {elapsed_time:.2f} seconds.") @@ -146,6 +152,7 @@ def preprocess_training_set( num_processes = ( int(sys.argv[5]) if len(sys.argv) > 5 else multiprocessing.cpu_count() ) + cut_preprocess = bool(sys.argv[6]) if len(sys.argv) > 6 else True preprocess_training_set( input_root, @@ -153,4 +160,5 @@ def preprocess_training_set( num_processes, experiment_directory, percentage, + cut_preprocess, ) diff --git a/tabs/train/train.py b/tabs/train/train.py index 5293baa1..c1fae348 100644 --- a/tabs/train/train.py +++ b/tabs/train/train.py @@ -362,7 +362,16 @@ def train_tab(): ), interactive=True, ) - + with gr.Accordion(i18n("advanced settings"), open=False): + cut_preprocess = gr.Checkbox( + label=i18n("Cut the audio files"), + info=i18n( + "Leave RVC's standard audio processing, where it cuts the files." + ), + value=True, + interactive=True, + visible=True, + ) preprocess_output_info = gr.Textbox( label=i18n("Output Information"), info=i18n("The output information will be displayed here."), @@ -380,6 +389,7 @@ def train_tab(): dataset_path, sampling_rate, cpu_cores_preprocess, + cut_preprocess, ], outputs=[preprocess_output_info], api_name="preprocess_dataset", From 466564bdd01df3338ab465991a34ad8b72da8146 Mon Sep 17 00:00:00 2001 From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com> Date: Thu, 15 Aug 2024 19:08:43 -0300 Subject: [PATCH 2/4] Fix --- rvc/train/preprocess/preprocess.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rvc/train/preprocess/preprocess.py b/rvc/train/preprocess/preprocess.py index c07ee9f8..08a94b68 100644 --- a/rvc/train/preprocess/preprocess.py +++ b/rvc/train/preprocess/preprocess.py @@ -9,6 +9,7 @@ import numpy as np import multiprocessing from pydub import AudioSegment +from distutils.util import strtobool multiprocessing.set_start_method("spawn", force=True) @@ -152,7 +153,7 @@ def preprocess_training_set( num_processes = ( int(sys.argv[5]) if len(sys.argv) > 5 else multiprocessing.cpu_count() ) - cut_preprocess = bool(sys.argv[6]) if len(sys.argv) > 6 else True + cut_preprocess = strtobool(sys.argv[6]) preprocess_training_set( input_root, From 63623628efe6177e68c91fb6ce8166e53c028f7f Mon Sep 17 00:00:00 2001 From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com> Date: Fri, 16 Aug 2024 09:31:31 -0300 Subject: [PATCH 3/4] Fix --- core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/core.py b/core.py index 2e3dc5ca..8ae0f6c2 100644 --- a/core.py +++ b/core.py @@ -1002,6 +1002,7 @@ def parse_arguments(): choices=[True, False], help="Cut the dataset into smaller segments for faster preprocessing.", default=True, + required=False, ) # Parser for 'extract' mode From 
c870706c7cd81866c0a0cfc3a635e582fd9cacd4 Mon Sep 17 00:00:00 2001 From: Pascal Aznar Date: Fri, 16 Aug 2024 16:22:57 +0200 Subject: [PATCH 4/4] Minor changes --- assets/i18n/languages/en_US.json | 2 ++ tabs/train/train.py | 19 +++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/assets/i18n/languages/en_US.json b/assets/i18n/languages/en_US.json index 2245f545..5e4b059b 100644 --- a/assets/i18n/languages/en_US.json +++ b/assets/i18n/languages/en_US.json @@ -17,6 +17,8 @@ "Settings": "Settings", "Preprocess": "Preprocess", + "Audio cutting": "Audio cutting", + "It's recommended to deactivate this option if your dataset has already been processed.": "It's recommended to deactivate this option if your dataset has already been processed.", "Model Name": "Model Name", "Name of the new model.": "Name of the new model.", "Enter model name": "Enter model name", diff --git a/tabs/train/train.py b/tabs/train/train.py index c1fae348..0d8f894e 100644 --- a/tabs/train/train.py +++ b/tabs/train/train.py @@ -362,16 +362,15 @@ def train_tab(): ), interactive=True, ) - with gr.Accordion(i18n("advanced settings"), open=False): - cut_preprocess = gr.Checkbox( - label=i18n("Cut the audio files"), - info=i18n( - "Leave RVC's standard audio processing, where it cuts the files." - ), - value=True, - interactive=True, - visible=True, - ) + cut_preprocess = gr.Checkbox( + label=i18n("Audio cutting"), + info=i18n( + "It's recommended to deactivate this option if your dataset has already been processed." + ), + value=True, + interactive=True, + visible=True, + ) preprocess_output_info = gr.Textbox( label=i18n("Output Information"), info=i18n("The output information will be displayed here."),
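
For reference, a minimal sketch of driving the new option programmatically. The run_preprocess_script signature below is taken verbatim from PATCH 1/4; the model name, dataset path, sample rate, and core count are placeholder values, not taken from these patches.

    # Assumes core.py from this repository is importable (e.g. the script is run from the repo root).
    from core import run_preprocess_script

    # With cut_preprocess=False, each input file is handed to process_audio_segment whole
    # instead of being sliced into ~3-second chunks, which is useful for datasets that
    # have already been segmented.
    run_preprocess_script(
        model_name="my-voice",                    # hypothetical model name
        dataset_path="assets/datasets/my-voice",  # hypothetical dataset folder
        sample_rate=40000,                        # hypothetical target sample rate
        cpu_cores=4,                              # hypothetical worker count
        cut_preprocess=False,                     # option introduced by this patch series
    )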