From a97d630353fb9a74d8c7ffdcd5e67f062026d04a Mon Sep 17 00:00:00 2001 From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com> Date: Thu, 15 Aug 2024 10:56:56 -0300 Subject: [PATCH 1/4] Add cut audios option --- core.py | 15 ++++++++- rvc/train/preprocess/preprocess.py | 50 +++++++++++++++++------------- tabs/train/train.py | 12 ++++++- 3 files changed, 54 insertions(+), 23 deletions(-) diff --git a/core.py b/core.py index a3d58c54..2e3dc5ca 100644 --- a/core.py +++ b/core.py @@ -243,7 +243,11 @@ def run_tts_script( # Preprocess def run_preprocess_script( - model_name: str, dataset_path: str, sample_rate: int, cpu_cores: int + model_name: str, + dataset_path: str, + sample_rate: int, + cpu_cores: int, + cut_preprocess: bool, ): config = get_config() per = 3.0 if config.is_half else 3.7 @@ -259,6 +263,7 @@ def run_preprocess_script( sample_rate, per, cpu_cores, + cut_preprocess, ], ), ] @@ -991,6 +996,13 @@ def parse_arguments(): help="Number of CPU cores to use for preprocessing.", choices=range(1, 65), ) + preprocess_parser.add_argument( + "--cut_preprocess", + type=lambda x: bool(strtobool(x)), + choices=[True, False], + help="Cut the dataset into smaller segments for faster preprocessing.", + default=True, + ) # Parser for 'extract' mode extract_parser = subparsers.add_parser( @@ -1449,6 +1461,7 @@ def main(): dataset_path=args.dataset_path, sample_rate=args.sample_rate, cpu_cores=args.cpu_cores, + cut_preprocess=args.cut_preprocess, ) elif args.mode == "extract": run_extract_script( diff --git a/rvc/train/preprocess/preprocess.py b/rvc/train/preprocess/preprocess.py index ad5a0631..c07ee9f8 100644 --- a/rvc/train/preprocess/preprocess.py +++ b/rvc/train/preprocess/preprocess.py @@ -74,7 +74,7 @@ def process_audio_segment(self, audio_segment: torch.Tensor, idx0: int, idx1: in wav_16k_path = os.path.join(self.wavs16k_dir, f"{idx0}_{idx1}.wav") self._write_audio(audio_16k, wav_16k_path, SAMPLE_RATE_16K) - def process_audio(self, path: str, idx0: int): + def process_audio(self, path: str, idx0: int, cut_preprocess: bool): try: audio = load_audio(path, self.sr) audio = torch.tensor( @@ -82,34 +82,39 @@ def process_audio(self, path: str, idx0: int): ).float() idx1 = 0 - for audio_segment in self.slicer.slice(audio.cpu().numpy()): - audio_segment = torch.tensor(audio_segment, device=self.device).float() - i = 0 - while True: - start = int(self.sr * (self.per - OVERLAP) * i) - i += 1 - if len(audio_segment[start:]) > (self.per + OVERLAP) * self.sr: - tmp_audio = audio_segment[ - start : start + int(self.per * self.sr) - ] - self.process_audio_segment(tmp_audio, idx0, idx1) - idx1 += 1 - else: - tmp_audio = audio_segment[start:] - self.process_audio_segment(tmp_audio, idx0, idx1) - idx1 += 1 - break + if cut_preprocess: + for audio_segment in self.slicer.slice(audio.cpu().numpy()): + audio_segment = torch.tensor( + audio_segment, device=self.device + ).float() + i = 0 + while True: + start = int(self.sr * (self.per - OVERLAP) * i) + i += 1 + if len(audio_segment[start:]) > (self.per + OVERLAP) * self.sr: + tmp_audio = audio_segment[ + start : start + int(self.per * self.sr) + ] + self.process_audio_segment(tmp_audio, idx0, idx1) + idx1 += 1 + else: + tmp_audio = audio_segment[start:] + self.process_audio_segment(tmp_audio, idx0, idx1) + idx1 += 1 + break + else: + self.process_audio_segment(audio, idx0, idx1) except Exception as error: print(f"An error occurred on {path} path: {error}") - def process_audio_file(self, file_path_idx): + def process_audio_file(self, file_path_idx, 
cut_preprocess): file_path, idx0 = file_path_idx ext = os.path.splitext(file_path)[1].lower() if ext not in [".wav"]: audio = AudioSegment.from_file(file_path) file_path = os.path.join("/tmp", f"{idx0}.wav") audio.export(file_path, format="wav") - self.process_audio(file_path, idx0) + self.process_audio(file_path, idx0, cut_preprocess) def preprocess_training_set( @@ -118,6 +123,7 @@ def preprocess_training_set( num_processes: int, exp_dir: str, per: float, + cut_preprocess: bool, ): start_time = time.time() @@ -132,7 +138,7 @@ def preprocess_training_set( ctx = multiprocessing.get_context("spawn") with ctx.Pool(processes=num_processes) as pool: - pool.map(pp.process_audio_file, files) + pool.starmap(pp.process_audio_file, [(file, cut_preprocess) for file in files]) elapsed_time = time.time() - start_time print(f"Preprocess completed in {elapsed_time:.2f} seconds.") @@ -146,6 +152,7 @@ def preprocess_training_set( num_processes = ( int(sys.argv[5]) if len(sys.argv) > 5 else multiprocessing.cpu_count() ) + cut_preprocess = bool(sys.argv[6]) if len(sys.argv) > 6 else True preprocess_training_set( input_root, @@ -153,4 +160,5 @@ def preprocess_training_set( num_processes, experiment_directory, percentage, + cut_preprocess, ) diff --git a/tabs/train/train.py b/tabs/train/train.py index 5293baa1..c1fae348 100644 --- a/tabs/train/train.py +++ b/tabs/train/train.py @@ -362,7 +362,16 @@ def train_tab(): ), interactive=True, ) - + with gr.Accordion(i18n("advanced settings"), open=False): + cut_preprocess = gr.Checkbox( + label=i18n("Cut the audio files"), + info=i18n( + "Leave RVC's standard audio processing, where it cuts the files." + ), + value=True, + interactive=True, + visible=True, + ) preprocess_output_info = gr.Textbox( label=i18n("Output Information"), info=i18n("The output information will be displayed here."), @@ -380,6 +389,7 @@ def train_tab(): dataset_path, sampling_rate, cpu_cores_preprocess, + cut_preprocess, ], outputs=[preprocess_output_info], api_name="preprocess_dataset", From 466564bdd01df3338ab465991a34ad8b72da8146 Mon Sep 17 00:00:00 2001 From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com> Date: Thu, 15 Aug 2024 19:08:43 -0300 Subject: [PATCH 2/4] Fix --- rvc/train/preprocess/preprocess.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rvc/train/preprocess/preprocess.py b/rvc/train/preprocess/preprocess.py index c07ee9f8..08a94b68 100644 --- a/rvc/train/preprocess/preprocess.py +++ b/rvc/train/preprocess/preprocess.py @@ -9,6 +9,7 @@ import numpy as np import multiprocessing from pydub import AudioSegment +from distutils.util import strtobool multiprocessing.set_start_method("spawn", force=True) @@ -152,7 +153,7 @@ def preprocess_training_set( num_processes = ( int(sys.argv[5]) if len(sys.argv) > 5 else multiprocessing.cpu_count() ) - cut_preprocess = bool(sys.argv[6]) if len(sys.argv) > 6 else True + cut_preprocess = strtobool(sys.argv[6]) preprocess_training_set( input_root, From 63623628efe6177e68c91fb6ce8166e53c028f7f Mon Sep 17 00:00:00 2001 From: Guilherme <86894155+ShiromiyaG@users.noreply.github.com> Date: Fri, 16 Aug 2024 09:31:31 -0300 Subject: [PATCH 3/4] Fix --- core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/core.py b/core.py index 2e3dc5ca..8ae0f6c2 100644 --- a/core.py +++ b/core.py @@ -1002,6 +1002,7 @@ def parse_arguments(): choices=[True, False], help="Cut the dataset into smaller segments for faster preprocessing.", default=True, + required=False, ) # Parser for 'extract' mode From 
c870706c7cd81866c0a0cfc3a635e582fd9cacd4 Mon Sep 17 00:00:00 2001 From: Pascal Aznar Date: Fri, 16 Aug 2024 16:22:57 +0200 Subject: [PATCH 4/4] Minor changes --- assets/i18n/languages/en_US.json | 2 ++ tabs/train/train.py | 19 +++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/assets/i18n/languages/en_US.json b/assets/i18n/languages/en_US.json index 2245f545..5e4b059b 100644 --- a/assets/i18n/languages/en_US.json +++ b/assets/i18n/languages/en_US.json @@ -17,6 +17,8 @@ "Settings": "Settings", "Preprocess": "Preprocess", + "Audio cutting": "Audio cutting", + "It's recommended to deactivate this option if your dataset has already been processed.": "It's recommended to deactivate this option if your dataset has already been processed.", "Model Name": "Model Name", "Name of the new model.": "Name of the new model.", "Enter model name": "Enter model name", diff --git a/tabs/train/train.py b/tabs/train/train.py index c1fae348..0d8f894e 100644 --- a/tabs/train/train.py +++ b/tabs/train/train.py @@ -362,16 +362,15 @@ def train_tab(): ), interactive=True, ) - with gr.Accordion(i18n("advanced settings"), open=False): - cut_preprocess = gr.Checkbox( - label=i18n("Cut the audio files"), - info=i18n( - "Leave RVC's standard audio processing, where it cuts the files." - ), - value=True, - interactive=True, - visible=True, - ) + cut_preprocess = gr.Checkbox( + label=i18n("Audio cutting"), + info=i18n( + "It's recommended to deactivate this option if your dataset has already been processed." + ), + value=True, + interactive=True, + visible=True, + ) preprocess_output_info = gr.Textbox( label=i18n("Output Information"), info=i18n("The output information will be displayed here."),
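
For reference, a minimal sketch of driving the new option programmatically. The run_preprocess_script signature below is taken verbatim from PATCH 1/4; the model name, dataset path, sample rate, and core count are placeholder values, not taken from these patches.

    # Assumes core.py from this repository is importable (e.g. the script is run from the repo root).
    from core import run_preprocess_script

    # With cut_preprocess=False, each input file is handed to process_audio_segment whole
    # instead of being sliced into ~3-second chunks, which is useful for datasets that
    # have already been segmented.
    run_preprocess_script(
        model_name="my-voice",                    # hypothetical model name
        dataset_path="assets/datasets/my-voice",  # hypothetical dataset folder
        sample_rate=40000,                        # hypothetical target sample rate
        cpu_cores=4,                              # hypothetical worker count
        cut_preprocess=False,                     # option introduced by this patch series
    )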