Skip to content

Commit

Permalink
Merge pull request #598 from ShiromiyaG/new-functions
Browse files Browse the repository at this point in the history
Option to cut or not cut the audios in preprocess
  • Loading branch information
blaisewf authored Aug 16, 2024
2 parents 642c6da + c870706 commit 29c5aac
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 23 deletions.
2 changes: 2 additions & 0 deletions assets/i18n/languages/en_US.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
"Settings": "Settings",

"Preprocess": "Preprocess",
"Audio cutting": "Audio cutting",
"It's recommended to deactivate this option if your dataset has already been processed.": "It's recommended to deactivate this option if your dataset has already been processed.",
"Model Name": "Model Name",
"Name of the new model.": "Name of the new model.",
"Enter model name": "Enter model name",
Expand Down
16 changes: 15 additions & 1 deletion core.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,11 @@ def run_tts_script(

# Preprocess
def run_preprocess_script(
model_name: str, dataset_path: str, sample_rate: int, cpu_cores: int
model_name: str,
dataset_path: str,
sample_rate: int,
cpu_cores: int,
cut_preprocess: bool,
):
config = get_config()
per = 3.0 if config.is_half else 3.7
Expand All @@ -259,6 +263,7 @@ def run_preprocess_script(
sample_rate,
per,
cpu_cores,
cut_preprocess,
],
),
]
Expand Down Expand Up @@ -976,6 +981,14 @@ def parse_arguments():
help="Number of CPU cores to use for preprocessing.",
choices=range(1, 65),
)
preprocess_parser.add_argument(
"--cut_preprocess",
type=lambda x: bool(strtobool(x)),
choices=[True, False],
help="Cut the dataset into smaller segments for faster preprocessing.",
default=True,
required=False,
)

# Parser for 'extract' mode
extract_parser = subparsers.add_parser(
Expand Down Expand Up @@ -1442,6 +1455,7 @@ def main():
dataset_path=args.dataset_path,
sample_rate=args.sample_rate,
cpu_cores=args.cpu_cores,
cut_preprocess=args.cut_preprocess,
)
elif args.mode == "extract":
run_extract_script(
Expand Down
51 changes: 30 additions & 21 deletions rvc/train/preprocess/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import multiprocessing
from pydub import AudioSegment
from distutils.util import strtobool

multiprocessing.set_start_method("spawn", force=True)

Expand Down Expand Up @@ -81,42 +82,47 @@ def process_audio_segment(self, audio_segment: torch.Tensor, idx0: int, idx1: in
wav_16k_path = os.path.join(self.wavs16k_dir, f"{idx0}_{idx1}.wav")
self._write_audio(audio_16k, wav_16k_path, SAMPLE_RATE_16K)

def process_audio(self, path: str, idx0: int, cut_preprocess: bool = True):
    """Load one audio file, filter it and write it out as one or more segments.

    Args:
        path: Audio file passed to ``load_audio`` together with ``self.sr``.
        idx0: Index of the source file; forwarded to
            ``process_audio_segment`` as the first index of every segment.
        cut_preprocess: When true, the filtered audio is split by
            ``self.slicer`` and every slice is chopped into overlapping
            windows. When false, the whole filtered audio is written as a
            single segment with ``idx1 == 0``. Defaults to ``True`` so that
            callers written before this flag existed keep the cutting
            behaviour.

    Errors are deliberately best-effort: any exception is printed together
    with ``path`` and swallowed, so the function never raises.
    """
    try:
        audio = load_audio(path, self.sr)
        # Filter with the coefficients in self.b_high / self.a_high
        # (presumably a high-pass, judging by the names - confirm in __init__).
        audio = torch.tensor(
            signal.lfilter(self.b_high, self.a_high, audio), device=self.device
        ).float()

        idx1 = 0
        if not cut_preprocess:
            # Dataset is assumed to be pre-cut: keep the file in one piece.
            self.process_audio_segment(audio, idx0, idx1)
            return

        # Loop-invariant sizes, all expressed in samples.
        hop = self.sr * (self.per - OVERLAP)
        window = int(self.per * self.sr)
        tail_limit = (self.per + OVERLAP) * self.sr

        for audio_segment in self.slicer.slice(audio.cpu().numpy()):
            audio_segment = torch.tensor(audio_segment, device=self.device).float()
            i = 0
            while True:
                start = int(hop * i)
                i += 1
                remainder = audio_segment[start:]
                # Once what is left no longer exceeds one window plus the
                # overlap, emit it whole as the final piece of this slice.
                is_last = len(remainder) <= tail_limit
                tmp_audio = remainder if is_last else remainder[:window]
                self.process_audio_segment(tmp_audio, idx0, idx1)
                idx1 += 1
                if is_last:
                    break
    except Exception as error:
        print(f"An error occurred on {path} path: {error}")

def process_audio_file(self, file_path_idx, cut_preprocess=True):
    """Preprocess one dataset file, converting it to wav first if needed.

    Args:
        file_path_idx: ``(file_path, idx0)`` pair - the audio file and the
            index that is forwarded to ``process_audio``.
        cut_preprocess: Forwarded unchanged to ``process_audio``. Defaults to
            ``True`` so one-argument callers (e.g. ``pool.map``) still work.

    Files whose extension is not ``.wav`` (case-insensitive) are decoded with
    pydub and exported as wav into a private temporary directory, which is
    removed again once ``process_audio`` has returned.
    """
    import tempfile  # local import: keeps this block self-contained

    file_path, idx0 = file_path_idx
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".wav":
        self.process_audio(file_path, idx0, cut_preprocess)
        return

    # A per-call temporary directory replaces the hard-coded "/tmp": it works
    # on platforms without /tmp, cannot collide with another run that uses the
    # same index, and does not leave converted files behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, f"{idx0}.wav")
        audio = AudioSegment.from_file(file_path)
        # export() hands back the open output file; close it so the file can
        # be re-read and later deleted on platforms that lock open files.
        audio.export(wav_path, format="wav").close()
        self.process_audio(wav_path, idx0, cut_preprocess)


def preprocess_training_set(
Expand All @@ -125,6 +131,7 @@ def preprocess_training_set(
num_processes: int,
exp_dir: str,
per: float,
cut_preprocess: bool,
):
start_time = time.time()

Expand All @@ -139,7 +146,7 @@ def preprocess_training_set(

ctx = multiprocessing.get_context("spawn")
with ctx.Pool(processes=num_processes) as pool:
pool.map(pp.process_audio_file, files)
pool.starmap(pp.process_audio_file, [(file, cut_preprocess) for file in files])

elapsed_time = time.time() - start_time
print(f"Preprocess completed in {elapsed_time:.2f} seconds.")
Expand All @@ -153,11 +160,13 @@ def preprocess_training_set(
num_processes = (
    int(sys.argv[5]) if len(sys.argv) > 5 else multiprocessing.cpu_count()
)
# The cut flag is optional, like the process count above: when it is omitted
# fall back to True, the same default the CLI parser in core.py uses.
# strtobool() returns 1/0, so wrap it in bool() to pass a real boolean on.
# NOTE(review): distutils (the source of strtobool) is removed in Python 3.12;
# the import at the top of this file will need a replacement there.
cut_preprocess = bool(strtobool(sys.argv[6])) if len(sys.argv) > 6 else True

preprocess_training_set(
    input_root,
    sample_rate,
    num_processes,
    experiment_directory,
    percentage,
    cut_preprocess,
)
11 changes: 10 additions & 1 deletion tabs/train/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,15 @@ def train_tab():
),
interactive=True,
)

cut_preprocess = gr.Checkbox(
label=i18n("Audio cutting"),
info=i18n(
"It's recommended to deactivate this option if your dataset has already been processed."
),
value=True,
interactive=True,
visible=True,
)
preprocess_output_info = gr.Textbox(
label=i18n("Output Information"),
info=i18n("The output information will be displayed here."),
Expand All @@ -380,6 +388,7 @@ def train_tab():
dataset_path,
sampling_rate,
cpu_cores_preprocess,
cut_preprocess,
],
outputs=[preprocess_output_info],
api_name="preprocess_dataset",
Expand Down

0 comments on commit 29c5aac

Please sign in to comment.