Skip to content

Commit

Permalink
minor changes on preprocess
Browse files Browse the repository at this point in the history
  • Loading branch information
blaisewf committed Aug 17, 2024
1 parent d85e980 commit cd5597f
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 54 deletions.
26 changes: 2 additions & 24 deletions assets/i18n/languages/en_US.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"This section contains some extra utilities that often may be in experimental phases.": "This section contains some extra utilities that often may be in experimental phases.",
"Output Information": "Output Information",
"The output information will be displayed here.": "The output information will be displayed here.",

"Inference": "Inference",
"Train": "Train",
"Extra": "Extra",
Expand All @@ -15,12 +14,10 @@
"Download": "Download",
"Report a Bug": "Report a Bug",
"Settings": "Settings",

"Preprocess": "Preprocess",
"Audio cutting": "Audio cutting",
"It's recommended to deactivate this option if your dataset has already been processed.": "It's recommended to deactivate this option if your dataset has already been processed.",
"No Filters": "No Filters",
"Disables all preprocessing filters.": "Disables all preprocessing filters.",
"Process effects": "Process effects",
"Model Name": "Model Name",
"Name of the new model.": "Name of the new model.",
"Enter model name": "Enter model name",
Expand All @@ -39,7 +36,6 @@
"RVC Version": "RVC Version",
"The RVC version of the model.": "The RVC version of the model.",
"Preprocess Dataset": "Preprocess Dataset",

"Embedder Model": "Embedder Model",
"Model used for learning speaker embedding.": "Model used for learning speaker embedding.",
"Extract": "Extract",
Expand Down Expand Up @@ -89,26 +85,22 @@
"Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Set the maximum number of epochs you want your model to stop training if no improvement is detected.",
"Sync Graph": "Sync Graph",
"Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.": "Synchronize the graph of the tensorboard. Only enable this setting if you are training a new model.",

"Start Training": "Start Training",
"Stop Training": "Stop Training",
"Generate Index": "Generate Index",

"Export Model": "Export Model",
"The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.",
"Exported Pth file": "Exported Pth file",
"Exported Index file": "Exported Index file",
"Select the pth file to be exported": "Select the pth file to be exported",
"Select the index file to be exported": "Select the index file to be exported",
"Upload": "Upload",

"Voice Model": "Voice Model",
"Select the voice model to use for the conversion.": "Select the voice model to use for the conversion.",
"Index File": "Index File",
"Select the index file to use for the conversion.": "Select the index file to use for the conversion.",
"Refresh": "Refresh",
"Unload Voice": "Unload Voice",

"Single": "Single",
"Upload Audio": "Upload Audio",
"Select Audio": "Select Audio",
Expand Down Expand Up @@ -147,20 +139,16 @@
"Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.",
"Pitch extraction algorithm": "Pitch extraction algorithm",
"Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.",

"Convert": "Convert",
"Export Audio": "Export Audio",

"Batch": "Batch",
"Input Folder": "Input Folder",
"Select the folder containing the audios to convert.": "Select the folder containing the audios to convert.",
"Enter input path": "Enter input path",
"Output Folder": "Output Folder",
"Select the folder where the output audios will be saved.": "Select the folder where the output audios will be saved.",
"Enter output path": "Enter output path",

"Get information about the audio": "Get information about the audio",

"## Voice Blender": "## Voice Blender",
"Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.",
"Voice Blender": "Voice Blender",
Expand All @@ -169,7 +157,6 @@
"Blend Ratio": "Blend Ratio",
"Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Adjusting the position more towards one side or the other will make the model more similar to the first or second.",
"Fusion": "Fusion",

"Path to Model": "Path to Model",
"Enter path to model": "Enter path to model",
"Model information to be placed": "Model information to be placed",
Expand All @@ -182,23 +169,18 @@
"Model conversion": "Model conversion",
"Pth file": "Pth file",
"Output of the pth file": "Output of the pth file",

"Extract F0 Curve": "Extract F0 Curve",
"The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls.": "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls.",

"# How to Report an Issue on GitHub": "# How to Report an Issue on GitHub",
"1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.",
"2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).",
"3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.",
"4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.",

"Record Screen": "Record Screen",
"Record": "Record",
"Stop Recording": "Stop Recording",

"Introduce the model .pth path": "Introduce the model .pth path",
"See Model Information": "See Model Information",

"## Download Model": "## Download Model",
"Model Link": "Model Link",
"Introduce the model link": "Introduce the model link",
Expand All @@ -212,7 +194,6 @@
"## Download Pretrained Models": "## Download Pretrained Models",
"Select the pretrained model you want to download.": "Select the pretrained model you want to download.",
"And select the sampling rate": "And select the sampling rate.",

"TTS Voices": "TTS Voices",
"TTS Speed": "TTS Speed",
"Increase or decrease TTS speed.": "Increase or decrease TTS speed.",
Expand All @@ -223,7 +204,6 @@
"Enter text to synthesize": "Enter text to synthesize",
"Output Path for TTS Audio": "Output Path for TTS Audio",
"Output Path for RVC Audio": "Output Path for RVC Audio",

"Enable Applio integration with Discord presence": "Enable Applio integration with Discord presence",
"It will activate the possibility of displaying the current Applio activity in Discord.": "It will activate the possibility of displaying the current Applio activity in Discord.",
"Enable Applio integration with applio.org/models using flask": "Enable Applio integration with applio.org/models using flask",
Expand All @@ -238,11 +218,9 @@
"Precision": "Precision",
"Select the precision you want to use for training and inference.": "Select the precision you want to use for training and inference.",
"Update precision": "Update precision",

"Plugin Installer": "Plugin Installer",
"Drag your plugin.zip to install it": "Drag your plugin.zip to install it",

"Version Checker": "Version Checker",
"Check which version of Applio is the latest to see if you need to update.": "Check which version of Applio is the latest to see if you need to update.",
"Check for updates": "Check for updates"
}
}
8 changes: 4 additions & 4 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def run_preprocess_script(
sample_rate: int,
cpu_cores: int,
cut_preprocess: bool,
no_filters: bool,
process_effects: bool,
):
config = get_config()
per = 3.0 if config.is_half else 3.7
Expand All @@ -277,7 +277,7 @@ def run_preprocess_script(
per,
cpu_cores,
cut_preprocess,
no_filters,
process_effects,
],
),
]
Expand Down Expand Up @@ -1051,7 +1051,7 @@ def parse_arguments():
required=False,
)
preprocess_parser.add_argument(
"--no_filters",
"--process_effects",
type=lambda x: bool(strtobool(x)),
choices=[True, False],
help="Disable all filters during preprocessing.",
Expand Down Expand Up @@ -1525,7 +1525,7 @@ def main():
sample_rate=args.sample_rate,
cpu_cores=args.cpu_cores,
cut_preprocess=args.cut_preprocess,
no_filters=args.no_filters,
process_effects=args.process_effects,
)
elif args.mode == "extract":
run_extract_script(
Expand Down
31 changes: 16 additions & 15 deletions rvc/train/preprocess/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ def process_audio_segment(
audio_segment: torch.Tensor,
idx0: int,
idx1: int,
no_filters: bool,
process_effects: bool,
):
if no_filters:
if process_effects == False:
normalized_audio = audio_segment
else:
normalized_audio = self._normalize_audio(audio_segment)
Expand All @@ -92,11 +92,11 @@ def process_audio_segment(
self._write_audio(audio_16k, wav_16k_path, SAMPLE_RATE_16K)

def process_audio(
self, path: str, idx0: int, cut_preprocess: bool, no_filters: bool
self, path: str, idx0: int, cut_preprocess: bool, process_effects: bool
):
try:
audio = load_audio(path, self.sr)
if no_filters:
if process_effects == False:
audio = torch.tensor(audio, device=self.device).float()
else:
audio = torch.tensor(
Expand All @@ -117,34 +117,34 @@ def process_audio(
start : start + int(self.per * self.sr)
]
self.process_audio_segment(
tmp_audio, idx0, idx1, no_filters
tmp_audio, idx0, idx1, process_effects
)
idx1 += 1
else:
tmp_audio = audio_segment[start:]
self.process_audio_segment(
tmp_audio, idx0, idx1, no_filters
tmp_audio, idx0, idx1, process_effects
)
idx1 += 1
break
else:
self.process_audio_segment(audio, idx0, idx1, no_filters)
self.process_audio_segment(audio, idx0, idx1, process_effects)
except Exception as error:
print(f"An error occurred on {path} path: {error}")

def process_audio_file(self, file_path_idx, cut_preprocess, no_filters):
def process_audio_file(self, file_path_idx, cut_preprocess, process_effects):
file_path, idx0 = file_path_idx
ext = os.path.splitext(file_path)[1].lower()
if ext not in [".wav"]:
audio = AudioSegment.from_file(file_path)
file_path = os.path.join("/tmp", f"{idx0}.wav")
audio.export(file_path, format="wav")
self.process_audio(file_path, idx0, cut_preprocess, no_filters)
self.process_audio(file_path, idx0, cut_preprocess, process_effects)


def process_file(args):
pp, file, cut_preprocess, no_filters = args
pp.process_audio_file(file, cut_preprocess, no_filters)
pp, file, cut_preprocess, process_effects = args
pp.process_audio_file(file, cut_preprocess, process_effects)


def preprocess_training_set(
Expand All @@ -154,7 +154,7 @@ def preprocess_training_set(
exp_dir: str,
per: float,
cut_preprocess: bool,
no_filters: bool,
process_effects: bool,
):
start_time = time.time()

Expand All @@ -170,7 +170,8 @@ def preprocess_training_set(
ctx = multiprocessing.get_context("spawn")
with ctx.Pool(processes=num_processes) as pool:
pool.map(
process_file, [(pp, file, cut_preprocess, no_filters) for file in files]
process_file,
[(pp, file, cut_preprocess, process_effects) for file in files],
)

elapsed_time = time.time() - start_time
Expand All @@ -186,7 +187,7 @@ def preprocess_training_set(
int(sys.argv[5]) if len(sys.argv) > 5 else multiprocessing.cpu_count()
)
cut_preprocess = strtobool(sys.argv[6])
no_filters = strtobool(sys.argv[7])
process_effects = strtobool(sys.argv[7])

preprocess_training_set(
input_root,
Expand All @@ -195,5 +196,5 @@ def preprocess_training_set(
experiment_directory,
percentage,
cut_preprocess,
no_filters,
process_effects,
)
26 changes: 15 additions & 11 deletions tabs/train/train.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import os
from multiprocessing import cpu_count
import sys
import shutil
import sys
from multiprocessing import cpu_count

import gradio as gr

from assets.i18n.i18n import I18nAuto
from core import (
run_preprocess_script,
run_extract_script,
run_train_script,
run_index_script,
run_preprocess_script,
run_prerequisites_script,
run_train_script,
)
from rvc.configs.config import max_vram_gpu, get_gpu_info, get_number_of_gpus
from rvc.configs.config import get_gpu_info, get_number_of_gpus, max_vram_gpu
from rvc.lib.utils import format_title
from tabs.settings.restart import stop_train

Expand Down Expand Up @@ -370,11 +372,13 @@ def train_tab():
value=True,
interactive=True,
visible=True,
)
no_filters = gr.Checkbox(
label=i18n("No Filters"),
info=i18n("Disables all preprocessing filters."),
value=False,
)
process_effects = gr.Checkbox(
label=i18n("Process effects"),
info=i18n(
"It's recommended to deactivate this option if your dataset has already been processed."
),
value=True,
interactive=True,
visible=True,
)
Expand All @@ -396,7 +400,7 @@ def train_tab():
sampling_rate,
cpu_cores_preprocess,
cut_preprocess,
no_filters,
process_effects,
],
outputs=[preprocess_output_info],
api_name="preprocess_dataset",
Expand Down

0 comments on commit cd5597f

Please sign in to comment.