Skip to content

Commit

Permalink
Merge pull request #892 from IAHispano/exp/dev
Browse files Browse the repository at this point in the history
dev to main
  • Loading branch information
blaisewf authored Dec 1, 2024
2 parents 2a35f50 + 4d689d2 commit de5b3d8
Show file tree
Hide file tree
Showing 22 changed files with 1,087 additions and 1,289 deletions.
1,631 changes: 815 additions & 816 deletions assets/Applio_NoUI.ipynb

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions assets/discord_presence.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from pypresence import Presence
import datetime as dt
import time


class RichPresenceManager:
def __init__(self):
Expand Down Expand Up @@ -32,7 +30,7 @@ def update_presence(self):
details="Open ecosystem for voice cloning",
buttons=[
{"label": "Home", "url": "https://applio.org"},
- {"label": "Download", "url": "https://applio.org/download"},
+ {"label": "Download", "url": "https://applio.org/products/applio"},
],
large_image="logo",
large_text="Experimenting with applio",
Expand Down
70 changes: 0 additions & 70 deletions assets/zluda/README.md

This file was deleted.

37 changes: 3 additions & 34 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def load_voices_data():


voices_data = load_voices_data()
- locales = list({voice["Locale"] for voice in voices_data})
+ locales = list({voice["ShortName"] for voice in voices_data})


@lru_cache(maxsize=None)
Expand Down Expand Up @@ -69,7 +69,6 @@ def run_infer_script(
clean_audio: bool,
clean_strength: float,
export_format: str,
upscale_audio: bool,
f0_file: str,
embedder_model: str,
embedder_model_custom: str = None,
Expand Down Expand Up @@ -134,7 +133,6 @@ def run_infer_script(
"clean_audio": clean_audio,
"clean_strength": clean_strength,
"export_format": export_format,
"upscale_audio": upscale_audio,
"f0_file": f0_file,
"embedder_model": embedder_model,
"embedder_model_custom": embedder_model_custom,
Expand Down Expand Up @@ -207,7 +205,6 @@ def run_batch_infer_script(
clean_audio: bool,
clean_strength: float,
export_format: str,
upscale_audio: bool,
f0_file: str,
embedder_model: str,
embedder_model_custom: str = None,
Expand Down Expand Up @@ -272,7 +269,6 @@ def run_batch_infer_script(
"clean_audio": clean_audio,
"clean_strength": clean_strength,
"export_format": export_format,
"upscale_audio": upscale_audio,
"f0_file": f0_file,
"embedder_model": embedder_model,
"embedder_model_custom": embedder_model_custom,
Expand Down Expand Up @@ -348,7 +344,6 @@ def run_tts_script(
clean_audio: bool,
clean_strength: float,
export_format: str,
upscale_audio: bool,
f0_file: str,
embedder_model: str,
embedder_model_custom: str = None,
Expand Down Expand Up @@ -394,7 +389,6 @@ def run_tts_script(
clean_audio=clean_audio,
clean_strength=clean_strength,
export_format=export_format,
upscale_audio=upscale_audio,
f0_file=f0_file,
embedder_model=embedder_model,
embedder_model_custom=embedder_model_custom,
Expand Down Expand Up @@ -824,14 +818,6 @@ def parse_arguments():
help=embedder_model_custom_description,
default=None,
)
upscale_audio_description = "Upscale the input audio to a higher quality before processing. This can improve the overall quality of the output, especially for low-quality input audio."
infer_parser.add_argument(
"--upscale_audio",
type=lambda x: bool(strtobool(x)),
choices=[True, False],
help=upscale_audio_description,
default=False,
)
f0_file_description = "Full path to an external F0 file (.f0). This allows you to use pre-computed pitch values for the input audio."
infer_parser.add_argument(
"--f0_file",
Expand Down Expand Up @@ -1346,13 +1332,6 @@ def parse_arguments():
help=embedder_model_custom_description,
default=None,
)
batch_infer_parser.add_argument(
"--upscale_audio",
type=lambda x: bool(strtobool(x)),
choices=[True, False],
help=upscale_audio_description,
default=False,
)
batch_infer_parser.add_argument(
"--f0_file",
type=str,
Expand Down Expand Up @@ -1840,13 +1819,6 @@ def parse_arguments():
help=embedder_model_custom_description,
default=None,
)
tts_parser.add_argument(
"--upscale_audio",
type=lambda x: bool(strtobool(x)),
choices=[True, False],
help=upscale_audio_description,
default=False,
)
tts_parser.add_argument(
"--f0_file",
type=str,
Expand Down Expand Up @@ -2317,7 +2289,6 @@ def main():
export_format=args.export_format,
embedder_model=args.embedder_model,
embedder_model_custom=args.embedder_model_custom,
upscale_audio=args.upscale_audio,
f0_file=args.f0_file,
formant_shifting=args.formant_shifting,
formant_qfrency=args.formant_qfrency,
Expand Down Expand Up @@ -2381,7 +2352,6 @@ def main():
export_format=args.export_format,
embedder_model=args.embedder_model,
embedder_model_custom=args.embedder_model_custom,
upscale_audio=args.upscale_audio,
f0_file=args.f0_file,
formant_shifting=args.formant_shifting,
formant_qfrency=args.formant_qfrency,
Expand Down Expand Up @@ -2437,8 +2407,8 @@ def main():
protect=args.protect,
hop_length=args.hop_length,
f0_method=args.f0_method,
- input_path=args.input_path,
- output_path=args.output_path,
+ output_tts_path=args.output_tts_path,
+ output_rvc_path=args.output_rvc_path,
pth_path=args.pth_path,
index_path=args.index_path,
split_audio=args.split_audio,
Expand All @@ -2449,7 +2419,6 @@ def main():
export_format=args.export_format,
embedder_model=args.embedder_model,
embedder_model_custom=args.embedder_model_custom,
upscale_audio=args.upscale_audio,
f0_file=args.f0_file,
)
elif args.mode == "preprocess":
Expand Down
11 changes: 1 addition & 10 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,14 @@ numpy==1.23.5
requests>=2.31.0,<2.32.0
tqdm
wget
pydantic==2.8.2
fastapi==0.112.0
starlette==0.37.2

# Audio processing
ffmpeg-python>=0.2.0
faiss-cpu==1.7.3
librosa==0.9.2
pyworld==0.3.4
scipy==1.11.1
soundfile==0.12.1
praat-parselmouth
noisereduce
versatile-audio-upscaler
pedalboard
stftpitchshift

Expand All @@ -44,11 +38,8 @@ gradio==4.43.0
# Miscellaneous utilities
certifi>=2023.07.22; sys_platform == 'darwin'
antlr4-python3-runtime==4.8; sys_platform == 'darwin'
ffmpy==0.3.1
tensorboardX
edge-tts==6.1.9
pypresence
beautifulsoup4
flask
local-attention

flask
6 changes: 1 addition & 5 deletions rvc/infer/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def convert_audio_format(input_path, output_path, output_format):
"""
try:
if output_format != "WAV":
- print(f"Converting audio to {output_format} format...")
+ print(f"Saving audio as {output_format}...")
audio, sample_rate = librosa.load(input_path, sr=None)
common_sample_rates = [
8000,
Expand Down Expand Up @@ -255,10 +255,6 @@ def convert_audio(
start_time = time.time()
print(f"Converting audio '{audio_input_path}'...")

if upscale_audio == True:
from audio_upscaler import upscale

upscale(audio_input_path, audio_input_path)
audio = load_audio_infer(
audio_input_path,
16000,
Expand Down
2 changes: 1 addition & 1 deletion rvc/infer/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def get_f0_hybrid(
if methods_str:
methods = [method.strip() for method in methods_str.group(1).split("+")]
f0_computation_stack = []
- print(f"Calculating f0 pitch estimations for methods {str(methods)}")
+ print(f"Calculating f0 pitch estimations for methods: {', '.join(methods)}")
x = x.astype(np.float32)
x /= np.quantile(np.abs(x), 0.999)
for method in methods:
Expand Down
Loading

0 comments on commit de5b3d8

Please sign in to comment.