diff --git a/assets/Applio_NoUI.ipynb b/assets/Applio_NoUI.ipynb index 02574ad10..281726619 100644 --- a/assets/Applio_NoUI.ipynb +++ b/assets/Applio_NoUI.ipynb @@ -1,819 +1,818 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "0pKllbPyK_BC" - }, - "source": [ - "## **Applio NoUI**\n", - "A simple, high-quality voice conversion tool focused on ease of use and performance. \n", - "\n", - "[Support](https://discord.gg/urxFjYmYYh) — [Discord Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)\n", - "\n", - "
\n", - "\n", - "### **Credits**\n", - "- Encryption method: [Hina](https://github.com/hinabl)\n", - "- Extra section: [Poopmaster](https://github.com/poiqazwsx)\n", - "- Main development: [Applio Team](https://github.com/IAHispano)\n", - "- Colab inspired on [RVC v2 Disconnected](https://colab.research.google.com/drive/1XIPCP9ken63S7M6b5ui1b36Cs17sP-NS)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y-iR3WeLMlac" - }, - "source": [ - "### If you restart the runtime, run it again." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xwZkZGd-H0zT" - }, - "outputs": [], - "source": [ - "%cd /content/Applio" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ymMCTSD6m8qV" - }, - "source": [ - "# Installation\n", - "## If the runtime restarts, run the cell above and re-run the installation steps." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "yFhAeKGOp9aa" - }, - "outputs": [], - "source": [ - "# @title Mount Google Drive\n", - "from google.colab import drive\n", - "\n", - "drive.mount(\"/content/drive\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "7GysECSxBya4" - }, - "outputs": [], - "source": [ - "# @title Clone\n", - "!git clone https://github.com/IAHispano/Applio --branch 3.2.7 --single-branch\n", - "%cd /content/Applio" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "CAXW55BQm0PP" - }, - "outputs": [], - "source": [ - "# @title Install\n", - "rot_47 = lambda encoded_text: \"\".join(\n", - " [\n", - " (\n", - " chr(\n", - " (ord(c) - (ord(\"a\") if c.islower() else ord(\"A\")) - 47) % 26\n", - " + (ord(\"a\") if c.islower() else ord(\"A\"))\n", - " )\n", - " if c.isalpha()\n", - " else c\n", - " )\n", - " for c in encoded_text\n", - " ]\n", - ")\n", - "import codecs\n", - "import os\n", - "import tarfile\n", - "import subprocess\n", - "from pathlib import Path\n", - "from IPython.display import clear_output\n", - "\n", - "def vidal_setup(C):\n", - " def F():\n", - " print(\"Installing pip packages...\")\n", - " subprocess.check_call([\"pip\", \"install\", \"-r\", \"requirements.txt\", \"--quiet\"])\n", - "\n", - " A = \"/content/\" + rot_47(\"Kikpm.ovm.bu\")\n", - " D = \"/\"\n", - " if not os.path.exists(A):\n", - " M = os.path.dirname(A)\n", - " os.makedirs(M, exist_ok=True)\n", - " print(\"No cached install found..\")\n", - " try:\n", - " N = codecs.decode(\n", - " \"uggcf://uhttvatsnpr.pb/VNUvfcnab/Nccyvb/erfbyir/znva/Raivebzrag/Pbyno/Cache.gne.tm\",\n", - " \"rot_13\",\n", - " )\n", - " subprocess.run([\"wget\", \"-O\", A, N])\n", - " print(\"Download completed successfully!\")\n", - " except Exception as H:\n", - " print(str(H))\n", - " if os.path.exists(A):\n", - " os.remove(A)\n", - " if Path(A).exists():\n", - " with tarfile.open(A, \"r:gz\") as I:\n", - " I.extractall(D)\n", - " print(f\"Extraction of {A} to {D} completed.\")\n", - " if os.path.exists(A):\n", - " os.remove(A)\n", - " if C:\n", - " F()\n", - " C = False\n", - " else:\n", - " F()\n", - "\n", - "\n", - "vidal_setup(False)\n", - "!pip uninstall torch torchvision torchaudio -y\n", - "!pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --upgrade --index-url https://download.pytorch.org/whl/cu121\n", - "clear_output()\n", - "print(\"Finished installing requirements!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - 
"cellView": "form", - "id": "QlTibPnjmj6-" - }, - "outputs": [], - "source": [ - "# @title Download models\n", - "!python core.py \"prerequisites\" --models \"True\" --exe \"True\" --pretraineds_v1_f0 \"False\" --pretraineds_v2_f0 \"True\" --pretraineds_v1_nof0 \"False\" --pretraineds_v2_nof0 \"False\" " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YzaeMYsUE97Y" - }, - "source": [ - "# Infer\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "v0EgikgjFCjE" - }, - "outputs": [], - "source": [ - "# @title Download model\n", - "# @markdown Hugging Face or Google Drive\n", - "model_link = \"https://huggingface.co/Darwin/Darwin/resolve/main/Darwin.zip\" # @param {type:\"string\"}\n", - "\n", - "!python core.py download --model_link \"{model_link}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "lrCKEOzvDPRu" - }, - "outputs": [], - "source": [ - "# @title Run Inference\n", - "# @markdown Please upload the audio file to your Google Drive path `/content/drive/MyDrive` and specify its name here. For the model name, use the zip file name without the extension. Alternatively, you can check the path `/content/Applio/logs` for the model name (name of the folder).\n", - "\n", - "import os\n", - "\n", - "current_dir = os.getcwd()\n", - "\n", - "model_name = \"Darwin\" # @param {type:\"string\"}\n", - "model_folder = os.path.join(current_dir, f\"logs/{model_name}\")\n", - "\n", - "if not os.path.exists(model_folder):\n", - " raise FileNotFoundError(f\"Model directory not found: {model_folder}\")\n", - "\n", - "files_in_folder = os.listdir(model_folder)\n", - "pth_path = next((f for f in files_in_folder if f.endswith(\".pth\")), None)\n", - "index_file = next((f for f in files_in_folder if f.endswith(\".index\")), None)\n", - "\n", - "if pth_path is None or index_file is None:\n", - " raise FileNotFoundError(\"No model found.\")\n", - "\n", - "pth_file = os.path.join(model_folder, pth_path)\n", - "index_file = os.path.join(model_folder, index_file)\n", - "\n", - "input_path = \"/content/example.wav\" # @param {type:\"string\"}\n", - "output_path = \"/content/output.wav\"\n", - "export_format = \"WAV\" # @param ['WAV', 'MP3', 'FLAC', 'OGG', 'M4A'] {allow-input: false}\n", - "f0_method = \"rmvpe\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\", \"fcpe\", \"hybrid[rmvpe+fcpe]\"] {allow-input: false}\n", - "f0_up_key = 0 # @param {type:\"slider\", min:-24, max:24, step:0}\n", - "filter_radius = 3 # @param {type:\"slider\", min:0, max:10, step:0}\n", - "rms_mix_rate = 0.8 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "protect = 0.5 # @param {type:\"slider\", min:0.0, max:0.5, step:0.1}\n", - "index_rate = 0.7 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:0}\n", - "clean_strength = 0.7 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "split_audio = False # @param{type:\"boolean\"}\n", - "clean_audio = False # @param{type:\"boolean\"}\n", - "f0_autotune = False # @param{type:\"boolean\"}\n", - "formant_shift = False # @param{type:\"boolean\"}\n", - "formant_qfrency = 1.0 # @param {type:\"slider\", min:1.0, max:16.0, step:0.1}\n", - "formant_timbre = 1.0 # @param {type:\"slider\", min:1.0, max:16.0, step:0.1}\n", - "embedder_model = \"contentvec\" # @param [\"contentvec\", \"chinese-hubert-base\", \"japanese-hubert-base\", \"korean-hubert-base\", \"custom\"] 
{allow-input: false}\n", - "embedder_model_custom = \"\" # @param {type:\"string\"}\n", - "upscale_audio = False # @param{type:\"boolean\"}\n", - "\n", - "\n", - "# Post-processing effects\n", - "if \"post_process\" not in globals():\n", - " post_process = False \n", - "if \"reverb\" not in globals():\n", - " reverb = False \n", - "if \"pitch_shift\" not in globals():\n", - " pitch_shift = False \n", - "if \"limiter\" not in globals():\n", - " limiter = False \n", - "if \"gain\" not in globals():\n", - " gain = False \n", - "if \"distortion\" not in globals():\n", - " distortion = False \n", - "if \"chorus\" not in globals():\n", - " chorus = False \n", - "if \"bitcrush\" not in globals():\n", - " bitcrush = False\n", - "if \"clipping\" not in globals():\n", - " clipping = False \n", - "if \"compressor\" not in globals():\n", - " compressor = False \n", - "if \"delay\" not in globals():\n", - " delay = False\n", - "\n", - "if \"reverb_room_size\" not in globals():\n", - " reverb_room_size = 0.5 \n", - "if \"reverb_damping\" not in globals():\n", - " reverb_damping = 0.5 \n", - "if \"reverb_wet_gain\" not in globals():\n", - " reverb_wet_gain = 0.0 \n", - "if \"reverb_dry_gain\" not in globals():\n", - " reverb_dry_gain = 0.0 \n", - "if \"reverb_width\" not in globals():\n", - " reverb_width = 1.0 \n", - "if \"reverb_freeze_mode\" not in globals():\n", - " reverb_freeze_mode = 0.0 \n", - "\n", - "if \"pitch_shift_semitones\" not in globals():\n", - " pitch_shift_semitones = 0.0 \n", - "\n", - "if \"limiter_threshold\" not in globals():\n", - " limiter_threshold = -1.0 \n", - "if \"limiter_release_time\" not in globals():\n", - " limiter_release_time = 0.05 \n", - "\n", - "if \"gain_db\" not in globals():\n", - " gain_db = 0.0 \n", - "\n", - "if \"distortion_gain\" not in globals():\n", - " distortion_gain = 0.0 \n", - "\n", - "if \"chorus_rate\" not in globals():\n", - " chorus_rate = 1.5 \n", - "if \"chorus_depth\" not in globals():\n", - " chorus_depth = 0.1 \n", - "if \"chorus_center_delay\" not in globals():\n", - " chorus_center_delay = 15.0 \n", - "if \"chorus_feedback\" not in globals():\n", - " chorus_feedback = 0.25 \n", - "if \"chorus_mix\" not in globals():\n", - " chorus_mix = 0.5 \n", - "\n", - "if \"bitcrush_bit_depth\" not in globals():\n", - " bitcrush_bit_depth = 4 \n", - "\n", - "if \"clipping_threshold\" not in globals():\n", - " clipping_threshold = 0.5 \n", - "\n", - "if \"compressor_threshold\" not in globals():\n", - " compressor_threshold = -20.0\n", - "if \"compressor_ratio\" not in globals():\n", - " compressor_ratio = 4.0 \n", - "if \"compressor_attack\" not in globals():\n", - " compressor_attack = 0.001 \n", - "if \"compressor_release\" not in globals():\n", - " compressor_release = 0.1 \n", - "\n", - "if \"delay_seconds\" not in globals():\n", - " delay_seconds = 0.1\n", - "if \"delay_feedback\" not in globals():\n", - " delay_feedback = 0.5 \n", - "if \"delay_mix\" not in globals():\n", - " delay_mix = 0.5 \n", - " \n", - "!python core.py infer --pitch \"{f0_up_key}\" --filter_radius \"{filter_radius}\" --volume_envelope \"{rms_mix_rate}\" --index_rate \"{index_rate}\" --hop_length \"{hop_length}\" --protect \"{protect}\" --f0_autotune \"{f0_autotune}\" --f0_method \"{f0_method}\" --input_path \"{input_path}\" --output_path \"{output_path}\" --pth_path \"{pth_file}\" --index_path \"{index_file}\" --split_audio \"{split_audio}\" --clean_audio \"{clean_audio}\" --clean_strength \"{clean_strength}\" --export_format \"{export_format}\" --embedder_model 
\"{embedder_model}\" --embedder_model_custom \"{embedder_model_custom}\" --upscale_audio \"{upscale_audio}\" --formant_shifting \"{formant_shift}\" --formant_qfrency \"{formant_qfrency}\" --formant_timbre \"{formant_timbre}\" --post_process \"{post_process}\" --reverb \"{reverb}\" --pitch_shift \"{pitch_shift}\" --limiter \"{limiter}\" --gain \"{gain}\" --distortion \"{distortion}\" --chorus \"{chorus}\" --bitcrush \"{bitcrush}\" --clipping \"{clipping}\" --compressor \"{compressor}\" --delay \"{delay}\" --reverb_room_size \"{reverb_room_size}\" --reverb_damping \"{reverb_damping}\" --reverb_wet_gain \"{reverb_wet_gain}\" --reverb_dry_gain \"{reverb_dry_gain}\" --reverb_width \"{reverb_width}\" --reverb_freeze_mode \"{reverb_freeze_mode}\" --pitch_shift_semitones \"{pitch_shift_semitones}\" --limiter_threshold \"{limiter_threshold}\" --limiter_release_time \"{limiter_release_time}\" --gain_db \"{gain_db}\" --distortion_gain \"{distortion_gain}\" --chorus_rate \"{chorus_rate}\" --chorus_depth \"{chorus_depth}\" --chorus_center_delay \"{chorus_center_delay}\" --chorus_feedback \"{chorus_feedback}\" --chorus_mix \"{chorus_mix}\" --bitcrush_bit_depth \"{bitcrush_bit_depth}\" --clipping_threshold \"{clipping_threshold}\" --compressor_threshold \"{compressor_threshold}\" --compressor_ratio \"{compressor_ratio}\" --compressor_attack \"{compressor_attack}\" --compressor_release \"{compressor_release}\" --delay_seconds \"{delay_seconds}\" --delay_feedback \"{delay_feedback}\" --delay_mix \"{delay_mix}\"\n", - "\n", - "from IPython.display import Audio, display, clear_output\n", - "\n", - "output_path = output_path.replace(\".wav\", f\".{export_format.lower()}\")\n", - "# clear_output()\n", - "display(Audio(output_path, autoplay=True))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yrWw2h9d2TRn" - }, - "source": [ - "## **Advanced Settings**" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "cellView": "form", - "id": "J43qejJ-2Tpp" - }, - "outputs": [], - "source": [ - "# @title # Post-processing effects\n", - "post_process = False # @param{type:\"boolean\"}\n", - "reverb = False # @param{type:\"boolean\"}\n", - "pitch_shift = False # @param{type:\"boolean\"}\n", - "limiter = False # @param{type:\"boolean\"}\n", - "gain = False # @param{type:\"boolean\"}\n", - "distortion = False # @param{type:\"boolean\"}\n", - "chorus = False # @param{type:\"boolean\"}\n", - "bitcrush = False # @param{type:\"boolean\"}\n", - "clipping = False # @param{type:\"boolean\"}\n", - "compressor = False # @param{type:\"boolean\"}\n", - "delay = False # @param{type:\"boolean\"}\n", - "\n", - "reverb_room_size = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "reverb_damping = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "reverb_wet_gain = 0.0 # @param {type:\"slider\", min:-20.0, max:20.0, step:0.1}\n", - "reverb_dry_gain = 0.0 # @param {type:\"slider\", min:-20.0, max:20.0, step:0.1}\n", - "reverb_width = 1.0 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "reverb_freeze_mode = 0.0 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "\n", - "pitch_shift_semitones = 0.0 # @param {type:\"slider\", min:-12.0, max:12.0, step:0.1}\n", - "\n", - "limiter_threshold = -1.0 # @param {type:\"slider\", min:-20.0, max:0.0, step:0.1}\n", - "limiter_release_time = 0.05 # @param {type:\"slider\", min:0.0, max:1.0, step:0.01}\n", - "\n", - "gain_db = 0.0 # @param {type:\"slider\", min:-20.0, max:20.0, step:0.1}\n", - "\n", - 
"distortion_gain = 0.0 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "\n", - "chorus_rate = 1.5 # @param {type:\"slider\", min:0.1, max:10.0, step:0.1}\n", - "chorus_depth = 0.1 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "chorus_center_delay = 15.0 # @param {type:\"slider\", min:0.0, max:50.0, step:0.1}\n", - "chorus_feedback = 0.25 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "chorus_mix = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "\n", - "bitcrush_bit_depth = 4 # @param {type:\"slider\", min:1, max:16, step:1}\n", - "\n", - "clipping_threshold = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "\n", - "compressor_threshold = -20.0 # @param {type:\"slider\", min:-60.0, max:0.0, step:0.1}\n", - "compressor_ratio = 4.0 # @param {type:\"slider\", min:1.0, max:20.0, step:0.1}\n", - "compressor_attack = 0.001 # @param {type:\"slider\", min:0.0, max:0.1, step:0.001}\n", - "compressor_release = 0.1 # @param {type:\"slider\", min:0.0, max:1.0, step:0.01}\n", - "\n", - "delay_seconds = 0.1 # @param {type:\"slider\", min:0.0, max:1.0, step:0.01}\n", - "delay_feedback = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "delay_mix = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1QkabnLlF2KB" - }, - "source": [ - "# Train" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "oBzqm4JkGGa0" - }, - "outputs": [], - "source": [ - "# @title Preprocess Dataset\n", - "import os\n", - "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n", - "model_name = \"Darwin\" # @param {type:\"string\"}\n", - "dataset_path = \"/content/drive/MyDrive/Darwin_Dataset\" # @param {type:\"string\"}\n", - "\n", - "sample_rate = \"40k\" # @param [\"32k\", \"40k\", \"48k\"] {allow-input: false}\n", - "sr = int(sample_rate.rstrip(\"k\")) * 1000\n", - "cpu_cores = 2 # @param {type:\"slider\", min:1, max:2, step:1}\n", - "cut_preprocess = True # @param{type:\"boolean\"}\n", - "process_effects = False # @param{type:\"boolean\"}\n", - "noise_reduction = False # @param{type:\"boolean\"}\n", - "noise_reduction_strength = 0.7 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", - "\n", - "!python core.py preprocess --model_name \"{model_name}\" --dataset_path \"{dataset_path}\" --sample_rate \"{sr}\" --cpu_cores \"{cpu_cores}\" --cut_preprocess \"{cut_preprocess}\" --process_effects \"{process_effects}\" --noise_reduction \"{noise_reduction}\" --noise_reduction_strength \"{noise_reduction_strength}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "zWMiMYfRJTJv" - }, - "outputs": [], - "source": [ - "# @title Extract Features\n", - "rvc_version = \"v2\" # @param [\"v2\", \"v1\"] {allow-input: false}\n", - "f0_method = \"rmvpe\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\"] {allow-input: false}\n", - "hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:0}\n", - "\n", - "sr = int(sample_rate.rstrip(\"k\")) * 1000\n", - "cpu_cores = 2 # @param {type:\"slider\", min:1, max:2, step:1}\n", - "embedder_model = \"contentvec\" # @param [\"contentvec\", \"chinese-hubert-base\", \"japanese-hubert-base\", \"korean-hubert-base\", \"custom\"] {allow-input: false}\n", - "embedder_model_custom = \"\" # @param {type:\"string\"}\n", - "\n", - "!python core.py extract --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --f0_method 
\"{f0_method}\" --hop_length \"{hop_length}\" --sample_rate \"{sr}\" --cpu_cores \"{cpu_cores}\" --gpu \"0\" --embedder_model \"{embedder_model}\" --embedder_model_custom \"{embedder_model_custom}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "TI6LLdIzKAIa" - }, - "outputs": [], - "source": [ - "# @title Train\n", - "import threading\n", - "import time\n", - "import os\n", - "import shutil\n", - "import hashlib\n", - "import time\n", - "\n", - "LOGS_FOLDER = \"/content/Applio/logs/\"\n", - "GOOGLE_DRIVE_PATH = \"/content/drive/MyDrive/RVC_Backup\"\n", - "\n", - "\n", - "def import_google_drive_backup():\n", - " print(\"Importing Google Drive backup...\")\n", - " for root, dirs, files in os.walk(GOOGLE_DRIVE_PATH):\n", - " for filename in files:\n", - " filepath = os.path.join(root, filename)\n", - " if os.path.isfile(filepath):\n", - " backup_filepath = os.path.join(\n", - " LOGS_FOLDER, os.path.relpath(filepath, GOOGLE_DRIVE_PATH)\n", - " )\n", - " backup_folderpath = os.path.dirname(backup_filepath)\n", - " if not os.path.exists(backup_folderpath):\n", - " os.makedirs(backup_folderpath)\n", - " print(f\"Created backup folder: {backup_folderpath}\", flush=True)\n", - " shutil.copy2(filepath, backup_filepath)\n", - " print(f\"Imported file from Google Drive backup: {filename}\")\n", - " print(\"Google Drive backup import completed.\")\n", - "\n", - "\n", - "def get_md5_hash(file_path):\n", - " hash_md5 = hashlib.md5()\n", - " with open(file_path, \"rb\") as f:\n", - " for chunk in iter(lambda: f.read(4096), b\"\"):\n", - " hash_md5.update(chunk)\n", - " return hash_md5.hexdigest()\n", - "\n", - "\n", - "if \"autobackups\" not in globals():\n", - " autobackups = False\n", - "# @markdown ### 💾 AutoBackup\n", - "cooldown = 15 # @param {type:\"slider\", min:0, max:100, step:0}\n", - "auto_backups = True # @param{type:\"boolean\"}\n", - "def backup_files():\n", - " print(\"\\nStarting backup loop...\")\n", - " last_backup_timestamps_path = os.path.join(\n", - " LOGS_FOLDER, \"last_backup_timestamps.txt\"\n", - " )\n", - " fully_updated = False\n", - "\n", - " while True:\n", - " try:\n", - " updated_files = 0\n", - " deleted_files = 0\n", - " new_files = 0\n", - " last_backup_timestamps = {}\n", - "\n", - " try:\n", - " with open(last_backup_timestamps_path, \"r\") as f:\n", - " last_backup_timestamps = dict(line.strip().split(\":\") for line in f)\n", - " except FileNotFoundError:\n", - " pass\n", - "\n", - " for root, dirs, files in os.walk(LOGS_FOLDER):\n", - " # Excluding \"zips\" and \"mute\" directories\n", - " if \"zips\" in dirs:\n", - " dirs.remove(\"zips\")\n", - " if \"mute\" in dirs:\n", - " dirs.remove(\"mute\")\n", - "\n", - " for filename in files:\n", - " if filename != \"last_backup_timestamps.txt\":\n", - " filepath = os.path.join(root, filename)\n", - " if os.path.isfile(filepath):\n", - " backup_filepath = os.path.join(\n", - " GOOGLE_DRIVE_PATH,\n", - " os.path.relpath(filepath, LOGS_FOLDER),\n", - " )\n", - " backup_folderpath = os.path.dirname(backup_filepath)\n", - " if not os.path.exists(backup_folderpath):\n", - " os.makedirs(backup_folderpath)\n", - " last_backup_timestamp = last_backup_timestamps.get(filepath)\n", - " current_timestamp = os.path.getmtime(filepath)\n", - " if (\n", - " last_backup_timestamp is None\n", - " or float(last_backup_timestamp) < current_timestamp\n", - " ):\n", - " shutil.copy2(filepath, backup_filepath)\n", - " last_backup_timestamps[filepath] = str(current_timestamp)\n", - 
" if last_backup_timestamp is None:\n", - " new_files += 1\n", - " else:\n", - " updated_files += 1\n", - "\n", - "\n", - " for filepath in list(last_backup_timestamps.keys()):\n", - " if not os.path.exists(filepath):\n", - " backup_filepath = os.path.join(\n", - " GOOGLE_DRIVE_PATH, os.path.relpath(filepath, LOGS_FOLDER)\n", - " )\n", - " if os.path.exists(backup_filepath):\n", - " os.remove(backup_filepath)\n", - " deleted_files += 1\n", - " del last_backup_timestamps[filepath]\n", - "\n", - "\n", - " if updated_files > 0 or deleted_files > 0 or new_files > 0:\n", - " print(f\"Backup Complete: {new_files} new, {updated_files} updated, {deleted_files} deleted.\")\n", - " fully_updated = False\n", - " elif not fully_updated:\n", - " print(\"Files are up to date.\")\n", - " fully_updated = True\n", - "\n", - " with open(last_backup_timestamps_path, \"w\") as f:\n", - " for filepath, timestamp in last_backup_timestamps.items():\n", - " f.write(f\"{filepath}:{timestamp}\\n\")\n", - "\n", - " time.sleep(cooldown if fully_updated else 0.1)\n", - "\n", - "\n", - " except Exception as error:\n", - " print(f\"An error occurred during backup: {error}\")\n", - "\n", - "\n", - "if autobackups:\n", - " autobackups = False\n", - " print(\"Autobackup Disabled\")\n", - "else:\n", - " autobackups = True\n", - " print(\"Autobackup Enabled\") \n", - "# @markdown ### ⚙️ Train Settings\n", - "total_epoch = 800 # @param {type:\"integer\"}\n", - "batch_size = 15 # @param {type:\"slider\", min:1, max:25, step:0}\n", - "gpu = 0\n", - "sr = int(sample_rate.rstrip(\"k\")) * 1000\n", - "pretrained = True # @param{type:\"boolean\"}\n", - "cleanup = False # @param{type:\"boolean\"}\n", - "cache_data_in_gpu = False # @param{type:\"boolean\"}\n", - "tensorboard = True # @param{type:\"boolean\"}\n", - "# @markdown ### ➡️ Choose how many epochs your model will be stored\n", - "save_every_epoch = 10 # @param {type:\"slider\", min:1, max:100, step:0}\n", - "save_only_latest = False # @param{type:\"boolean\"}\n", - "save_every_weights = False # @param{type:\"boolean\"}\n", - "overtraining_detector = False # @param{type:\"boolean\"}\n", - "overtraining_threshold = 50 # @param {type:\"slider\", min:1, max:100, step:0}\n", - "# @markdown ### ❓ Optional\n", - "# @markdown In case you select custom pretrained, you will have to download the pretraineds and enter the path of the pretraineds.\n", - "custom_pretrained = False # @param{type:\"boolean\"}\n", - "g_pretrained_path = \"/content/Applio/rvc/models/pretraineds/pretraineds_custom/G48k.pth\" # @param {type:\"string\"}\n", - "d_pretrained_path = \"/content/Applio/rvc/models/pretraineds/pretraineds_custom/D48k.pth\" # @param {type:\"string\"}\n", - "\n", - "if \"pretrained\" not in globals():\n", - " pretrained = True\n", - "\n", - "if \"custom_pretrained\" not in globals():\n", - " custom_pretrained = False\n", - "\n", - "if \"g_pretrained_path\" not in globals():\n", - " g_pretrained_path = \"Custom Path\"\n", - "\n", - "if \"d_pretrained_path\" not in globals():\n", - " d_pretrained_path = \"Custom Path\"\n", - "\n", - "\n", - "def start_train():\n", - " if tensorboard == True:\n", - " %load_ext tensorboard\n", - " %tensorboard --logdir /content/Applio/logs/\n", - " !python core.py train --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --save_every_epoch \"{save_every_epoch}\" --save_only_latest \"{save_only_latest}\" --save_every_weights \"{save_every_weights}\" --total_epoch \"{total_epoch}\" --sample_rate \"{sr}\" --batch_size \"{batch_size}\" --gpu \"{gpu}\" 
--pretrained \"{pretrained}\" --custom_pretrained \"{custom_pretrained}\" --g_pretrained_path \"{g_pretrained_path}\" --d_pretrained_path \"{d_pretrained_path}\" --overtraining_detector \"{overtraining_detector}\" --overtraining_threshold \"{overtraining_threshold}\" --cleanup \"{cleanup}\" --cache_data_in_gpu \"{cache_data_in_gpu}\"\n", - "\n", - "\n", - "server_thread = threading.Thread(target=start_train)\n", - "server_thread.start()\n", - "\n", - "if auto_backups:\n", - " backup_files()\n", - "else:\n", - " while True:\n", - " time.sleep(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "bHLs5AT4Q1ck" - }, - "outputs": [], - "source": [ - "# @title Generate index file\n", - "index_algorithm = \"Auto\" # @param [\"Auto\", \"Faiss\", \"KMeans\"] {allow-input: false}\n", - "!python core.py index --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --index_algorithm \"{index_algorithm}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "X_eU_SoiHIQg" - }, - "outputs": [], - "source": [ - "# @title Save model\n", - "# @markdown Enter the name of the model and the steps. You can find it in your `/content/Applio/logs` folder.\n", - "%cd /content\n", - "import os, shutil, sys\n", - "\n", - "model_name = \"Darwin\" # @param {type:\"string\"}\n", - "model_epoch = 800 # @param {type:\"integer\"}\n", - "save_big_file = False # @param {type:\"boolean\"}\n", - "\n", - "if os.path.exists(\"/content/zips\"):\n", - " shutil.rmtree(\"/content/zips\")\n", - "print(\"Removed zips.\")\n", - "\n", - "os.makedirs(f\"/content/zips/{model_name}/\", exist_ok=True)\n", - "print(\"Created zips.\")\n", - "\n", - "logs_folder = f\"/content/Applio/logs/{model_name}/\"\n", - "weight_file = None\n", - "if not os.path.exists(logs_folder):\n", - " print(f\"Model folder not found.\")\n", - " sys.exit(\"\")\n", - "\n", - "for filename in os.listdir(logs_folder):\n", - " if filename.startswith(f\"{model_name}_{model_epoch}e\") and filename.endswith(\".pth\"):\n", - " weight_file = filename\n", - " break\n", - "if weight_file is None:\n", - " print(\"There is no weight file with that name\")\n", - " sys.exit(\"\")\n", - "if not save_big_file:\n", - " !cp {logs_folder}added_*.index /content/zips/{model_name}/\n", - " !cp {logs_folder}total_*.npy /content/zips/{model_name}/\n", - " !cp {logs_folder}{weight_file} /content/zips/{model_name}/\n", - " %cd /content/zips\n", - " !zip -r {model_name}.zip {model_name}\n", - "if save_big_file:\n", - " %cd /content/Applio\n", - " latest_steps = -1\n", - " logs_folder = \"./logs/\" + model_name\n", - " for filename in os.listdir(logs_folder):\n", - " if filename.startswith(\"G_\") and filename.endswith(\".pth\"):\n", - " steps = int(filename.split(\"_\")[1].split(\".\")[0])\n", - " if steps > latest_steps:\n", - " latest_steps = steps\n", - " MODELZIP = model_name + \".zip\"\n", - " !mkdir -p /content/zips\n", - " ZIPFILEPATH = os.path.join(\"/content/zips\", MODELZIP)\n", - " for filename in os.listdir(logs_folder):\n", - " if \"G_\" in filename or \"D_\" in filename:\n", - " if str(latest_steps) in filename:\n", - " !zip -r {ZIPFILEPATH} {os.path.join(logs_folder, filename)}\n", - " else:\n", - " !zip -r {ZIPFILEPATH} {os.path.join(logs_folder, filename)}\n", - "\n", - "!mkdir -p /content/drive/MyDrive/RVC_Backup/\n", - "shutil.move(\n", - " f\"/content/zips/{model_name}.zip\",\n", - " f\"/content/drive/MyDrive/RVC_Backup/{model_name}.zip\",\n", - 
")\n", - "%cd /content/Applio\n", - "shutil.rmtree(\"/content/zips\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OaKoymXsyEYN" - }, - "source": [ - "# Resume-training" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "d3KgLAYnyHkP" - }, - "outputs": [], - "source": [ - "# @title Load a Backup\n", - "from google.colab import drive\n", - "import os\n", - "import shutil\n", - "\n", - "# @markdown Put the exact name you put as your Model Name in Applio.\n", - "modelname = \"My-Project\" # @param {type:\"string\"}\n", - "source_path = \"/content/drive/MyDrive/RVC_Backup/\" + modelname\n", - "destination_path = \"/content/Applio/logs/\" + modelname\n", - "backup_timestamps_file = \"last_backup_timestamps.txt\"\n", - "if not os.path.exists(source_path):\n", - " print(\n", - " \"The model folder does not exist. Please verify the name is correct or check your Google Drive.\"\n", - " )\n", - "else:\n", - " time_ = os.path.join(\"/content/drive/MyDrive/RVC_Backup/\", backup_timestamps_file)\n", - " time__ = os.path.join(\"/content/Applio/logs/\", backup_timestamps_file)\n", - " if os.path.exists(time_):\n", - " shutil.copy(time_, time__)\n", - " shutil.copytree(source_path, destination_path)\n", - " print(\"Model backup loaded successfully.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "sc9DzvRCyJ2d" - }, - "outputs": [], - "source": [ - "# @title Set training variables\n", - "# @markdown ### ➡️ Use the same as you did previously\n", - "model_name = \"Darwin\" # @param {type:\"string\"}\n", - "sample_rate = \"40k\" # @param [\"32k\", \"40k\", \"48k\"] {allow-input: false}\n", - "rvc_version = \"v2\" # @param [\"v2\", \"v1\"] {allow-input: false}\n", - "f0_method = \"rmvpe\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\"] {allow-input: false}\n", - "hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:0}\n", - "sr = int(sample_rate.rstrip(\"k\")) * 1000" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [ - "ymMCTSD6m8qV" - ], - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "0pKllbPyK_BC" + }, + "source": [ + "## **Applio NoUI**\n", + "A simple, high-quality voice conversion tool focused on ease of use and performance. \n", + "\n", + "[Support](https://discord.gg/urxFjYmYYh) — [Discord Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)\n", + "\n", + "
\n", + "\n", + "### **Credits**\n", + "- Encryption method: [Hina](https://github.com/hinabl)\n", + "- Extra section: [Poopmaster](https://github.com/poiqazwsx)\n", + "- Main development: [Applio Team](https://github.com/IAHispano)\n", + "- Colab inspired on [RVC v2 Disconnected](https://colab.research.google.com/drive/1XIPCP9ken63S7M6b5ui1b36Cs17sP-NS)." + ] }, - "nbformat": 4, - "nbformat_minor": 0 + { + "cell_type": "markdown", + "metadata": { + "id": "Y-iR3WeLMlac" + }, + "source": [ + "### If you restart the runtime, run it again." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xwZkZGd-H0zT" + }, + "outputs": [], + "source": [ + "%cd /content/Applio" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ymMCTSD6m8qV" + }, + "source": [ + "# Installation\n", + "## If the runtime restarts, run the cell above and re-run the installation steps." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "yFhAeKGOp9aa" + }, + "outputs": [], + "source": [ + "# @title Mount Google Drive\n", + "from google.colab import drive\n", + "\n", + "drive.mount(\"/content/drive\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "7GysECSxBya4" + }, + "outputs": [], + "source": [ + "# @title Clone\n", + "!git clone https://github.com/IAHispano/Applio --branch 3.2.7 --single-branch\n", + "%cd /content/Applio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "CAXW55BQm0PP" + }, + "outputs": [], + "source": [ + "# @title Install\n", + "rot_47 = lambda encoded_text: \"\".join(\n", + " [\n", + " (\n", + " chr(\n", + " (ord(c) - (ord(\"a\") if c.islower() else ord(\"A\")) - 47) % 26\n", + " + (ord(\"a\") if c.islower() else ord(\"A\"))\n", + " )\n", + " if c.isalpha()\n", + " else c\n", + " )\n", + " for c in encoded_text\n", + " ]\n", + ")\n", + "import codecs\n", + "import os\n", + "import tarfile\n", + "import subprocess\n", + "from pathlib import Path\n", + "from IPython.display import clear_output\n", + "\n", + "def vidal_setup(C):\n", + " def F():\n", + " print(\"Installing pip packages...\")\n", + " subprocess.check_call([\"pip\", \"install\", \"-r\", \"requirements.txt\", \"--quiet\"])\n", + "\n", + " A = \"/content/\" + rot_47(\"Kikpm.ovm.bu\")\n", + " D = \"/\"\n", + " if not os.path.exists(A):\n", + " M = os.path.dirname(A)\n", + " os.makedirs(M, exist_ok=True)\n", + " print(\"No cached install found..\")\n", + " try:\n", + " N = codecs.decode(\n", + " \"uggcf://uhttvatsnpr.pb/VNUvfcnab/Nccyvb/erfbyir/znva/Raivebzrag/Pbyno/Cache.gne.tm\",\n", + " \"rot_13\",\n", + " )\n", + " subprocess.run([\"wget\", \"-O\", A, N])\n", + " print(\"Download completed successfully!\")\n", + " except Exception as H:\n", + " print(str(H))\n", + " if os.path.exists(A):\n", + " os.remove(A)\n", + " if Path(A).exists():\n", + " with tarfile.open(A, \"r:gz\") as I:\n", + " I.extractall(D)\n", + " print(f\"Extraction of {A} to {D} completed.\")\n", + " if os.path.exists(A):\n", + " os.remove(A)\n", + " if C:\n", + " F()\n", + " C = False\n", + " else:\n", + " F()\n", + "\n", + "\n", + "vidal_setup(False)\n", + "!pip uninstall torch torchvision torchaudio -y\n", + "!pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --upgrade --index-url https://download.pytorch.org/whl/cu121\n", + "clear_output()\n", + "print(\"Finished installing requirements!\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "cellView": "form", + "id": "QlTibPnjmj6-" + }, + "outputs": [], + "source": [ + "# @title Download models\n", + "!python core.py \"prerequisites\" --models \"True\" --exe \"True\" --pretraineds_v1_f0 \"False\" --pretraineds_v2_f0 \"True\" --pretraineds_v1_nof0 \"False\" --pretraineds_v2_nof0 \"False\" " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YzaeMYsUE97Y" + }, + "source": [ + "# Infer\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "v0EgikgjFCjE" + }, + "outputs": [], + "source": [ + "# @title Download model\n", + "# @markdown Hugging Face or Google Drive\n", + "model_link = \"https://huggingface.co/Darwin/Darwin/resolve/main/Darwin.zip\" # @param {type:\"string\"}\n", + "\n", + "!python core.py download --model_link \"{model_link}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "lrCKEOzvDPRu" + }, + "outputs": [], + "source": [ + "# @title Run Inference\n", + "# @markdown Please upload the audio file to your Google Drive path `/content/drive/MyDrive` and specify its name here. For the model name, use the zip file name without the extension. Alternatively, you can check the path `/content/Applio/logs` for the model name (name of the folder).\n", + "\n", + "import os\n", + "\n", + "current_dir = os.getcwd()\n", + "\n", + "model_name = \"Darwin\" # @param {type:\"string\"}\n", + "model_folder = os.path.join(current_dir, f\"logs/{model_name}\")\n", + "\n", + "if not os.path.exists(model_folder):\n", + " raise FileNotFoundError(f\"Model directory not found: {model_folder}\")\n", + "\n", + "files_in_folder = os.listdir(model_folder)\n", + "pth_path = next((f for f in files_in_folder if f.endswith(\".pth\")), None)\n", + "index_file = next((f for f in files_in_folder if f.endswith(\".index\")), None)\n", + "\n", + "if pth_path is None or index_file is None:\n", + " raise FileNotFoundError(\"No model found.\")\n", + "\n", + "pth_file = os.path.join(model_folder, pth_path)\n", + "index_file = os.path.join(model_folder, index_file)\n", + "\n", + "input_path = \"/content/example.wav\" # @param {type:\"string\"}\n", + "output_path = \"/content/output.wav\"\n", + "export_format = \"WAV\" # @param ['WAV', 'MP3', 'FLAC', 'OGG', 'M4A'] {allow-input: false}\n", + "f0_method = \"rmvpe\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\", \"fcpe\", \"hybrid[rmvpe+fcpe]\"] {allow-input: false}\n", + "f0_up_key = 0 # @param {type:\"slider\", min:-24, max:24, step:0}\n", + "filter_radius = 3 # @param {type:\"slider\", min:0, max:10, step:0}\n", + "rms_mix_rate = 0.8 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "protect = 0.5 # @param {type:\"slider\", min:0.0, max:0.5, step:0.1}\n", + "index_rate = 0.7 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:0}\n", + "clean_strength = 0.7 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "split_audio = False # @param{type:\"boolean\"}\n", + "clean_audio = False # @param{type:\"boolean\"}\n", + "f0_autotune = False # @param{type:\"boolean\"}\n", + "formant_shift = False # @param{type:\"boolean\"}\n", + "formant_qfrency = 1.0 # @param {type:\"slider\", min:1.0, max:16.0, step:0.1}\n", + "formant_timbre = 1.0 # @param {type:\"slider\", min:1.0, max:16.0, step:0.1}\n", + "embedder_model = \"contentvec\" # @param [\"contentvec\", \"chinese-hubert-base\", \"japanese-hubert-base\", 
\"korean-hubert-base\", \"custom\"] {allow-input: false}\n", + "embedder_model_custom = \"\" # @param {type:\"string\"}\n", + "\n", + "\n", + "# Post-processing effects\n", + "if \"post_process\" not in globals():\n", + " post_process = False \n", + "if \"reverb\" not in globals():\n", + " reverb = False \n", + "if \"pitch_shift\" not in globals():\n", + " pitch_shift = False \n", + "if \"limiter\" not in globals():\n", + " limiter = False \n", + "if \"gain\" not in globals():\n", + " gain = False \n", + "if \"distortion\" not in globals():\n", + " distortion = False \n", + "if \"chorus\" not in globals():\n", + " chorus = False \n", + "if \"bitcrush\" not in globals():\n", + " bitcrush = False\n", + "if \"clipping\" not in globals():\n", + " clipping = False \n", + "if \"compressor\" not in globals():\n", + " compressor = False \n", + "if \"delay\" not in globals():\n", + " delay = False\n", + "\n", + "if \"reverb_room_size\" not in globals():\n", + " reverb_room_size = 0.5 \n", + "if \"reverb_damping\" not in globals():\n", + " reverb_damping = 0.5 \n", + "if \"reverb_wet_gain\" not in globals():\n", + " reverb_wet_gain = 0.0 \n", + "if \"reverb_dry_gain\" not in globals():\n", + " reverb_dry_gain = 0.0 \n", + "if \"reverb_width\" not in globals():\n", + " reverb_width = 1.0 \n", + "if \"reverb_freeze_mode\" not in globals():\n", + " reverb_freeze_mode = 0.0 \n", + "\n", + "if \"pitch_shift_semitones\" not in globals():\n", + " pitch_shift_semitones = 0.0 \n", + "\n", + "if \"limiter_threshold\" not in globals():\n", + " limiter_threshold = -1.0 \n", + "if \"limiter_release_time\" not in globals():\n", + " limiter_release_time = 0.05 \n", + "\n", + "if \"gain_db\" not in globals():\n", + " gain_db = 0.0 \n", + "\n", + "if \"distortion_gain\" not in globals():\n", + " distortion_gain = 0.0 \n", + "\n", + "if \"chorus_rate\" not in globals():\n", + " chorus_rate = 1.5 \n", + "if \"chorus_depth\" not in globals():\n", + " chorus_depth = 0.1 \n", + "if \"chorus_center_delay\" not in globals():\n", + " chorus_center_delay = 15.0 \n", + "if \"chorus_feedback\" not in globals():\n", + " chorus_feedback = 0.25 \n", + "if \"chorus_mix\" not in globals():\n", + " chorus_mix = 0.5 \n", + "\n", + "if \"bitcrush_bit_depth\" not in globals():\n", + " bitcrush_bit_depth = 4 \n", + "\n", + "if \"clipping_threshold\" not in globals():\n", + " clipping_threshold = 0.5 \n", + "\n", + "if \"compressor_threshold\" not in globals():\n", + " compressor_threshold = -20.0\n", + "if \"compressor_ratio\" not in globals():\n", + " compressor_ratio = 4.0 \n", + "if \"compressor_attack\" not in globals():\n", + " compressor_attack = 0.001 \n", + "if \"compressor_release\" not in globals():\n", + " compressor_release = 0.1 \n", + "\n", + "if \"delay_seconds\" not in globals():\n", + " delay_seconds = 0.1\n", + "if \"delay_feedback\" not in globals():\n", + " delay_feedback = 0.5 \n", + "if \"delay_mix\" not in globals():\n", + " delay_mix = 0.5 \n", + " \n", + "!python core.py infer --pitch \"{f0_up_key}\" --filter_radius \"{filter_radius}\" --volume_envelope \"{rms_mix_rate}\" --index_rate \"{index_rate}\" --hop_length \"{hop_length}\" --protect \"{protect}\" --f0_autotune \"{f0_autotune}\" --f0_method \"{f0_method}\" --input_path \"{input_path}\" --output_path \"{output_path}\" --pth_path \"{pth_file}\" --index_path \"{index_file}\" --split_audio \"{split_audio}\" --clean_audio \"{clean_audio}\" --clean_strength \"{clean_strength}\" --export_format \"{export_format}\" --embedder_model \"{embedder_model}\" 
--embedder_model_custom \"{embedder_model_custom}\" --formant_shifting \"{formant_shift}\" --formant_qfrency \"{formant_qfrency}\" --formant_timbre \"{formant_timbre}\" --post_process \"{post_process}\" --reverb \"{reverb}\" --pitch_shift \"{pitch_shift}\" --limiter \"{limiter}\" --gain \"{gain}\" --distortion \"{distortion}\" --chorus \"{chorus}\" --bitcrush \"{bitcrush}\" --clipping \"{clipping}\" --compressor \"{compressor}\" --delay \"{delay}\" --reverb_room_size \"{reverb_room_size}\" --reverb_damping \"{reverb_damping}\" --reverb_wet_gain \"{reverb_wet_gain}\" --reverb_dry_gain \"{reverb_dry_gain}\" --reverb_width \"{reverb_width}\" --reverb_freeze_mode \"{reverb_freeze_mode}\" --pitch_shift_semitones \"{pitch_shift_semitones}\" --limiter_threshold \"{limiter_threshold}\" --limiter_release_time \"{limiter_release_time}\" --gain_db \"{gain_db}\" --distortion_gain \"{distortion_gain}\" --chorus_rate \"{chorus_rate}\" --chorus_depth \"{chorus_depth}\" --chorus_center_delay \"{chorus_center_delay}\" --chorus_feedback \"{chorus_feedback}\" --chorus_mix \"{chorus_mix}\" --bitcrush_bit_depth \"{bitcrush_bit_depth}\" --clipping_threshold \"{clipping_threshold}\" --compressor_threshold \"{compressor_threshold}\" --compressor_ratio \"{compressor_ratio}\" --compressor_attack \"{compressor_attack}\" --compressor_release \"{compressor_release}\" --delay_seconds \"{delay_seconds}\" --delay_feedback \"{delay_feedback}\" --delay_mix \"{delay_mix}\"\n", + "\n", + "from IPython.display import Audio, display, clear_output\n", + "\n", + "output_path = output_path.replace(\".wav\", f\".{export_format.lower()}\")\n", + "# clear_output()\n", + "display(Audio(output_path, autoplay=True))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yrWw2h9d2TRn" + }, + "source": [ + "## **Advanced Settings**" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "cellView": "form", + "id": "J43qejJ-2Tpp" + }, + "outputs": [], + "source": [ + "# @title # Post-processing effects\n", + "post_process = False # @param{type:\"boolean\"}\n", + "reverb = False # @param{type:\"boolean\"}\n", + "pitch_shift = False # @param{type:\"boolean\"}\n", + "limiter = False # @param{type:\"boolean\"}\n", + "gain = False # @param{type:\"boolean\"}\n", + "distortion = False # @param{type:\"boolean\"}\n", + "chorus = False # @param{type:\"boolean\"}\n", + "bitcrush = False # @param{type:\"boolean\"}\n", + "clipping = False # @param{type:\"boolean\"}\n", + "compressor = False # @param{type:\"boolean\"}\n", + "delay = False # @param{type:\"boolean\"}\n", + "\n", + "reverb_room_size = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "reverb_damping = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "reverb_wet_gain = 0.0 # @param {type:\"slider\", min:-20.0, max:20.0, step:0.1}\n", + "reverb_dry_gain = 0.0 # @param {type:\"slider\", min:-20.0, max:20.0, step:0.1}\n", + "reverb_width = 1.0 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "reverb_freeze_mode = 0.0 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "\n", + "pitch_shift_semitones = 0.0 # @param {type:\"slider\", min:-12.0, max:12.0, step:0.1}\n", + "\n", + "limiter_threshold = -1.0 # @param {type:\"slider\", min:-20.0, max:0.0, step:0.1}\n", + "limiter_release_time = 0.05 # @param {type:\"slider\", min:0.0, max:1.0, step:0.01}\n", + "\n", + "gain_db = 0.0 # @param {type:\"slider\", min:-20.0, max:20.0, step:0.1}\n", + "\n", + "distortion_gain = 0.0 # @param {type:\"slider\", min:0.0, 
max:1.0, step:0.1}\n", + "\n", + "chorus_rate = 1.5 # @param {type:\"slider\", min:0.1, max:10.0, step:0.1}\n", + "chorus_depth = 0.1 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "chorus_center_delay = 15.0 # @param {type:\"slider\", min:0.0, max:50.0, step:0.1}\n", + "chorus_feedback = 0.25 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "chorus_mix = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "\n", + "bitcrush_bit_depth = 4 # @param {type:\"slider\", min:1, max:16, step:1}\n", + "\n", + "clipping_threshold = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "\n", + "compressor_threshold = -20.0 # @param {type:\"slider\", min:-60.0, max:0.0, step:0.1}\n", + "compressor_ratio = 4.0 # @param {type:\"slider\", min:1.0, max:20.0, step:0.1}\n", + "compressor_attack = 0.001 # @param {type:\"slider\", min:0.0, max:0.1, step:0.001}\n", + "compressor_release = 0.1 # @param {type:\"slider\", min:0.0, max:1.0, step:0.01}\n", + "\n", + "delay_seconds = 0.1 # @param {type:\"slider\", min:0.0, max:1.0, step:0.01}\n", + "delay_feedback = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "delay_mix = 0.5 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1QkabnLlF2KB" + }, + "source": [ + "# Train" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "oBzqm4JkGGa0" + }, + "outputs": [], + "source": [ + "# @title Preprocess Dataset\n", + "import os\n", + "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n", + "model_name = \"Darwin\" # @param {type:\"string\"}\n", + "dataset_path = \"/content/drive/MyDrive/Darwin_Dataset\" # @param {type:\"string\"}\n", + "\n", + "sample_rate = \"40k\" # @param [\"32k\", \"40k\", \"48k\"] {allow-input: false}\n", + "sr = int(sample_rate.rstrip(\"k\")) * 1000\n", + "cpu_cores = 2 # @param {type:\"slider\", min:1, max:2, step:1}\n", + "cut_preprocess = True # @param{type:\"boolean\"}\n", + "process_effects = False # @param{type:\"boolean\"}\n", + "noise_reduction = False # @param{type:\"boolean\"}\n", + "noise_reduction_strength = 0.7 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n", + "\n", + "!python core.py preprocess --model_name \"{model_name}\" --dataset_path \"{dataset_path}\" --sample_rate \"{sr}\" --cpu_cores \"{cpu_cores}\" --cut_preprocess \"{cut_preprocess}\" --process_effects \"{process_effects}\" --noise_reduction \"{noise_reduction}\" --noise_reduction_strength \"{noise_reduction_strength}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "zWMiMYfRJTJv" + }, + "outputs": [], + "source": [ + "# @title Extract Features\n", + "rvc_version = \"v2\" # @param [\"v2\", \"v1\"] {allow-input: false}\n", + "f0_method = \"rmvpe\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\"] {allow-input: false}\n", + "hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:0}\n", + "\n", + "sr = int(sample_rate.rstrip(\"k\")) * 1000\n", + "cpu_cores = 2 # @param {type:\"slider\", min:1, max:2, step:1}\n", + "embedder_model = \"contentvec\" # @param [\"contentvec\", \"chinese-hubert-base\", \"japanese-hubert-base\", \"korean-hubert-base\", \"custom\"] {allow-input: false}\n", + "embedder_model_custom = \"\" # @param {type:\"string\"}\n", + "\n", + "!python core.py extract --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --f0_method \"{f0_method}\" --hop_length \"{hop_length}\" --sample_rate 
\"{sr}\" --cpu_cores \"{cpu_cores}\" --gpu \"0\" --embedder_model \"{embedder_model}\" --embedder_model_custom \"{embedder_model_custom}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "TI6LLdIzKAIa" + }, + "outputs": [], + "source": [ + "# @title Train\n", + "import threading\n", + "import time\n", + "import os\n", + "import shutil\n", + "import hashlib\n", + "import time\n", + "\n", + "LOGS_FOLDER = \"/content/Applio/logs/\"\n", + "GOOGLE_DRIVE_PATH = \"/content/drive/MyDrive/RVC_Backup\"\n", + "\n", + "\n", + "def import_google_drive_backup():\n", + " print(\"Importing Google Drive backup...\")\n", + " for root, dirs, files in os.walk(GOOGLE_DRIVE_PATH):\n", + " for filename in files:\n", + " filepath = os.path.join(root, filename)\n", + " if os.path.isfile(filepath):\n", + " backup_filepath = os.path.join(\n", + " LOGS_FOLDER, os.path.relpath(filepath, GOOGLE_DRIVE_PATH)\n", + " )\n", + " backup_folderpath = os.path.dirname(backup_filepath)\n", + " if not os.path.exists(backup_folderpath):\n", + " os.makedirs(backup_folderpath)\n", + " print(f\"Created backup folder: {backup_folderpath}\", flush=True)\n", + " shutil.copy2(filepath, backup_filepath)\n", + " print(f\"Imported file from Google Drive backup: {filename}\")\n", + " print(\"Google Drive backup import completed.\")\n", + "\n", + "\n", + "def get_md5_hash(file_path):\n", + " hash_md5 = hashlib.md5()\n", + " with open(file_path, \"rb\") as f:\n", + " for chunk in iter(lambda: f.read(4096), b\"\"):\n", + " hash_md5.update(chunk)\n", + " return hash_md5.hexdigest()\n", + "\n", + "\n", + "if \"autobackups\" not in globals():\n", + " autobackups = False\n", + "# @markdown ### 💾 AutoBackup\n", + "cooldown = 15 # @param {type:\"slider\", min:0, max:100, step:0}\n", + "auto_backups = True # @param{type:\"boolean\"}\n", + "def backup_files():\n", + " print(\"\\nStarting backup loop...\")\n", + " last_backup_timestamps_path = os.path.join(\n", + " LOGS_FOLDER, \"last_backup_timestamps.txt\"\n", + " )\n", + " fully_updated = False\n", + "\n", + " while True:\n", + " try:\n", + " updated_files = 0\n", + " deleted_files = 0\n", + " new_files = 0\n", + " last_backup_timestamps = {}\n", + "\n", + " try:\n", + " with open(last_backup_timestamps_path, \"r\") as f:\n", + " last_backup_timestamps = dict(line.strip().split(\":\") for line in f)\n", + " except FileNotFoundError:\n", + " pass\n", + "\n", + " for root, dirs, files in os.walk(LOGS_FOLDER):\n", + " # Excluding \"zips\" and \"mute\" directories\n", + " if \"zips\" in dirs:\n", + " dirs.remove(\"zips\")\n", + " if \"mute\" in dirs:\n", + " dirs.remove(\"mute\")\n", + "\n", + " for filename in files:\n", + " if filename != \"last_backup_timestamps.txt\":\n", + " filepath = os.path.join(root, filename)\n", + " if os.path.isfile(filepath):\n", + " backup_filepath = os.path.join(\n", + " GOOGLE_DRIVE_PATH,\n", + " os.path.relpath(filepath, LOGS_FOLDER),\n", + " )\n", + " backup_folderpath = os.path.dirname(backup_filepath)\n", + " if not os.path.exists(backup_folderpath):\n", + " os.makedirs(backup_folderpath)\n", + " last_backup_timestamp = last_backup_timestamps.get(filepath)\n", + " current_timestamp = os.path.getmtime(filepath)\n", + " if (\n", + " last_backup_timestamp is None\n", + " or float(last_backup_timestamp) < current_timestamp\n", + " ):\n", + " shutil.copy2(filepath, backup_filepath)\n", + " last_backup_timestamps[filepath] = str(current_timestamp)\n", + " if last_backup_timestamp is None:\n", + " new_files += 
1\n", + " else:\n", + " updated_files += 1\n", + "\n", + "\n", + " for filepath in list(last_backup_timestamps.keys()):\n", + " if not os.path.exists(filepath):\n", + " backup_filepath = os.path.join(\n", + " GOOGLE_DRIVE_PATH, os.path.relpath(filepath, LOGS_FOLDER)\n", + " )\n", + " if os.path.exists(backup_filepath):\n", + " os.remove(backup_filepath)\n", + " deleted_files += 1\n", + " del last_backup_timestamps[filepath]\n", + "\n", + "\n", + " if updated_files > 0 or deleted_files > 0 or new_files > 0:\n", + " print(f\"Backup Complete: {new_files} new, {updated_files} updated, {deleted_files} deleted.\")\n", + " fully_updated = False\n", + " elif not fully_updated:\n", + " print(\"Files are up to date.\")\n", + " fully_updated = True\n", + "\n", + " with open(last_backup_timestamps_path, \"w\") as f:\n", + " for filepath, timestamp in last_backup_timestamps.items():\n", + " f.write(f\"{filepath}:{timestamp}\\n\")\n", + "\n", + " time.sleep(cooldown if fully_updated else 0.1)\n", + "\n", + "\n", + " except Exception as error:\n", + " print(f\"An error occurred during backup: {error}\")\n", + "\n", + "\n", + "if autobackups:\n", + " autobackups = False\n", + " print(\"Autobackup Disabled\")\n", + "else:\n", + " autobackups = True\n", + " print(\"Autobackup Enabled\") \n", + "# @markdown ### ⚙️ Train Settings\n", + "total_epoch = 800 # @param {type:\"integer\"}\n", + "batch_size = 15 # @param {type:\"slider\", min:1, max:25, step:0}\n", + "gpu = 0\n", + "sr = int(sample_rate.rstrip(\"k\")) * 1000\n", + "pretrained = True # @param{type:\"boolean\"}\n", + "cleanup = False # @param{type:\"boolean\"}\n", + "cache_data_in_gpu = False # @param{type:\"boolean\"}\n", + "tensorboard = True # @param{type:\"boolean\"}\n", + "# @markdown ### ➡️ Choose how many epochs your model will be stored\n", + "save_every_epoch = 10 # @param {type:\"slider\", min:1, max:100, step:0}\n", + "save_only_latest = False # @param{type:\"boolean\"}\n", + "save_every_weights = False # @param{type:\"boolean\"}\n", + "overtraining_detector = False # @param{type:\"boolean\"}\n", + "overtraining_threshold = 50 # @param {type:\"slider\", min:1, max:100, step:0}\n", + "# @markdown ### ❓ Optional\n", + "# @markdown In case you select custom pretrained, you will have to download the pretraineds and enter the path of the pretraineds.\n", + "custom_pretrained = False # @param{type:\"boolean\"}\n", + "g_pretrained_path = \"/content/Applio/rvc/models/pretraineds/pretraineds_custom/G48k.pth\" # @param {type:\"string\"}\n", + "d_pretrained_path = \"/content/Applio/rvc/models/pretraineds/pretraineds_custom/D48k.pth\" # @param {type:\"string\"}\n", + "\n", + "if \"pretrained\" not in globals():\n", + " pretrained = True\n", + "\n", + "if \"custom_pretrained\" not in globals():\n", + " custom_pretrained = False\n", + "\n", + "if \"g_pretrained_path\" not in globals():\n", + " g_pretrained_path = \"Custom Path\"\n", + "\n", + "if \"d_pretrained_path\" not in globals():\n", + " d_pretrained_path = \"Custom Path\"\n", + "\n", + "\n", + "def start_train():\n", + " if tensorboard == True:\n", + " %load_ext tensorboard\n", + " %tensorboard --logdir /content/Applio/logs/\n", + " !python core.py train --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --save_every_epoch \"{save_every_epoch}\" --save_only_latest \"{save_only_latest}\" --save_every_weights \"{save_every_weights}\" --total_epoch \"{total_epoch}\" --sample_rate \"{sr}\" --batch_size \"{batch_size}\" --gpu \"{gpu}\" --pretrained \"{pretrained}\" --custom_pretrained 
\"{custom_pretrained}\" --g_pretrained_path \"{g_pretrained_path}\" --d_pretrained_path \"{d_pretrained_path}\" --overtraining_detector \"{overtraining_detector}\" --overtraining_threshold \"{overtraining_threshold}\" --cleanup \"{cleanup}\" --cache_data_in_gpu \"{cache_data_in_gpu}\"\n", + "\n", + "\n", + "server_thread = threading.Thread(target=start_train)\n", + "server_thread.start()\n", + "\n", + "if auto_backups:\n", + " backup_files()\n", + "else:\n", + " while True:\n", + " time.sleep(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "bHLs5AT4Q1ck" + }, + "outputs": [], + "source": [ + "# @title Generate index file\n", + "index_algorithm = \"Auto\" # @param [\"Auto\", \"Faiss\", \"KMeans\"] {allow-input: false}\n", + "!python core.py index --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --index_algorithm \"{index_algorithm}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "X_eU_SoiHIQg" + }, + "outputs": [], + "source": [ + "# @title Save model\n", + "# @markdown Enter the name of the model and the steps. You can find it in your `/content/Applio/logs` folder.\n", + "%cd /content\n", + "import os, shutil, sys\n", + "\n", + "model_name = \"Darwin\" # @param {type:\"string\"}\n", + "model_epoch = 800 # @param {type:\"integer\"}\n", + "save_big_file = False # @param {type:\"boolean\"}\n", + "\n", + "if os.path.exists(\"/content/zips\"):\n", + " shutil.rmtree(\"/content/zips\")\n", + "print(\"Removed zips.\")\n", + "\n", + "os.makedirs(f\"/content/zips/{model_name}/\", exist_ok=True)\n", + "print(\"Created zips.\")\n", + "\n", + "logs_folder = f\"/content/Applio/logs/{model_name}/\"\n", + "weight_file = None\n", + "if not os.path.exists(logs_folder):\n", + " print(f\"Model folder not found.\")\n", + " sys.exit(\"\")\n", + "\n", + "for filename in os.listdir(logs_folder):\n", + " if filename.startswith(f\"{model_name}_{model_epoch}e\") and filename.endswith(\".pth\"):\n", + " weight_file = filename\n", + " break\n", + "if weight_file is None:\n", + " print(\"There is no weight file with that name\")\n", + " sys.exit(\"\")\n", + "if not save_big_file:\n", + " !cp {logs_folder}added_*.index /content/zips/{model_name}/\n", + " !cp {logs_folder}total_*.npy /content/zips/{model_name}/\n", + " !cp {logs_folder}{weight_file} /content/zips/{model_name}/\n", + " %cd /content/zips\n", + " !zip -r {model_name}.zip {model_name}\n", + "if save_big_file:\n", + " %cd /content/Applio\n", + " latest_steps = -1\n", + " logs_folder = \"./logs/\" + model_name\n", + " for filename in os.listdir(logs_folder):\n", + " if filename.startswith(\"G_\") and filename.endswith(\".pth\"):\n", + " steps = int(filename.split(\"_\")[1].split(\".\")[0])\n", + " if steps > latest_steps:\n", + " latest_steps = steps\n", + " MODELZIP = model_name + \".zip\"\n", + " !mkdir -p /content/zips\n", + " ZIPFILEPATH = os.path.join(\"/content/zips\", MODELZIP)\n", + " for filename in os.listdir(logs_folder):\n", + " if \"G_\" in filename or \"D_\" in filename:\n", + " if str(latest_steps) in filename:\n", + " !zip -r {ZIPFILEPATH} {os.path.join(logs_folder, filename)}\n", + " else:\n", + " !zip -r {ZIPFILEPATH} {os.path.join(logs_folder, filename)}\n", + "\n", + "!mkdir -p /content/drive/MyDrive/RVC_Backup/\n", + "shutil.move(\n", + " f\"/content/zips/{model_name}.zip\",\n", + " f\"/content/drive/MyDrive/RVC_Backup/{model_name}.zip\",\n", + ")\n", + "%cd /content/Applio\n", + 
"shutil.rmtree(\"/content/zips\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OaKoymXsyEYN" + }, + "source": [ + "# Resume-training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "d3KgLAYnyHkP" + }, + "outputs": [], + "source": [ + "# @title Load a Backup\n", + "from google.colab import drive\n", + "import os\n", + "import shutil\n", + "\n", + "# @markdown Put the exact name you put as your Model Name in Applio.\n", + "modelname = \"My-Project\" # @param {type:\"string\"}\n", + "source_path = \"/content/drive/MyDrive/RVC_Backup/\" + modelname\n", + "destination_path = \"/content/Applio/logs/\" + modelname\n", + "backup_timestamps_file = \"last_backup_timestamps.txt\"\n", + "if not os.path.exists(source_path):\n", + " print(\n", + " \"The model folder does not exist. Please verify the name is correct or check your Google Drive.\"\n", + " )\n", + "else:\n", + " time_ = os.path.join(\"/content/drive/MyDrive/RVC_Backup/\", backup_timestamps_file)\n", + " time__ = os.path.join(\"/content/Applio/logs/\", backup_timestamps_file)\n", + " if os.path.exists(time_):\n", + " shutil.copy(time_, time__)\n", + " shutil.copytree(source_path, destination_path)\n", + " print(\"Model backup loaded successfully.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "sc9DzvRCyJ2d" + }, + "outputs": [], + "source": [ + "# @title Set training variables\n", + "# @markdown ### ➡️ Use the same as you did previously\n", + "model_name = \"Darwin\" # @param {type:\"string\"}\n", + "sample_rate = \"40k\" # @param [\"32k\", \"40k\", \"48k\"] {allow-input: false}\n", + "rvc_version = \"v2\" # @param [\"v2\", \"v1\"] {allow-input: false}\n", + "f0_method = \"rmvpe\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\"] {allow-input: false}\n", + "hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:0}\n", + "sr = int(sample_rate.rstrip(\"k\")) * 1000" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "ymMCTSD6m8qV" + ], + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/assets/discord_presence.py b/assets/discord_presence.py index 5600487b9..76aa3054f 100644 --- a/assets/discord_presence.py +++ b/assets/discord_presence.py @@ -1,7 +1,5 @@ from pypresence import Presence import datetime as dt -import time - class RichPresenceManager: def __init__(self): @@ -32,7 +30,7 @@ def update_presence(self): details="Open ecosystem for voice cloning", buttons=[ {"label": "Home", "url": "https://applio.org"}, - {"label": "Download", "url": "https://applio.org/download"}, + {"label": "Download", "url": "https://applio.org/products/applio"}, ], large_image="logo", large_text="Experimenting with applio", diff --git a/assets/zluda/README.md b/assets/zluda/README.md deleted file mode 100644 index fb68abaf2..000000000 --- a/assets/zluda/README.md +++ /dev/null @@ -1,70 +0,0 @@ - -## Installation and Setup Instructions - -Zluda is a CUDA emulator that supports a select number of modern AMD GPUs. The following guide is for Windows installation of Zluda. - -### 1. Install VC++ Runtime - -Download and install the VC++ Runtime from [this link](https://aka.ms/vs/17/release/vc_redist.x64.exe). - - -### 2. 
Install HIP SDK - -Read the [System Requirements](https://rocm.docs.amd.com/projects/install-on-windows/en/develop/reference/system-requirements.html) - -Check the *"Windows-supported GPUs"* section to determine the correct installation steps: - -2.1 If your GPU has a green checkbox in the HIP SDK column: - -- Install either v6.1.2 or v5.7.1 HIP SDK from [AMD ROCm Hub](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html) - -2.2 If your GPU is RX 6600, 6600XT, 6650XT, 6700, 6700XT, 6750XT: -- Install v5.7.1 HIP SDK from [AMD ROCm Hub](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html) -- For 6700, 6700XT, 6750XT, download [gfx1031 archive](https://github.com/brknsoul/ROCmLibs/raw/main/Optimised_ROCmLibs_gfx1031.7z) -- For 6600, 6600XT, 6650XT, download [gfx1032 archive](https://github.com/brknsoul/ROCmLibs/raw/main/Optimised_ROCmLibs_gfx1032.7z) -**Steps:** -a. Rename `C:\Program Files\AMD\ROCm\5.7\bin\rocblas\library` to `library.old` -b. Create a new folder named `library` -c. Unzip the content of the archive into that folder - -2.3 For all other AMD GPUs: find gfxNNNN value for your GPU by googling "techpowerup your_gpu" (listed under "Shader ISA" on the page). - -2.3.1 For `gfx803, gfx900, gfx906, gfx1010, gfx1011, gfx1012, gfx1030, gfx1100, gfx1101, gfx1102` GPUs: -- Install v5.7.1 HIP SDK from [AMD ROCm Hub](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html) -- Download [this archive](https://github.com/brknsoul/ROCmLibs/raw/main/ROCmLibs.7z) - **Steps:** - a. Rename `C:\Program Files\AMD\ROCm\5.7\bin\rocblas\library` to `library.old` - b. Unzip the content of the archive into `C:\Program Files\AMD\ROCm\5.7\bin\rocblas\` - -2.3.2 Other GPUs -- Visit [this repository with a collection of tensile libraries](https://github.com/likelovewant/ROCmLibs-for-gfx1103-AMD780M-APU) -- Follow the description there. - -### 3. Installing Applio -3.1 Install [Python 3.10.11] https://www.python.org/ftp/python/3.10.11/python-3.10.11-amd64.exe - - check "Add Python to Path" -3.2 Download Applio v3.2.5 or higher source code zip Applio's release page, unzip to the desired folder. -3.3 Edit `run-install.bat` and update the Torch URL from `cu121` to `cu118` - ```pip install torch==2.3.1 torchvision torchaudio --upgrade --index-url https://download.pytorch.org/whl/cu121``` -3.4 Execute `run-install.bat` to install the required python libraries. -3.5. If installation completes without errors proceed to the next step - -### 4. Download Zluda and Patch Torch Libraries -4.1 move all .bat files from `assets\zluda`to root Applio folder -4.2 For HIP SDK 5.7: -- Run `patch_zluda_hip57.bat`. -- Add `C:\Program Files\AMD\ROCm\5.7\bin` to your system's Path environment variable. - -4.3 For HIP SDK 6.1:** -- Run `patch_zluda_hip61.bat`. -- Add `C:\Program Files\AMD\ROCm\6.1\bin` to your system's Path environment variable. - -### 5. Starting Applio - -It is assumed your primary AMD GPU has index 0. If by some reason your iGPU is listed first under 'Display Adapters' in Device manager, edit the `run-applio-amd.bat` file and change the value from "0" to "1". - -Execute `run-applio-amd.bat` to start Applio. - -### 6. Initial Compilation - -Any time Zluda emulator meets a previously unseen computational task it compiles the kernel code to support it. During this time there's no output and Applio appears to be frozen. The compilation time takes 15..20 minutes. 
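Step 3.3 of the Zluda guide above tells the user to switch the Torch index URL from `cu121` to `cu118`, but the quoted command still shows `cu121`. Assuming the same version pins, the edited line in `run-install.bat` would presumably read:

```
pip install torch==2.3.1 torchvision torchaudio --upgrade --index-url https://download.pytorch.org/whl/cu118
```
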
diff --git a/core.py b/core.py index 1f9c0fc55..dc3889c19 100644 --- a/core.py +++ b/core.py @@ -33,7 +33,7 @@ def load_voices_data(): voices_data = load_voices_data() -locales = list({voice["Locale"] for voice in voices_data}) +locales = list({voice["ShortName"] for voice in voices_data}) @lru_cache(maxsize=None) @@ -69,7 +69,6 @@ def run_infer_script( clean_audio: bool, clean_strength: float, export_format: str, - upscale_audio: bool, f0_file: str, embedder_model: str, embedder_model_custom: str = None, @@ -134,7 +133,6 @@ def run_infer_script( "clean_audio": clean_audio, "clean_strength": clean_strength, "export_format": export_format, - "upscale_audio": upscale_audio, "f0_file": f0_file, "embedder_model": embedder_model, "embedder_model_custom": embedder_model_custom, @@ -207,7 +205,6 @@ def run_batch_infer_script( clean_audio: bool, clean_strength: float, export_format: str, - upscale_audio: bool, f0_file: str, embedder_model: str, embedder_model_custom: str = None, @@ -272,7 +269,6 @@ def run_batch_infer_script( "clean_audio": clean_audio, "clean_strength": clean_strength, "export_format": export_format, - "upscale_audio": upscale_audio, "f0_file": f0_file, "embedder_model": embedder_model, "embedder_model_custom": embedder_model_custom, @@ -348,7 +344,6 @@ def run_tts_script( clean_audio: bool, clean_strength: float, export_format: str, - upscale_audio: bool, f0_file: str, embedder_model: str, embedder_model_custom: str = None, @@ -394,7 +389,6 @@ def run_tts_script( clean_audio=clean_audio, clean_strength=clean_strength, export_format=export_format, - upscale_audio=upscale_audio, f0_file=f0_file, embedder_model=embedder_model, embedder_model_custom=embedder_model_custom, @@ -824,14 +818,6 @@ def parse_arguments(): help=embedder_model_custom_description, default=None, ) - upscale_audio_description = "Upscale the input audio to a higher quality before processing. This can improve the overall quality of the output, especially for low-quality input audio." - infer_parser.add_argument( - "--upscale_audio", - type=lambda x: bool(strtobool(x)), - choices=[True, False], - help=upscale_audio_description, - default=False, - ) f0_file_description = "Full path to an external F0 file (.f0). This allows you to use pre-computed pitch values for the input audio." 
infer_parser.add_argument( "--f0_file", @@ -1346,13 +1332,6 @@ def parse_arguments(): help=embedder_model_custom_description, default=None, ) - batch_infer_parser.add_argument( - "--upscale_audio", - type=lambda x: bool(strtobool(x)), - choices=[True, False], - help=upscale_audio_description, - default=False, - ) batch_infer_parser.add_argument( "--f0_file", type=str, @@ -1840,13 +1819,6 @@ def parse_arguments(): help=embedder_model_custom_description, default=None, ) - tts_parser.add_argument( - "--upscale_audio", - type=lambda x: bool(strtobool(x)), - choices=[True, False], - help=upscale_audio_description, - default=False, - ) tts_parser.add_argument( "--f0_file", type=str, @@ -2317,7 +2289,6 @@ def main(): export_format=args.export_format, embedder_model=args.embedder_model, embedder_model_custom=args.embedder_model_custom, - upscale_audio=args.upscale_audio, f0_file=args.f0_file, formant_shifting=args.formant_shifting, formant_qfrency=args.formant_qfrency, @@ -2381,7 +2352,6 @@ def main(): export_format=args.export_format, embedder_model=args.embedder_model, embedder_model_custom=args.embedder_model_custom, - upscale_audio=args.upscale_audio, f0_file=args.f0_file, formant_shifting=args.formant_shifting, formant_qfrency=args.formant_qfrency, @@ -2437,8 +2407,8 @@ def main(): protect=args.protect, hop_length=args.hop_length, f0_method=args.f0_method, - input_path=args.input_path, - output_path=args.output_path, + output_tts_path=args.output_tts_path, + output_rvc_path=args.output_rvc_path, pth_path=args.pth_path, index_path=args.index_path, split_audio=args.split_audio, @@ -2449,7 +2419,6 @@ def main(): export_format=args.export_format, embedder_model=args.embedder_model, embedder_model_custom=args.embedder_model_custom, - upscale_audio=args.upscale_audio, f0_file=args.f0_file, ) elif args.mode == "preprocess": diff --git a/requirements.txt b/requirements.txt index d7d00c291..2d6a7e38a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,20 +6,14 @@ numpy==1.23.5 requests>=2.31.0,<2.32.0 tqdm wget -pydantic==2.8.2 -fastapi==0.112.0 -starlette==0.37.2 # Audio processing ffmpeg-python>=0.2.0 faiss-cpu==1.7.3 librosa==0.9.2 -pyworld==0.3.4 scipy==1.11.1 soundfile==0.12.1 -praat-parselmouth noisereduce -versatile-audio-upscaler pedalboard stftpitchshift @@ -44,11 +38,8 @@ gradio==4.43.0 # Miscellaneous utilities certifi>=2023.07.22; sys_platform == 'darwin' antlr4-python3-runtime==4.8; sys_platform == 'darwin' -ffmpy==0.3.1 tensorboardX edge-tts==6.1.9 pypresence beautifulsoup4 -flask -local-attention - +flask \ No newline at end of file diff --git a/rvc/infer/infer.py b/rvc/infer/infer.py index e53942e6f..ae78283db 100644 --- a/rvc/infer/infer.py +++ b/rvc/infer/infer.py @@ -108,7 +108,7 @@ def convert_audio_format(input_path, output_path, output_format): """ try: if output_format != "WAV": - print(f"Converting audio to {output_format} format...") + print(f"Saving audio as {output_format}...") audio, sample_rate = librosa.load(input_path, sr=None) common_sample_rates = [ 8000, @@ -255,10 +255,6 @@ def convert_audio( start_time = time.time() print(f"Converting audio '{audio_input_path}'...") - if upscale_audio == True: - from audio_upscaler import upscale - - upscale(audio_input_path, audio_input_path) audio = load_audio_infer( audio_input_path, 16000, diff --git a/rvc/infer/pipeline.py b/rvc/infer/pipeline.py index 0f570ee2e..51b97d595 100644 --- a/rvc/infer/pipeline.py +++ b/rvc/infer/pipeline.py @@ -286,7 +286,7 @@ def get_f0_hybrid( if methods_str: methods = [method.strip() for 
method in methods_str.group(1).split("+")] f0_computation_stack = [] - print(f"Calculating f0 pitch estimations for methods {str(methods)}") + print(f"Calculating f0 pitch estimations for methods: {', '.join(methods)}") x = x.astype(np.float32) x /= np.quantile(np.abs(x), 0.999) for method in methods: diff --git a/rvc/lib/algorithm/attentions.py b/rvc/lib/algorithm/attentions.py index c4f47f8b9..37367ada9 100644 --- a/rvc/lib/algorithm/attentions.py +++ b/rvc/lib/algorithm/attentions.py @@ -1,6 +1,5 @@ import math import torch - from rvc.lib.algorithm.commons import convert_pad_shape @@ -33,192 +32,157 @@ def __init__( proximal_init=False, ): super().__init__() - assert channels % n_heads == 0 + assert ( + channels % n_heads == 0 + ), "Channels must be divisible by the number of heads." self.channels = channels self.out_channels = out_channels self.n_heads = n_heads - self.p_dropout = p_dropout + self.k_channels = channels // n_heads self.window_size = window_size - self.heads_share = heads_share self.block_length = block_length self.proximal_bias = proximal_bias - self.proximal_init = proximal_init - self.attn = None - self.k_channels = channels // n_heads + # Define projections self.conv_q = torch.nn.Conv1d(channels, channels, 1) self.conv_k = torch.nn.Conv1d(channels, channels, 1) self.conv_v = torch.nn.Conv1d(channels, channels, 1) self.conv_o = torch.nn.Conv1d(channels, out_channels, 1) + self.drop = torch.nn.Dropout(p_dropout) - if window_size is not None: + # Relative positional encodings + if window_size: n_heads_rel = 1 if heads_share else n_heads rel_stddev = self.k_channels**-0.5 self.emb_rel_k = torch.nn.Parameter( - torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) + torch.randn(n_heads_rel, 2 * window_size + 1, self.k_channels) * rel_stddev ) self.emb_rel_v = torch.nn.Parameter( - torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) + torch.randn(n_heads_rel, 2 * window_size + 1, self.k_channels) * rel_stddev ) + # Initialize weights torch.nn.init.xavier_uniform_(self.conv_q.weight) torch.nn.init.xavier_uniform_(self.conv_k.weight) torch.nn.init.xavier_uniform_(self.conv_v.weight) + torch.nn.init.xavier_uniform_(self.conv_o.weight) + if proximal_init: with torch.no_grad(): self.conv_k.weight.copy_(self.conv_q.weight) self.conv_k.bias.copy_(self.conv_q.bias) def forward(self, x, c, attn_mask=None): - q = self.conv_q(x) - k = self.conv_k(c) - v = self.conv_v(c) + # Compute query, key, value projections + q, k, v = self.conv_q(x), self.conv_k(c), self.conv_v(c) + # Compute attention x, self.attn = self.attention(q, k, v, mask=attn_mask) - x = self.conv_o(x) - return x + # Final output projection + return self.conv_o(x) def attention(self, query, key, value, mask=None): - # reshape [b, d, t] -> [b, n_h, t, d_k] + # Reshape and compute scaled dot-product attention b, d, t_s, t_t = (*key.size(), query.size(2)) query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3) key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3) value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3) scores = torch.matmul(query / math.sqrt(self.k_channels), key.transpose(-2, -1)) - if self.window_size is not None: - assert ( - t_s == t_t - ), "Relative attention is only available for self-attention." 
- key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s) - rel_logits = self._matmul_with_relative_keys( - query / math.sqrt(self.k_channels), key_relative_embeddings - ) - scores_local = self._relative_position_to_absolute_position(rel_logits) - scores = scores + scores_local + + if self.window_size: + assert t_s == t_t, "Relative attention only supports self-attention." + scores += self._compute_relative_scores(query, t_s) + if self.proximal_bias: - assert t_s == t_t, "Proximal bias is only available for self-attention." - scores = scores + self._attention_bias_proximal(t_s).to( - device=scores.device, dtype=scores.dtype - ) + assert t_s == t_t, "Proximal bias only supports self-attention." + scores += self._attention_bias_proximal(t_s).to(scores.device, scores.dtype) + if mask is not None: scores = scores.masked_fill(mask == 0, -1e4) - if self.block_length is not None: - assert ( - t_s == t_t - ), "Local attention is only available for self-attention." + if self.block_length: block_mask = ( torch.ones_like(scores) .triu(-self.block_length) .tril(self.block_length) ) scores = scores.masked_fill(block_mask == 0, -1e4) - p_attn = torch.nn.functional.softmax(scores, dim=-1) # [b, n_h, t_t, t_s] - p_attn = self.drop(p_attn) + + # Apply softmax and dropout + p_attn = self.drop(torch.nn.functional.softmax(scores, dim=-1)) + + # Compute attention output output = torch.matmul(p_attn, value) - if self.window_size is not None: - relative_weights = self._absolute_position_to_relative_position(p_attn) - value_relative_embeddings = self._get_relative_embeddings( - self.emb_rel_v, t_s - ) - output = output + self._matmul_with_relative_values( - relative_weights, value_relative_embeddings - ) - output = ( - output.transpose(2, 3).contiguous().view(b, d, t_t) - ) # [b, n_h, t_t, d_k] -> [b, d, t_t] - return output, p_attn + if self.window_size: + output += self._apply_relative_values(p_attn, t_s) + + return output.transpose(2, 3).contiguous().view(b, d, t_t), p_attn + + def _compute_relative_scores(self, query, length): + rel_emb = self._get_relative_embeddings(self.emb_rel_k, length) + rel_logits = self._matmul_with_relative_keys( + query / math.sqrt(self.k_channels), rel_emb + ) + return self._relative_position_to_absolute_position(rel_logits) + + def _apply_relative_values(self, p_attn, length): + rel_weights = self._absolute_position_to_relative_position(p_attn) + rel_emb = self._get_relative_embeddings(self.emb_rel_v, length) + return self._matmul_with_relative_values(rel_weights, rel_emb) + + # Helper methods def _matmul_with_relative_values(self, x, y): - """ - x: [b, h, l, m] - y: [h or 1, m, d] - ret: [b, h, l, d] - """ - ret = torch.matmul(x, y.unsqueeze(0)) - return ret + return torch.matmul(x, y.unsqueeze(0)) def _matmul_with_relative_keys(self, x, y): - """ - x: [b, h, l, d] - y: [h or 1, m, d] - ret: [b, h, l, m] - """ - ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1)) - return ret - - def _get_relative_embeddings(self, relative_embeddings, length): - # Pad first before slice to avoid using cond ops. 
+ return torch.matmul(x, y.unsqueeze(0).transpose(-2, -1)) + + def _get_relative_embeddings(self, embeddings, length): pad_length = max(length - (self.window_size + 1), 0) - slice_start_position = max((self.window_size + 1) - length, 0) - slice_end_position = slice_start_position + 2 * length - 1 + start = max((self.window_size + 1) - length, 0) + end = start + 2 * length - 1 + if pad_length > 0: - padded_relative_embeddings = torch.nn.functional.pad( - relative_embeddings, + embeddings = torch.nn.functional.pad( + embeddings, convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]), ) - else: - padded_relative_embeddings = relative_embeddings - used_relative_embeddings = padded_relative_embeddings[ - :, slice_start_position:slice_end_position - ] - return used_relative_embeddings + return embeddings[:, start:end] def _relative_position_to_absolute_position(self, x): - """ - x: [b, h, l, 2*l-1] - ret: [b, h, l, l] - """ batch, heads, length, _ = x.size() - - # Concat columns of pad to shift from relative to absolute indexing. x = torch.nn.functional.pad( x, convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]]) ) - - # Concat extra elements so to add up to shape (len+1, 2*len-1). - x_flat = x.view([batch, heads, length * 2 * length]) + x_flat = x.view(batch, heads, length * 2 * length) x_flat = torch.nn.functional.pad( x_flat, convert_pad_shape([[0, 0], [0, 0], [0, length - 1]]) ) - - # Reshape and slice out the padded elements. - x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[ + return x_flat.view(batch, heads, length + 1, 2 * length - 1)[ :, :, :length, length - 1 : ] - return x_final def _absolute_position_to_relative_position(self, x): - """ - x: [b, h, l, l] - ret: [b, h, l, 2*l-1] - """ batch, heads, length, _ = x.size() - # padd along column x = torch.nn.functional.pad( x, convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]]) ) - x_flat = x.view([batch, heads, length**2 + length * (length - 1)]) - # add 0's in the beginning that will skew the elements after reshape + x_flat = x.view(batch, heads, length**2 + length * (length - 1)) x_flat = torch.nn.functional.pad( x_flat, convert_pad_shape([[0, 0], [0, 0], [length, 0]]) ) - x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:] - return x_final + return x_flat.view(batch, heads, length, 2 * length)[:, :, :, 1:] def _attention_bias_proximal(self, length): - """Bias for self-attention to encourage attention to close positions. - Args: - length: an integer scalar. 
- """ r = torch.arange(length, dtype=torch.float32) - diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1) - return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0) + diff = r.unsqueeze(0) - r.unsqueeze(1) + return -torch.log1p(torch.abs(diff)).unsqueeze(0).unsqueeze(0) class FFN(torch.nn.Module): @@ -246,47 +210,34 @@ def __init__( causal=False, ): super().__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.filter_channels = filter_channels - self.kernel_size = kernel_size - self.p_dropout = p_dropout - self.activation = activation - self.causal = causal - - if causal: - self.padding = self._causal_padding - else: - self.padding = self._same_padding + self.padding_fn = self._causal_padding if causal else self._same_padding self.conv_1 = torch.nn.Conv1d(in_channels, filter_channels, kernel_size) self.conv_2 = torch.nn.Conv1d(filter_channels, out_channels, kernel_size) self.drop = torch.nn.Dropout(p_dropout) + self.activation = activation + def forward(self, x, x_mask): - x = self.conv_1(self.padding(x * x_mask)) - if self.activation == "gelu": - x = x * torch.sigmoid(1.702 * x) - else: - x = torch.relu(x) + x = self.conv_1(self.padding_fn(x * x_mask)) + x = self._apply_activation(x) x = self.drop(x) - x = self.conv_2(self.padding(x * x_mask)) + x = self.conv_2(self.padding_fn(x * x_mask)) return x * x_mask + def _apply_activation(self, x): + if self.activation == "gelu": + return x * torch.sigmoid(1.702 * x) + return torch.relu(x) + def _causal_padding(self, x): - if self.kernel_size == 1: - return x - pad_l = self.kernel_size - 1 - pad_r = 0 - padding = [[0, 0], [0, 0], [pad_l, pad_r]] - x = torch.nn.functional.pad(x, convert_pad_shape(padding)) - return x + pad_l, pad_r = self.conv_1.kernel_size[0] - 1, 0 + return torch.nn.functional.pad( + x, convert_pad_shape([[0, 0], [0, 0], [pad_l, pad_r]]) + ) def _same_padding(self, x): - if self.kernel_size == 1: - return x - pad_l = (self.kernel_size - 1) // 2 - pad_r = self.kernel_size // 2 - padding = [[0, 0], [0, 0], [pad_l, pad_r]] - x = torch.nn.functional.pad(x, convert_pad_shape(padding)) - return x + pad = (self.conv_1.kernel_size[0] - 1) // 2 + return torch.nn.functional.pad( + x, convert_pad_shape([[0, 0], [0, 0], [pad, pad]]) + ) diff --git a/rvc/lib/algorithm/discriminators.py b/rvc/lib/algorithm/discriminators.py index 23f8c6894..99251ad6c 100644 --- a/rvc/lib/algorithm/discriminators.py +++ b/rvc/lib/algorithm/discriminators.py @@ -15,55 +15,16 @@ class MultiPeriodDiscriminator(torch.nn.Module): the input signal at different periods. Args: + periods (str): Periods of the discriminator. V1 = [2, 3, 5, 7, 11, 17], V2 = [2, 3, 5, 7, 11, 17, 23, 37]. use_spectral_norm (bool): Whether to use spectral normalization. Defaults to False. """ - def __init__(self, use_spectral_norm=False): + def __init__(self, version, use_spectral_norm=False): super(MultiPeriodDiscriminator, self).__init__() - periods = [2, 3, 5, 7, 11, 17] - self.discriminators = torch.nn.ModuleList( - [DiscriminatorS(use_spectral_norm=use_spectral_norm)] - + [DiscriminatorP(p, use_spectral_norm=use_spectral_norm) for p in periods] + periods = ( + [2, 3, 5, 7, 11, 17] if version == "v1" else [2, 3, 5, 7, 11, 17, 23, 37] ) - - def forward(self, y, y_hat): - """ - Forward pass of the multi-period discriminator. - - Args: - y (torch.Tensor): Real audio signal. - y_hat (torch.Tensor): Fake audio signal. 
- """ - y_d_rs, y_d_gs, fmap_rs, fmap_gs = [], [], [], [] - for d in self.discriminators: - y_d_r, fmap_r = d(y) - y_d_g, fmap_g = d(y_hat) - y_d_rs.append(y_d_r) - y_d_gs.append(y_d_g) - fmap_rs.append(fmap_r) - fmap_gs.append(fmap_g) - - return y_d_rs, y_d_gs, fmap_rs, fmap_gs - - -class MultiPeriodDiscriminatorV2(torch.nn.Module): - """ - Multi-period discriminator V2. - - This class implements a multi-period discriminator V2, which is used - to discriminate between real and fake audio signals. The discriminator - is composed of a series of convolutional layers that are applied to - the input signal at different periods. - - Args: - use_spectral_norm (bool): Whether to use spectral normalization. - Defaults to False. - """ - - def __init__(self, use_spectral_norm=False): - super(MultiPeriodDiscriminatorV2, self).__init__() - periods = [2, 3, 5, 7, 11, 17, 23, 37] self.discriminators = torch.nn.ModuleList( [DiscriminatorS(use_spectral_norm=use_spectral_norm)] + [DiscriminatorP(p, use_spectral_norm=use_spectral_norm) for p in periods] @@ -71,7 +32,7 @@ def __init__(self, use_spectral_norm=False): def forward(self, y, y_hat): """ - Forward pass of the multi-period discriminator V2. + Forward pass of the multi-period discriminator. Args: y (torch.Tensor): Real audio signal. diff --git a/rvc/lib/algorithm/encoders.py b/rvc/lib/algorithm/encoders.py index d0823a47e..e52f9e7d6 100644 --- a/rvc/lib/algorithm/encoders.py +++ b/rvc/lib/algorithm/encoders.py @@ -31,7 +31,6 @@ def __init__( kernel_size=1, p_dropout=0.0, window_size=10, - **kwargs ): super().__init__() self.hidden_channels = hidden_channels diff --git a/rvc/lib/algorithm/generators.py b/rvc/lib/algorithm/generators.py index e08023de9..5979e99f4 100644 --- a/rvc/lib/algorithm/generators.py +++ b/rvc/lib/algorithm/generators.py @@ -1,4 +1,5 @@ import torch +import numpy as np from torch.nn.utils import remove_weight_norm from torch.nn.utils.parametrizations import weight_norm from typing import Optional @@ -8,7 +9,7 @@ class Generator(torch.nn.Module): - """Generator for synthesizing audio. Optimized for performance and quality. + """Generator for synthesizing audio. Args: initial_channel (int): Number of channels in the initial convolutional layer. @@ -107,93 +108,123 @@ def remove_weight_norm(self): for l in self.resblocks: l.remove_weight_norm() - -class SineGen(torch.nn.Module): - """Sine wave generator. +class SineGenerator(torch.nn.Module): + """ + A sine wave generator that synthesizes waveforms with optional harmonic overtones and noise. Args: - samp_rate (int): Sampling rate in Hz. - harmonic_num (int, optional): Number of harmonic overtones. Defaults to 0. - sine_amp (float, optional): Amplitude of sine waveform. Defaults to 0.1. - noise_std (float, optional): Standard deviation of Gaussian noise. Defaults to 0.003. - voiced_threshold (float, optional): F0 threshold for voiced/unvoiced classification. Defaults to 0. - flag_for_pulse (bool, optional): Whether this SineGen is used inside PulseGen. Defaults to False. + sampling_rate (int): The sampling rate in Hz. + num_harmonics (int, optional): The number of harmonic overtones to include. Defaults to 0. + sine_amplitude (float, optional): The amplitude of the sine waveform. Defaults to 0.1. + noise_stddev (float, optional): The standard deviation of Gaussian noise. Defaults to 0.003. + voiced_threshold (float, optional): F0 threshold for distinguishing voiced/unvoiced frames. Defaults to 0. 
""" def __init__( self, - samp_rate, - harmonic_num=0, - sine_amp=0.1, - noise_std=0.003, - voiced_threshold=0, - flag_for_pulse=False, + sampling_rate: int, + num_harmonics: int = 0, + sine_amplitude: float = 0.1, + noise_stddev: float = 0.003, + voiced_threshold: float = 0.0, ): - super(SineGen, self).__init__() - self.sine_amp = sine_amp - self.noise_std = noise_std - self.harmonic_num = harmonic_num - self.dim = self.harmonic_num + 1 - self.sample_rate = samp_rate + super(SineGenerator, self).__init__() + self.sampling_rate = sampling_rate + self.num_harmonics = num_harmonics + self.sine_amplitude = sine_amplitude + self.noise_stddev = noise_stddev self.voiced_threshold = voiced_threshold + self.waveform_dim = self.num_harmonics + 1 # fundamental + harmonics - def _f02uv(self, f0): - """Converts F0 to voiced/unvoiced signal. + def _compute_voiced_unvoiced(self, f0: torch.Tensor) -> torch.Tensor: + """ + Generate a binary mask to indicate voiced/unvoiced frames. Args: - f0 (torch.Tensor): F0 tensor with shape (batch_size, length, 1).. + f0 (torch.Tensor): Fundamental frequency tensor (batch_size, length). """ - uv = torch.ones_like(f0) - uv = uv * (f0 > self.voiced_threshold) - return uv + uv_mask = (f0 > self.voiced_threshold).float() + return uv_mask - def forward(self, f0: torch.Tensor, upp: int): - """Generates sine waves. + def _generate_sine_wave( + self, f0: torch.Tensor, upsampling_factor: int + ) -> torch.Tensor: + """ + Generate sine waves for the fundamental frequency and its harmonics. Args: - f0 (torch.Tensor): F0 tensor with shape (batch_size, length, 1). - upp (int): Upsampling factor. + f0 (torch.Tensor): Fundamental frequency tensor (batch_size, length, 1). + upsampling_factor (int): Upsampling factor. + """ + batch_size, length, _ = f0.shape + + # Create an upsampling grid + upsampling_grid = torch.arange( + 1, upsampling_factor + 1, dtype=f0.dtype, device=f0.device + ) + + # Calculate phase increments + phase_increments = (f0 / self.sampling_rate) * upsampling_grid + phase_remainder = torch.fmod(phase_increments[:, :-1, -1:] + 0.5, 1.0) - 0.5 + cumulative_phase = phase_remainder.cumsum(dim=1).fmod(1.0).to(f0.dtype) + phase_increments += torch.nn.functional.pad( + cumulative_phase, (0, 0, 1, 0), mode="constant" + ) + + # Reshape to match the sine wave shape + phase_increments = phase_increments.reshape(batch_size, -1, 1) + + # Scale for harmonics + harmonic_scale = torch.arange( + 1, self.waveform_dim + 1, dtype=f0.dtype, device=f0.device + ).reshape(1, 1, -1) + phase_increments *= harmonic_scale + + # Add random phase offset (except for the fundamental) + random_phase = torch.rand(1, 1, self.waveform_dim, device=f0.device) + random_phase[..., 0] = 0 # Fundamental frequency has no random offset + phase_increments += random_phase + + # Generate sine waves + sine_waves = torch.sin(2 * np.pi * phase_increments) + return sine_waves + + def forward(self, f0: torch.Tensor, upsampling_factor: int): + """ + Forward pass to generate sine waveforms with noise and voiced/unvoiced masking. + + Args: + f0 (torch.Tensor): Fundamental frequency tensor (batch_size, length, 1). + upsampling_factor (int): Upsampling factor. 
""" with torch.no_grad(): - f0 = f0[:, None].transpose(1, 2) - f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, device=f0.device) - f0_buf[:, :, 0] = f0[:, :, 0] - f0_buf[:, :, 1:] = ( - f0_buf[:, :, 0:1] - * torch.arange(2, self.harmonic_num + 2, device=f0.device)[ - None, None, : - ] - ) - rad_values = (f0_buf / float(self.sample_rate)) % 1 - rand_ini = torch.rand( - f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device + # Expand `f0` to include waveform dimensions + f0 = f0.unsqueeze(-1) + + # Generate sine waves + sine_waves = ( + self._generate_sine_wave(f0, upsampling_factor) * self.sine_amplitude ) - rand_ini[:, 0] = 0 - rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini - tmp_over_one = torch.cumsum(rad_values, 1) - tmp_over_one *= upp - tmp_over_one = torch.nn.functional.interpolate( - tmp_over_one.transpose(2, 1), - scale_factor=float(upp), - mode="linear", - align_corners=True, - ).transpose(2, 1) - rad_values = torch.nn.functional.interpolate( - rad_values.transpose(2, 1), scale_factor=float(upp), mode="nearest" + + # Compute voiced/unvoiced mask + voiced_mask = self._compute_voiced_unvoiced(f0) + + # Upsample voiced/unvoiced mask + voiced_mask = torch.nn.functional.interpolate( + voiced_mask.transpose(2, 1), + scale_factor=float(upsampling_factor), + mode="nearest", ).transpose(2, 1) - tmp_over_one %= 1 - tmp_over_one_idx = (tmp_over_one[:, 1:, :] - tmp_over_one[:, :-1, :]) < 0 - cumsum_shift = torch.zeros_like(rad_values) - cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 - sine_waves = torch.sin( - torch.cumsum(rad_values + cumsum_shift, dim=1) * 2 * torch.pi + + # Compute noise amplitude + noise_amplitude = voiced_mask * self.noise_stddev + (1 - voiced_mask) * ( + self.sine_amplitude / 3 ) - sine_waves = sine_waves * self.sine_amp - uv = self._f02uv(f0) - uv = torch.nn.functional.interpolate( - uv.transpose(2, 1), scale_factor=float(upp), mode="nearest" - ).transpose(2, 1) - noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3 - noise = noise_amp * torch.randn_like(sine_waves) - sine_waves = sine_waves * uv + noise - return sine_waves, uv, noise + + # Add Gaussian noise + noise = noise_amplitude * torch.randn_like(sine_waves) + + # Combine sine waves and noise + sine_waveforms = sine_waves * voiced_mask + noise + + return sine_waveforms, voiced_mask, noise diff --git a/rvc/lib/algorithm/modules.py b/rvc/lib/algorithm/modules.py index 1038356d2..8a2dad1a8 100644 --- a/rvc/lib/algorithm/modules.py +++ b/rvc/lib/algorithm/modules.py @@ -3,7 +3,7 @@ class WaveNet(torch.nn.Module): - """WaveNet residual blocks as used in WaveGlow + """WaveNet residual blocks as used in WaveGlow. Args: hidden_channels (int): Number of hidden channels. @@ -23,79 +23,87 @@ def __init__( gin_channels=0, p_dropout=0, ): - super(WaveNet, self).__init__() - assert kernel_size % 2 == 1 + super().__init__() + assert kernel_size % 2 == 1, "Kernel size must be odd for proper padding." 
+ self.hidden_channels = hidden_channels self.kernel_size = (kernel_size,) self.dilation_rate = dilation_rate self.n_layers = n_layers self.gin_channels = gin_channels self.p_dropout = p_dropout + self.n_channels_tensor = torch.IntTensor([hidden_channels]) # Static tensor self.in_layers = torch.nn.ModuleList() self.res_skip_layers = torch.nn.ModuleList() self.drop = torch.nn.Dropout(p_dropout) - if gin_channels != 0: - cond_layer = torch.nn.Conv1d( - gin_channels, 2 * hidden_channels * n_layers, 1 - ) + # Conditional layer for global conditioning + if gin_channels: self.cond_layer = torch.nn.utils.parametrizations.weight_norm( - cond_layer, name="weight" + torch.nn.Conv1d(gin_channels, 2 * hidden_channels * n_layers, 1), + name="weight", ) + # Precompute dilations and paddings dilations = [dilation_rate**i for i in range(n_layers)] paddings = [(kernel_size * d - d) // 2 for d in dilations] + # Initialize layers for i in range(n_layers): - in_layer = torch.nn.Conv1d( - hidden_channels, - 2 * hidden_channels, - kernel_size, - dilation=dilations[i], - padding=paddings[i], + self.in_layers.append( + torch.nn.utils.parametrizations.weight_norm( + torch.nn.Conv1d( + hidden_channels, + 2 * hidden_channels, + kernel_size, + dilation=dilations[i], + padding=paddings[i], + ), + name="weight", + ) ) - in_layer = torch.nn.utils.parametrizations.weight_norm( - in_layer, name="weight" - ) - self.in_layers.append(in_layer) res_skip_channels = ( hidden_channels if i == n_layers - 1 else 2 * hidden_channels ) - - res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1) - res_skip_layer = torch.nn.utils.parametrizations.weight_norm( - res_skip_layer, name="weight" + self.res_skip_layers.append( + torch.nn.utils.parametrizations.weight_norm( + torch.nn.Conv1d(hidden_channels, res_skip_channels, 1), + name="weight", + ) ) - self.res_skip_layers.append(res_skip_layer) - def forward(self, x, x_mask, g=None, **kwargs): + def forward(self, x, x_mask, g=None): """Forward pass. Args: - x (torch.Tensor): Input tensor of shape (batch_size, hidden_channels, time_steps). - x_mask (torch.Tensor): Mask tensor of shape (batch_size, 1, time_steps). - g (torch.Tensor, optional): Conditioning tensor of shape (batch_size, gin_channels, time_steps). - Defaults to None. + x (torch.Tensor): Input tensor (batch_size, hidden_channels, time_steps). + x_mask (torch.Tensor): Mask tensor (batch_size, 1, time_steps). + g (torch.Tensor, optional): Conditioning tensor (batch_size, gin_channels, time_steps). 
""" - output = torch.zeros_like(x) - n_channels_tensor = torch.IntTensor([self.hidden_channels]) + output = x.clone().zero_() - if g is not None: - g = self.cond_layer(g) + # Apply conditional layer if global conditioning is provided + g = self.cond_layer(g) if g is not None else None for i in range(self.n_layers): x_in = self.in_layers[i](x) - if g is not None: - cond_offset = i * 2 * self.hidden_channels - g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :] - else: - g_l = torch.zeros_like(x_in) + g_l = ( + g[ + :, + i * 2 * self.hidden_channels : (i + 1) * 2 * self.hidden_channels, + :, + ] + if g is not None + else 0 + ) - acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor) + # Activation with fused Tanh-Sigmoid + acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, self.n_channels_tensor) acts = self.drop(acts) + # Residual and skip connections res_skip_acts = self.res_skip_layers[i](acts) if i < self.n_layers - 1: res_acts = res_skip_acts[:, : self.hidden_channels, :] @@ -103,13 +111,14 @@ def forward(self, x, x_mask, g=None, **kwargs): output = output + res_skip_acts[:, self.hidden_channels :, :] else: output = output + res_skip_acts + return output * x_mask def remove_weight_norm(self): """Remove weight normalization from the module.""" - if self.gin_channels != 0: + if self.gin_channels: torch.nn.utils.remove_weight_norm(self.cond_layer) - for l in self.in_layers: - torch.nn.utils.remove_weight_norm(l) - for l in self.res_skip_layers: - torch.nn.utils.remove_weight_norm(l) + for layer in self.in_layers: + torch.nn.utils.remove_weight_norm(layer) + for layer in self.res_skip_layers: + torch.nn.utils.remove_weight_norm(layer) diff --git a/rvc/lib/algorithm/nsf.py b/rvc/lib/algorithm/nsf.py index 465e04de5..5476adabd 100644 --- a/rvc/lib/algorithm/nsf.py +++ b/rvc/lib/algorithm/nsf.py @@ -4,7 +4,7 @@ from torch.nn.utils.parametrizations import weight_norm from typing import Optional -from rvc.lib.algorithm.generators import SineGen +from rvc.lib.algorithm.generators import SineGenerator from rvc.lib.algorithm.residuals import LRELU_SLOPE, ResBlock1, ResBlock2 from rvc.lib.algorithm.commons import init_weights @@ -37,7 +37,7 @@ def __init__( self.noise_std = add_noise_std self.is_half = is_half - self.l_sin_gen = SineGen( + self.l_sin_gen = SineGenerator( sample_rate, harmonic_num, sine_amp, add_noise_std, voiced_threshod ) self.l_linear = torch.nn.Linear(harmonic_num + 1, 1) diff --git a/rvc/lib/algorithm/residuals.py b/rvc/lib/algorithm/residuals.py index a2b2e39dd..87805f725 100644 --- a/rvc/lib/algorithm/residuals.py +++ b/rvc/lib/algorithm/residuals.py @@ -9,7 +9,6 @@ LRELU_SLOPE = 0.1 -# Helper functions def create_conv1d_layer(channels, kernel_size, dilation): return weight_norm( torch.nn.Conv1d( diff --git a/rvc/train/train.py b/rvc/train/train.py index 4a2d3fa62..9ca7a2951 100644 --- a/rvc/train/train.py +++ b/rvc/train/train.py @@ -352,7 +352,6 @@ def run( # Initialize models and optimizers from rvc.lib.algorithm.discriminators import MultiPeriodDiscriminator - from rvc.lib.algorithm.discriminators import MultiPeriodDiscriminatorV2 from rvc.lib.algorithm.synthesizers import Synthesizer net_g = Synthesizer( @@ -364,10 +363,7 @@ def run( sr=sample_rate, ).to(device) - if version == "v1": - net_d = MultiPeriodDiscriminator(config.model.use_spectral_norm).to(device) - else: - net_d = MultiPeriodDiscriminatorV2(config.model.use_spectral_norm).to(device) + net_d = MultiPeriodDiscriminator(version, config.model.use_spectral_norm).to(device) optim_g = 
torch.optim.AdamW( net_g.parameters(), diff --git a/tabs/extra/analyzer/analyzer.py b/tabs/extra/analyzer/analyzer.py index 7b05653c0..c8c7cdab0 100644 --- a/tabs/extra/analyzer/analyzer.py +++ b/tabs/extra/analyzer/analyzer.py @@ -21,7 +21,7 @@ def analyzer_tab(): interactive=False, ) get_info_button = gr.Button( - value=i18n("Get information about the audio"), variant="primary" + value=i18n("Get information about the audio") ) image_output = gr.Image(type="filepath", interactive=False) diff --git a/tabs/extra/f0_extractor/f0_extractor.py b/tabs/extra/f0_extractor/f0_extractor.py index 9b4884b5b..a6a05e30d 100644 --- a/tabs/extra/f0_extractor/f0_extractor.py +++ b/tabs/extra/f0_extractor/f0_extractor.py @@ -52,7 +52,7 @@ def f0_extractor_tab(): choices=["crepe", "fcpe", "rmvpe"], value="rmvpe", ) - button = gr.Button(i18n("Extract F0 Curve"), variant="primary") + button = gr.Button(i18n("Extract F0 Curve")) with gr.Accordion(label=i18n("Output Information")): txt_output = gr.File(label="F0 Curve", type="filepath") diff --git a/tabs/extra/processing/processing.py b/tabs/extra/processing/processing.py index aec998006..1d01a7106 100644 --- a/tabs/extra/processing/processing.py +++ b/tabs/extra/processing/processing.py @@ -29,7 +29,7 @@ def processing_tab(): value="", max_lines=11, ) - model_view_button = gr.Button(i18n("View"), variant="primary") + model_view_button = gr.Button(i18n("View")) model_view_button.click( fn=run_model_information_script, inputs=[model_view_model_path], diff --git a/tabs/inference/inference.py b/tabs/inference/inference.py index 8bb4c5123..86b3fcf94 100644 --- a/tabs/inference/inference.py +++ b/tabs/inference/inference.py @@ -468,15 +468,6 @@ def inference_tab(): value=0.5, interactive=True, ) - upscale_audio = gr.Checkbox( - label=i18n("Upscale Audio"), - info=i18n( - "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)" - ), - visible=True, - value=False, - interactive=True, - ) formant_shifting = gr.Checkbox( label=i18n("Formant Shifting"), info=i18n( @@ -486,6 +477,12 @@ def inference_tab(): visible=True, interactive=True, ) + post_process = gr.Checkbox( + label=i18n("Post-Process"), + info=i18n("Post-process the audio to apply effects to the output."), + value=False, + interactive=True, + ) with gr.Row(visible=False) as formant_row: formant_preset = gr.Dropdown( label=i18n("Browse presets for formanting"), @@ -520,12 +517,6 @@ def inference_tab(): visible=False, interactive=True, ) - post_process = gr.Checkbox( - label=i18n("Post-Process"), - info=i18n("Post-process the audio to apply effects to the output."), - value=False, - interactive=True, - ) reverb = gr.Checkbox( label=i18n("Reverb"), info=i18n("Apply reverb to the audio."), @@ -1112,15 +1103,6 @@ def inference_tab(): value=0.5, interactive=True, ) - upscale_audio_batch = gr.Checkbox( - label=i18n("Upscale Audio"), - info=i18n( - "Upscale the audio to a higher quality, recommended for low-quality audios. 
(It could take longer to process the audio)" - ), - visible=True, - value=False, - interactive=True, - ) formant_shifting_batch = gr.Checkbox( label=i18n("Formant Shifting"), info=i18n( @@ -1130,6 +1112,12 @@ def inference_tab(): visible=True, interactive=True, ) + post_process_batch = gr.Checkbox( + label=i18n("Post-Process"), + info=i18n("Post-process the audio to apply effects to the output."), + value=False, + interactive=True, + ) with gr.Row(visible=False) as formant_row_batch: formant_preset_batch = gr.Dropdown( label=i18n("Browse presets for formanting"), @@ -1143,7 +1131,6 @@ def inference_tab(): formant_refresh_button_batch = gr.Button( value="Refresh", visible=False, - variant="primary", ) formant_qfrency_batch = gr.Slider( value=1.0, @@ -1165,12 +1152,6 @@ def inference_tab(): visible=False, interactive=True, ) - post_process_batch = gr.Checkbox( - label=i18n("Post-Process"), - info=i18n("Post-process the audio to apply effects to the output."), - value=False, - interactive=True, - ) reverb_batch = gr.Checkbox( label=i18n("Reverb"), info=i18n("Apply reverb to the audio."), @@ -2065,7 +2046,6 @@ def delay_visible(checkbox): clean_audio, clean_strength, export_format, - upscale_audio, f0_file, embedder_model, embedder_model_custom, @@ -2132,7 +2112,6 @@ def delay_visible(checkbox): clean_audio_batch, clean_strength_batch, export_format_batch, - upscale_audio_batch, f0_file_batch, embedder_model_batch, embedder_model_custom_batch, diff --git a/tabs/train/train.py b/tabs/train/train.py index a728e155d..23c07dbae 100644 --- a/tabs/train/train.py +++ b/tabs/train/train.py @@ -825,7 +825,7 @@ def train_tab(): with gr.Column(): refresh_export = gr.Button(i18n("Refresh")) if not os.name == "nt": - upload_exported = gr.Button(i18n("Upload"), variant="primary") + upload_exported = gr.Button(i18n("Upload")) upload_exported.click( fn=upload_to_google_drive, inputs=[pth_dropdown_export, index_dropdown_export], diff --git a/tabs/tts/tts.py b/tabs/tts/tts.py index 492915674..647ef8ce2 100644 --- a/tabs/tts/tts.py +++ b/tabs/tts/tts.py @@ -205,15 +205,6 @@ def tts_tab(): value=0.5, interactive=True, ) - upscale_audio = gr.Checkbox( - label=i18n("Upscale Audio"), - info=i18n( - "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)" - ), - visible=True, - value=False, - interactive=True, - ) pitch = gr.Slider( minimum=-24, maximum=24, @@ -415,7 +406,6 @@ def toggle_visible_embedder_custom(embedder_model): clean_audio, clean_strength, export_format, - upscale_audio, f0_file, embedder_model, embedder_model_custom, diff --git a/tabs/voice_blender/voice_blender.py b/tabs/voice_blender/voice_blender.py index 8b740d2a6..b90b86ab3 100644 --- a/tabs/voice_blender/voice_blender.py +++ b/tabs/voice_blender/voice_blender.py @@ -64,7 +64,7 @@ def voice_blender_tab(): "Adjusting the position more towards one side or the other will make the model more similar to the first or second." ), ) - model_fusion_button = gr.Button(i18n("Fusion"), variant="primary") + model_fusion_button = gr.Button(i18n("Fusion")) with gr.Row(): model_fusion_output_info = gr.Textbox( label=i18n("Output Information"),
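Taken together, the `discriminators.py` and `train.py` hunks above fold the separate `MultiPeriodDiscriminatorV2` class into a single `MultiPeriodDiscriminator` that selects its period set from a `version` argument. A minimal usage sketch, assuming only the constructor signature and period lists shown in the diff:

```python
# Sketch of the consolidated discriminator API introduced by this patch.
# Only the constructor signature and period lists come from the diff;
# everything else here is illustrative.
from rvc.lib.algorithm.discriminators import MultiPeriodDiscriminator

net_d_v1 = MultiPeriodDiscriminator("v1")                           # periods [2, 3, 5, 7, 11, 17]
net_d_v2 = MultiPeriodDiscriminator("v2", use_spectral_norm=True)   # adds periods 23 and 37
```

This mirrors the `train.py` change, where the `version == "v1"` branch is dropped in favour of a single `MultiPeriodDiscriminator(version, config.model.use_spectral_norm)` call.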