Skip to content

Commit

Permalink
Tortoise model and voice selection improvements (#73)
Browse files Browse the repository at this point in the history
* fix voices height

* improve env generator

* add voices-tortoise

* add voices-tortoise to voice dropdown

* add tortoise models path

* improved open_folder

* add open buttons for folders

* tortoise add model dropdown

* add video
  • Loading branch information
rsxdalv authored Jul 16, 2023
1 parent 1c24f4f commit f4f0c7a
Show file tree
Hide file tree
Showing 11 changed files with 163 additions and 31 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ favorites/
voices/
collections/
outputs-rvc/
voices-tortoise/

# Ignore model checkpoints
data/
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@

## Videos

| **Demo - How to use RVC with Tortoise** | **How To Get More Voices for Bark TTS** |
| :------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------: |
| [![Watch the video](https://img.youtube.com/vi/mhp_e8WSpxA/sddefault.jpg)](https://youtu.be/mhp_e8WSpxA) | [![Watch the video](https://img.youtube.com/vi/yeC5vJoavOE/sddefault.jpg)](https://youtu.be/yeC5vJoavOE) |
| **Refining Bark TTS vocals using Demucs & Vocos** | **Demo - How to use RVC with Tortoise** | **How To Get More Voices for Bark TTS** |
| :------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------: |
| [![Watch the video](https://img.youtube.com/vi/jCb-8JE7pk8/sddefault.jpg)](https://youtu.be/jCb-8JE7pk8) | [![Watch the video](https://img.youtube.com/vi/mhp_e8WSpxA/sddefault.jpg)](https://youtu.be/mhp_e8WSpxA) | [![Watch the video](https://img.youtube.com/vi/yeC5vJoavOE/sddefault.jpg)](https://youtu.be/yeC5vJoavOE) |

## Screenshots

Expand Down
Empty file added data/models/tortoise/.gitkeep
Empty file.
25 changes: 22 additions & 3 deletions src/history_tab/open_folder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
import os
import subprocess
import sys

# Pick the platform-specific implementation once, at import time.
if sys.platform == "darwin":

    def open_folder(folder_path: str):
        """Open *folder_path* in Finder via the macOS ``open`` command.

        Raises:
            subprocess.CalledProcessError: if ``open`` exits with a non-zero
                status.
        """
        subprocess.check_call(["open", "--", folder_path])

elif sys.platform.startswith("linux"):
    # Python 3 reports "linux"; the "linux2" value only existed on Python 2,
    # so an equality check against it never matched and left open_folder
    # undefined on Linux.

    def open_folder(folder_path: str):
        """Open *folder_path* in the desktop file manager via ``xdg-open``.

        Raises:
            subprocess.CalledProcessError: if ``xdg-open`` exits with a
                non-zero status.
        """
        # NOTE(review): xdg-open appears to reject any dash-prefixed argument
        # (including a "--" separator) as an unknown option - confirm against
        # the xdg-open man page. Passing an absolute path avoids both the
        # separator and paths that happen to start with "-".
        subprocess.check_call(["xdg-open", os.path.abspath(folder_path)])

elif sys.platform == "win32":

    def open_folder(folder_path: str):
        """Open *folder_path* in Windows Explorer without waiting for it."""
        # NOTE(review): explorer is believed to mishandle forward slashes and
        # relative paths; abspath normalises both - confirm on Windows.
        subprocess.Popen(["explorer", os.path.abspath(folder_path)])

else:

    def open_folder(folder_path: str):
        """Fail explicitly on platforms without a known folder opener.

        Raises:
            NotImplementedError: always; defined so importing this module
                still works and the failure only happens when called.
        """
        raise NotImplementedError(
            f"open_folder is not supported on platform {sys.platform!r}"
        )


if __name__ == "__main__":
    # Manual smoke test: open the repository's data/models folder.
    # This module lives in src/history_tab/, so the repository root is two
    # directory levels above it (a single ".." would point at src/data/models).
    open_folder(
        os.path.join(os.path.dirname(__file__), "..", "..", "data", "models")
    )
2 changes: 1 addition & 1 deletion src/history_tab/voices_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


def voices_tab(register_use_as_history_button, directory="voices"):
with gr.Tab(directory.capitalize()) as voices_tab, gr.Row():
with gr.Tab(directory.capitalize()) as voices_tab, gr.Row(equal_height=False):
with gr.Column():
with gr.Row():
button_output = gr.Button(value=f"Open {directory} folder")
Expand Down
11 changes: 11 additions & 0 deletions src/tortoise/TortoiseParameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def __init__(
cond_free: bool = True,
cond_free_k: int = 2,
diffusion_temperature: float = 1.0,
model: str = "Default",
): # sourcery skip: remove-unnecessary-cast
self.text = text
self.voice = voice
Expand All @@ -37,6 +38,7 @@ def __init__(
self.cond_free = cond_free
self.cond_free_k = cond_free_k
self.diffusion_temperature = float(diffusion_temperature)
self.model = model

def __repr__(self):
params = ",\n ".join(f"{k}={v!r}" for k, v in self.__dict__.items())
Expand Down Expand Up @@ -80,6 +82,7 @@ def __init__(
cond_free: gr.Checkbox,
cond_free_k: gr.Slider,
diffusion_temperature: gr.Slider,
model: gr.Dropdown,
):
self.text = text
self.voice = voice
Expand All @@ -97,6 +100,7 @@ def __init__(
self.cond_free = cond_free
self.cond_free_k = cond_free_k
self.diffusion_temperature = diffusion_temperature
self.model = model

def __repr__(self):
params = ",\n ".join(f"{k}={v!r}" for k, v in self.__dict__.items())
Expand Down Expand Up @@ -126,6 +130,7 @@ def to_list(components: TortoiseParameterComponents | TortoiseParameters):
components.cond_free,
components.cond_free_k,
components.diffusion_temperature,
components.model,
]

@staticmethod
Expand All @@ -152,6 +157,7 @@ def next_idx():
"cond_free": components[next_idx()],
"cond_free_k": components[next_idx()],
"diffusion_temperature": components[next_idx()],
"model": components[next_idx()],
}


Expand Down Expand Up @@ -235,6 +241,11 @@ def next_idx():
maximum=1.0,
step=0.1,
),
model=gr.Dropdown(
show_label=False,
choices=["Default"],
value="Default",
),
)

button = gr.Button("Generate")
Expand Down
65 changes: 58 additions & 7 deletions src/tortoise/gen_tortoise.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from src.utils.date import get_date_string
from src.utils.save_waveform_plot import save_waveform_plot
from tortoise.api import TextToSpeech, MODELS_DIR
from tortoise.utils.audio import load_voices
from tortoise.utils.audio import load_voices, get_voices
import gradio as gr
from src.tortoise.TortoiseOutputRow import TortoiseOutputRow, TortoiseOutputUpdate
from src.tortoise.save_json import save_json
Expand All @@ -17,16 +17,65 @@
OUTPUT_PATH = "outputs/"

# Cached TextToSpeech instance; stays None until the first model load.
MODEL = None
# Extra voice folder name handed to tortoise's voice helpers.
TORTOISE_VOICE_DIR = "voices-tortoise"

# Directory that contains this file (symlinks resolved). The paths below go
# two levels up from it and are normalised with abspath.
_THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# Absolute path of the voices-tortoise folder, used by the "open folder" button.
TORTOISE_VOICE_DIR_ABS = os.path.abspath(
    os.path.join(_THIS_DIR, "..", "..", "voices-tortoise")
)

# Absolute path of the folder scanned for user-supplied Tortoise models.
TORTOISE_LOCAL_MODELS_DIR = os.path.abspath(
    os.path.join(_THIS_DIR, "..", "..", "data", "models", "tortoise")
)


def get_model_list(models_dir=None):
    """Return the model dropdown choices: ``"Default"`` plus local models.

    Args:
        models_dir: Folder to scan for model directories. Defaults to
            ``TORTOISE_LOCAL_MODELS_DIR`` when omitted.

    Returns:
        ``["Default"]`` followed by the names of the sub-directories of
        *models_dir* in sorted order. If the folder cannot be listed the
        error is printed and only ``["Default"]`` is returned.
    """
    if models_dir is None:
        models_dir = TORTOISE_LOCAL_MODELS_DIR
    try:
        entries = os.listdir(models_dir)
    except (FileNotFoundError, NotADirectoryError) as e:
        print(e)
        return ["Default"]
    # A selected entry is later used as a models directory, so only
    # directories are valid choices (this also drops the .gitkeep placeholder).
    # os.listdir returns names in arbitrary order; sort for a stable dropdown.
    return ["Default"] + sorted(
        name for name in entries if os.path.isdir(os.path.join(models_dir, name))
    )


def get_full_model_dir(model_dir: str):
    """Return *model_dir* joined onto ``TORTOISE_LOCAL_MODELS_DIR``.

    The name is joined as-is; no check is made that the directory exists.
    """
    full_path = os.path.join(TORTOISE_LOCAL_MODELS_DIR, model_dir)
    return full_path


def switch_model(model_dir: str):
    """Force a reload of the Tortoise model from the chosen directory.

    ``"Default"`` selects ``MODELS_DIR``; any other value is resolved with
    ``get_full_model_dir``. Returns an argument-less ``gr.Dropdown.update()``.
    """
    if model_dir == "Default":
        target_dir = MODELS_DIR
    else:
        target_dir = get_full_model_dir(model_dir)
    get_tts(models_dir=target_dir, force_reload=True)
    return gr.Dropdown.update()


def get_voice_list():
    """Return the voice choices: ``"random"`` followed by the known voices.

    Voices come from ``get_voices`` with ``TORTOISE_VOICE_DIR`` passed as an
    extra voice directory.
    """
    voices = get_voices(extra_voice_dirs=[TORTOISE_VOICE_DIR])
    return ["random", *voices]


def save_wav_tortoise(audio_array, filename):
    """Write *audio_array* to *filename* using ``write_wav`` and ``SAMPLE_RATE``.

    Both ``write_wav`` and ``SAMPLE_RATE`` are defined outside the visible
    part of this file.
    """
    write_wav(filename, SAMPLE_RATE, audio_array)


def get_tts(models_dir=MODELS_DIR, force_reload=False):
    """Return the shared ``TextToSpeech`` instance, building it when needed.

    Args:
        models_dir: Directory passed to ``TextToSpeech`` when an instance is
            built. Ignored when the cached instance is reused.
        force_reload: When true, build a new instance even if one is cached.

    Returns:
        The instance stored in the module-level ``MODEL``.
    """
    global MODEL
    # Single check: a separate "load the default model if missing" step would
    # build the default model and then immediately rebuild on a forced reload.
    if MODEL is None or force_reload:
        MODEL = TextToSpeech(models_dir=models_dir)
    return MODEL


Expand All @@ -40,7 +89,9 @@ def generate_tortoise(
os.makedirs(OUTPUT_PATH, exist_ok=True)

voice_sel = voice.split("&") if "&" in voice else [voice]
voice_samples, conditioning_latents = load_voices(voice_sel)
voice_samples, conditioning_latents = load_voices(
voice_sel, extra_voice_dirs=[TORTOISE_VOICE_DIR]
)

tts = get_tts()
result, state = tts.tts_with_preset(
Expand All @@ -53,8 +104,8 @@ def generate_tortoise(
**{
k: v
for k, v in params.to_dict().items()
if k not in ["text", "voice", "split_prompt", "seed"]
}
if k not in ["text", "voice", "split_prompt", "seed", "model"]
},
)

seed, _, _, _ = state
Expand Down
37 changes: 30 additions & 7 deletions src/tortoise/generation_tab_tortoise.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
from typing import Any
from src.history_tab.open_folder import open_folder
from src.musicgen.setup_seed_ui_musicgen import setup_seed_ui_musicgen
from tortoise.utils.audio import get_voices
from src.css.css import full_css
import gradio as gr
from src.tortoise.TortoiseOutputRow import TortoiseOutputRow
from src.tortoise.create_tortoise_output_row_ui import create_tortoise_output_row_ui
from src.tortoise.gen_tortoise import generate_tortoise_long
from src.tortoise.gen_tortoise import (
generate_tortoise_long,
get_model_list,
get_voice_list,
TORTOISE_LOCAL_MODELS_DIR,
TORTOISE_VOICE_DIR_ABS,
switch_model,
)
from src.tortoise.TortoiseParameters import (
TortoiseParameterComponents,
TortoiseParameters,
)
from src.tortoise.autoregressive_params import autoregressive_params
from src.tortoise.diffusion_params import diffusion_params
from src.tortoise.presets import presets
from src.tortoise.gr_reload_button import gr_reload_button
from src.tortoise.gr_reload_button import gr_open_button_simple, gr_reload_button

MAX_OUTPUTS = 9

Expand All @@ -39,19 +46,34 @@ def generation_tab_tortoise():
def tortoise_core_ui():
with gr.Row():
with gr.Column():
with gr.Box():
gr.Markdown("Model")
with gr.Row():
model = gr.Dropdown(
choices=get_model_list(),
value="Default",
show_label=False,
container=False,
)
gr_open_button_simple(TORTOISE_LOCAL_MODELS_DIR)
gr_reload_button().click(
fn=lambda: gr.Dropdown.update(choices=get_model_list()),
outputs=[model],
)

model.select(fn=switch_model, inputs=[model], outputs=[model])
with gr.Box():
gr.Markdown("Voice")
with gr.Row():
voice = gr.Dropdown(
choices=["random"] + list(get_voices()),
choices=get_voice_list(),
value="random",
show_label=False,
container=False,
)
gr_open_button_simple(TORTOISE_VOICE_DIR_ABS)
gr_reload_button().click(
fn=lambda: gr.Dropdown.update(
choices=["random"] + list(get_voices())
),
fn=lambda: gr.Dropdown.update(choices=get_voice_list()),
outputs=[voice],
)
with gr.Box():
Expand Down Expand Up @@ -125,6 +147,7 @@ def tortoise_core_ui():
cond_free=cond_free,
cond_free_k=cond_free_k,
diffusion_temperature=diffusion_temperature,
model=model,
)
)

Expand Down
19 changes: 17 additions & 2 deletions src/tortoise/gr_reload_button.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
import gradio as gr
from src.history_tab.open_folder import open_folder


def gr_icon_button(value="refresh", **kwargs):
    """Create a small button whose label is a Material Symbols icon name.

    Args:
        value: Button value; with the ``material-symbols-outlined`` class it
            is presumably rendered as the icon of that name.
        **kwargs: Forwarded unchanged to ``gr.Button``.

    Returns:
        The created ``gr.Button``.
    """
    # Pass the label only by keyword: also giving it positionally would
    # supply ``value`` twice.
    return gr.Button(
        value=value,
        elem_classes="btn-sm material-symbols-outlined",
        size="sm",
        **kwargs,
    )


def gr_reload_button(**kwargs):
    """Create an icon button with the ``refresh`` value.

    Extra keyword arguments are forwarded to ``gr_icon_button``.
    """
    button = gr_icon_button(value="refresh", **kwargs)
    return button


def gr_open_button(**kwargs):
    """Create an icon button with the ``folder_open`` value.

    Extra keyword arguments are forwarded to ``gr_icon_button``.
    """
    button = gr_icon_button(value="folder_open", **kwargs)
    return button


def gr_open_button_simple(dirname="", **kwargs):
    """Create a folder-open button that opens *dirname* when clicked.

    Keyword arguments are forwarded to ``gr_open_button``. The return value
    is whatever the button's ``click`` registration returns, not the button.
    """

    def _open_dir():
        return open_folder(dirname)

    button = gr_open_button(**kwargs)
    return button.click(fn=_open_dir)
28 changes: 20 additions & 8 deletions src/utils/setup_or_recover.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,31 @@ def dummy():
pass


def env_entry(name, value, comment):
    """Format one env assignment preceded by its explanatory comment line.

    Returns:
        ``"# <comment>\\n<name>=<value>\\n"``.
    """
    return f"# {comment}\n{name}={value}\n"


def generate_env(
    environment_suno_use_small_models: bool,
    environment_suno_enable_mps: bool,
    environment_suno_offload_cpu: bool,
):
    """Build the text of the env file from the three SUNO settings.

    Args:
        environment_suno_use_small_models: Value for ``SUNO_USE_SMALL_MODELS``.
        environment_suno_enable_mps: Value for ``SUNO_ENABLE_MPS``.
        environment_suno_offload_cpu: Value for ``SUNO_OFFLOAD_CPU``.

    Returns:
        A header comment line followed by one commented entry per setting.
    """
    entries = (
        (
            "SUNO_USE_SMALL_MODELS",
            environment_suno_use_small_models,
            "Duplicates small models checkboxes",
        ),
        (
            "SUNO_ENABLE_MPS",
            environment_suno_enable_mps,
            "Use MPS when CUDA is unavailable",
        ),
        (
            "SUNO_OFFLOAD_CPU",
            environment_suno_offload_cpu,
            "Offload GPU models to CPU",
        ),
    )
    # The header needs its own newline; without it the first entry's comment
    # is glued onto the header line.
    header = "# This file gets updated automatically from the UI\n"
    return header + "".join(
        env_entry(name, value, comment) for name, value, comment in entries
    )


def setup_or_recover():
Expand Down
Empty file added voices-tortoise/.gitkeep
Empty file.

0 comments on commit f4f0c7a

Please sign in to comment.