diff --git a/README.md b/README.md index 4f386ecdc2..594777c116 100644 --- a/README.md +++ b/README.md @@ -198,17 +198,18 @@ from TTS.api import TTS # Get device device = "cuda" if torch.cuda.is_available() else "cpu" -# List available 🐸TTS models and choose the first one -model_name = TTS().list_models()[0] +# List available 🐸TTS models +print(TTS().list_models()) + # Init TTS -tts = TTS(model_name).to(device) +tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1").to(device) # Run TTS -# ❗ Since this model is multi-speaker and multi-lingual, we must set the target speaker and the language -# Text to speech with a numpy output -wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0]) +# ❗ Since this model is a multi-lingual voice cloning model, we must set the target speaker_wav and language +# Text to speech with a list of amplitude values as output +wav = tts.tts(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en") # Text to speech to a file -tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav") +tts.tts_to_file(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav") ``` #### Running a single speaker model diff --git a/docs/source/inference.md b/docs/source/inference.md index 3071be4f4d..4de9ecdd14 100644 --- a/docs/source/inference.md +++ b/docs/source/inference.md @@ -114,18 +114,24 @@ tts-server --model_name "///" \ You can run a multi-speaker and multi-lingual model in Python as ```python +import torch from TTS.api import TTS -# List available 🐸TTS models and choose the first one -model_name = TTS().list_models()[0] +# Get device +device = "cuda" if torch.cuda.is_available() else "cpu" +# List available 🐸TTS models +print(TTS().list_models()) + # Init TTS -tts = TTS(model_name) +tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1").to(device) + # Run TTS -# ❗ Since this model is 
multi-speaker and multi-lingual, we must set the target speaker and the language -# Text to speech with a numpy output -wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0]) +# ❗ Since this model is a multi-lingual voice cloning model, we must set the target speaker_wav and language +# Text to speech with a list of amplitude values as output +wav = tts.tts(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en") # Text to speech to a file -tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav") +tts.tts_to_file(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav") ``` #### Here is an example for a single speaker model.