You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I was validating the resolution of bug #3224, which I suspect was fixed in commit 4d0f53d, and got a new, unexpected error from the same code. I empirically determined that the error first appears with the changes in the commit (8c5227e) preceding the one that fixed my issue, so I can't actually validate the fix.
Error log:
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
[<ipython-input-8-37c7de7f3028>](https://localhost:8080/#) in <cell line: 2>()
      1 tts = TTS('tts_models/multilingual/multi-dataset/xtts_v2', gpu=True)
----> 2 tts.tts_with_vc(text=f"Как оно?", speaker_wav='/content/SPEAKER_00_voice_clips.wav', language='ru')
13 frames
[/usr/local/lib/python3.10/dist-packages/TTS/api.py](https://localhost:8080/#) in tts_with_vc(self, text, language, speaker_wav, speaker)
    464         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
    465             # Lazy code... save it to a temp file to resample it while reading it for VC
--> 466             self.tts_to_file(text=text, speaker=speaker, language=language, file_path=fp.name)
    467         if self.voice_converter is None:
    468             self.load_vc_model_by_name("voice_conversion_models/multilingual/vctk/freevc24")
[/usr/local/lib/python3.10/dist-packages/TTS/api.py](https://localhost:8080/#) in tts_to_file(self, text, speaker, language, speaker_wav, emotion, speed, pipe_out, file_path, **kwargs)
    401                 pipe_out=pipe_out,
    402             )
--> 403         wav = self.tts(text=text, speaker=speaker, language=language, speaker_wav=speaker_wav, **kwargs)
    404         self.synthesizer.save_wav(wav=wav, path=file_path, pipe_out=pipe_out)
    405         return file_path
[/usr/local/lib/python3.10/dist-packages/TTS/api.py](https://localhost:8080/#) in tts(self, text, speaker, language, speaker_wav, emotion, speed, **kwargs)
    339                 text=text, speaker_name=speaker, language=language, emotion=emotion, speed=speed
    340             )
--> 341         wav = self.synthesizer.tts(
    342             text=text,
    343             speaker_name=speaker,
[/usr/local/lib/python3.10/dist-packages/TTS/utils/synthesizer.py](https://localhost:8080/#) in tts(self, text, speaker_name, language_name, speaker_wav, style_wav, style_text, reference_wav, reference_speaker_name, **kwargs)
    376             for sen in sens:
    377                 if hasattr(self.tts_model, "synthesize"):
--> 378                     outputs = self.tts_model.synthesize(
    379                         text=sen,
    380                         config=self.tts_config,
[/usr/local/lib/python3.10/dist-packages/TTS/tts/models/xtts.py](https://localhost:8080/#) in synthesize(self, text, config, speaker_wav, language, **kwargs)
    390
    391         """
--> 392         return self.inference_with_config(text, config, ref_audio_path=speaker_wav, language=language, **kwargs)
    393
    394     def inference_with_config(self, text, config, ref_audio_path, language, **kwargs):

[/usr/local/lib/python3.10/dist-packages/TTS/tts/models/xtts.py](https://localhost:8080/#) in inference_with_config(self, text, config, ref_audio_path, language, **kwargs)
    412         }
    413         settings.update(kwargs)  # allow overriding of preset settings with kwargs
--> 414         return self.full_inference(text, ref_audio_path, language, **settings)
    415
    416     @torch.inference_mode()

[/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py](https://localhost:8080/#) in decorate_context(*args, **kwargs)
    113     def decorate_context(*args, **kwargs):
    114         with ctx_factory():
--> 115             return func(*args, **kwargs)
    116
    117     return decorate_context

[/usr/local/lib/python3.10/dist-packages/TTS/tts/models/xtts.py](https://localhost:8080/#) in full_inference(self, text, ref_audio_path, language, temperature, length_penalty, repetition_penalty, top_k, top_p, do_sample, gpt_cond_len, gpt_cond_chunk_len, max_ref_len, sound_norm_refs, **hf_generate_kwargs)
    473             Sample rate is 24kHz.
    474         """
--> 475         (gpt_cond_latent, speaker_embedding) = self.get_conditioning_latents(
    476             audio_path=ref_audio_path,
    477             gpt_cond_len=gpt_cond_len,
[/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py](https://localhost:8080/#) in decorate_context(*args, **kwargs)
    113     def decorate_context(*args, **kwargs):
    114         with ctx_factory():
--> 115             return func(*args, **kwargs)
    116
    117     return decorate_context
[/usr/local/lib/python3.10/dist-packages/TTS/tts/models/xtts.py](https://localhost:8080/#) in get_conditioning_latents(self, audio_path, max_ref_length, gpt_cond_len, gpt_cond_chunk_len, librosa_trim_db, sound_norm_refs, load_sr)
    349         speaker_embedding = None
    350         for file_path in audio_paths:
--> 351             audio = load_audio(file_path, load_sr)
    352             audio = audio[:, : load_sr * max_ref_length].to(self.device)
    353             if sound_norm_refs:
[/usr/local/lib/python3.10/dist-packages/TTS/tts/models/xtts.py](https://localhost:8080/#) in load_audio(audiopath, sampling_rate)
     70
     71     # torchaudio should chose proper backend to load audio depending on platform
---> 72     audio, lsr = torchaudio.load(audiopath)
     73
     74     # stereo to mono if needed
[/usr/local/lib/python3.10/dist-packages/torchaudio/_backend/utils.py](https://localhost:8080/#) in load(uri, frame_offset, num_frames, normalize, channels_first, format, buffer_size, backend)
    201         """
    202         backend = dispatcher(uri, format, backend)
--> 203         return backend.load(uri, frame_offset, num_frames, normalize, channels_first, format, buffer_size)
    204
    205     return load

[/usr/local/lib/python3.10/dist-packages/torchaudio/_backend/ffmpeg.py](https://localhost:8080/#) in load(uri, frame_offset, num_frames, normalize, channels_first, format, buffer_size)
    332             )
    333         else:
--> 334             return load_audio(os.path.normpath(uri), frame_offset, num_frames, normalize, channels_first, format)
    335
    336     @staticmethod

[/usr/lib/python3.10/posixpath.py](https://localhost:8080/#) in normpath(path)
    338 def normpath(path):
    339     """Normalize path, eliminating double slashes, etc."""
--> 340     path = os.fspath(path)
    341     if isinstance(path, bytes):
    342         sep = b'/'
TypeError: expected str, bytes or os.PathLike object, not NoneType
Just use the following with XTTS. It has voice cloning integrated and the output doesn't need to be passed through another voice conversion model (#3293 tracks providing a better error message in this case).
Describe the bug
I was validating the resolution of bug #3224, which I suspect was fixed in commit 4d0f53d, and got a new, unexpected error from the same code. I empirically determined that the error first appears with the changes in the commit (8c5227e) preceding the one that fixed my issue, so I can't actually validate the fix.
Error log:
To Reproduce
Code:
Environment
The text was updated successfully, but these errors were encountered: