From dad6a7b0b6bba9cf5cc0c3c72c7b29e0905609db Mon Sep 17 00:00:00 2001
From: Julian Weber
Date: Sat, 21 Oct 2023 11:26:03 +0200
Subject: [PATCH] Preserve [ja] token of the text processing

---
 TTS/tts/layers/xtts/tokenizer.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py
index 1d4ed235ea..4b9fb9edef 100644
--- a/TTS/tts/layers/xtts/tokenizer.py
+++ b/TTS/tts/layers/xtts/tokenizer.py
@@ -483,10 +483,13 @@ def preprocess_text(self, txt, lang):
         if lang == "zh-cn":
             txt = chinese_transliterate(txt)
         elif lang == "ja":
+            assert txt[:4] == "[ja]", "Japanese speech should start with the [ja] token."
+            txt = txt[4:]
             if self.katsu is None:
                 import cutlet
                 self.katsu = cutlet.Cutlet()
             txt = japanese_cleaners(txt, self.katsu)
+            txt = "[ja]" + txt
         else:
             raise NotImplementedError()
         return txt
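
Note (not part of the patch): the change strips the leading "[ja]" language token before the Japanese cleaning step and re-attaches it afterwards, so the token survives preprocessing instead of being fed into the romanizer. Below is a minimal standalone sketch of that behavior, assuming cutlet is installed; the japanese_cleaners call from tokenizer.py is approximated here by cutlet's romaji(), which is an assumption for illustration, not the exact cleaner used in TTS.

import cutlet

def preprocess_ja_sketch(txt: str) -> str:
    # The patched branch requires the [ja] token to be present up front.
    assert txt[:4] == "[ja]", "Japanese speech should start with the [ja] token."
    txt = txt[4:]                 # strip the token before cleaning
    katsu = cutlet.Cutlet()
    txt = katsu.romaji(txt)       # stand-in for japanese_cleaners(txt, katsu)
    return "[ja]" + txt           # re-attach the token so it is preserved

if __name__ == "__main__":
    print(preprocess_ja_sketch("[ja]こんにちは、世界。"))
    # Output begins with "[ja]" followed by the romanized text; the exact
    # romanization depends on cutlet's defaults.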