Preserve the [ja] token during text preprocessing

coqui-ai · Oct 21, 2023 · dad6a7b · dad6a7b
1 parent c7a1604
commit dad6a7b
Showing 1 changed file with 3 additions and 0 deletions.
diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py
@@ -483,10 +483,13 @@ def preprocess_text(self, txt, lang):
             if lang == "zh-cn":
                 txt = chinese_transliterate(txt)
         elif lang == "ja":
+            assert txt[:4] == "[ja]", "Japanese speech should start with the [ja] token."
+            txt = txt[4:]
             if self.katsu is None:
                 import cutlet
                 self.katsu = cutlet.Cutlet()
             txt = japanese_cleaners(txt, self.katsu)
+            txt = "[ja]" + txt
         else:
             raise NotImplementedError()
         return txt