diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index da5352d1fc..20308aab6c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,6 +30,7 @@ jobs: - name: Install Espeak if: contains(fromJSON('["inference_tests", "test_text", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) run: | + sudo apt-get update sudo apt-get install espeak espeak-ng - name: Install dependencies run: | diff --git a/TTS/bin/find_unique_chars.py b/TTS/bin/find_unique_chars.py index 81f2f4465f..f476ca5ddb 100644 --- a/TTS/bin/find_unique_chars.py +++ b/TTS/bin/find_unique_chars.py @@ -4,7 +4,7 @@ from argparse import RawTextHelpFormatter from TTS.config import load_config -from TTS.tts.datasets import load_tts_samples +from TTS.tts.datasets import find_unique_chars, load_tts_samples def main(): @@ -29,17 +29,7 @@ def main(): ) items = train_items + eval_items - - texts = "".join(item["text"] for item in items) - chars = set(texts) - lower_chars = filter(lambda c: c.islower(), chars) - chars_force_lower = [c.lower() for c in chars] - chars_force_lower = set(chars_force_lower) - - print(f" > Number of unique characters: {len(chars)}") - print(f" > Unique characters: {''.join(sorted(chars))}") - print(f" > Unique lower characters: {''.join(sorted(lower_chars))}") - print(f" > Unique all forced to lower characters: {''.join(sorted(chars_force_lower))}") + find_unique_chars(items) if __name__ == "__main__": diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py index 192138561f..4f354fa0be 100644 --- a/TTS/tts/datasets/__init__.py +++ b/TTS/tts/datasets/__init__.py @@ -167,7 +167,7 @@ def _get_formatter_by_name(name): def find_unique_chars(data_samples, verbose=True): - texts = "".join(item[0] for item in data_samples) + texts = "".join(item["text"] for item in data_samples) chars = set(texts) lower_chars = filter(lambda c: c.islower(), chars) chars_force_lower = [c.lower() for c in chars]