coqui-ai · erogol · Oct 10, 2022 · Oct 8, 2022 · Oct 8, 2022 · Oct 10, 2022
diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py
@@ -15,6 +15,15 @@
 
 def coqui(root_path, meta_file, ignored_speakers=None):
     """Interal dataset formatter."""
+    filepath = os.path.join(root_path, meta_file)
+    # ensure every line has the same number of columns as the first row
+    with open(filepath, "r", encoding="utf8") as f:
+        lines = f.readlines()
+    num_cols = len(lines[0].split("|"))  # take the first row as reference
+    for idx, line in enumerate(lines[1:]):
+        if len(line.split("|")) != num_cols:
+            print(f" > Missing column in line {idx + 1} -> {line.strip()}")
+    # load metadata
     metadata = pd.read_csv(os.path.join(root_path, meta_file), sep="|")
     assert all(x in metadata.columns for x in ["audio_file", "text"])
     speaker_name = None if "speaker_name" in metadata.columns else "coqui"