Write non-speech files to a txt file

coqui-ai · Oct 5, 2022 · be339a7 · be339a7
1 parent 3faccbd
commit be339a7
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 4 deletions.
diff --git a/TTS/bin/remove_silence_using_vad.py b/TTS/bin/remove_silence_using_vad.py
@@ -17,15 +17,15 @@ def adjust_path_and_remove_silence(audio_path):
     # create all directory structure
     pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
     # remove the silence and save the audio
-    output_path = remove_silence(
+    output_path, is_speech = remove_silence(
         model_and_utils,
         audio_path,
         output_path,
         trim_just_beginning_and_end=args.trim_just_beginning_and_end,
         use_cuda=args.use_cuda,
     )
 
-    return output_path
+    return output_path, is_speech
 
 
 def preprocess_audios():
@@ -39,12 +39,20 @@ def preprocess_audios():
     else:
         print("> Trimming all nonspeech parts.")
 
+    filtered_files = []
     if files:
         # create threads
         # num_threads = multiprocessing.cpu_count()
         # process_map(adjust_path_and_remove_silence, files, max_workers=num_threads, chunksize=15)
         for f in tqdm(files):
-            adjust_path_and_remove_silence(f)
+            output_path, is_speech = adjust_path_and_remove_silence(f)
+            if not is_speech:
+                filtered_files.append(output_path)
+
+        # write the output paths of the files that do not have speech, one per line
+        with open(os.path.join(args.output_dir, "filtered_files.txt"), "w", encoding="utf-8") as f:
+            for file in filtered_files:
+                f.write(file + "\n")
     else:
         print("> No files Found !")
 

diff --git a/TTS/utils/vad.py b/TTS/utils/vad.py
@@ -73,9 +73,11 @@ def remove_silence(
     # if have speech timestamps else save the wav
     if new_speech_timestamps:
         wav = collect_chunks(new_speech_timestamps, wav)
+        is_speech = True
     else:
         print(f"> The file {audio_path} probably does not have speech please check it !!")
+        is_speech = False
 
     # save audio
     save_audio(out_path, wav, sampling_rate=gt_sample_rate)
-    return out_path
+    return out_path, is_speech