forked from GeorgeFedoseev/DeepSpeech
-
Notifications
You must be signed in to change notification settings - Fork 0
/
file_transcriber.py
65 lines (43 loc) · 1.72 KB
/
file_transcriber.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import infer
import time
from util import audio_filter_utils as audio_utils
import os
import wave
# Directory holding this module, with symlinks resolved. The project root is
# taken to be that same directory, so both names are bound to one value.
project_root_path = current_dir_path = os.path.split(os.path.realpath(__file__))[0]

# "data" directory directly under the project root.
data_path = os.path.join(project_root_path, "data")

# Flipped to True by initialize() once its setup has completed.
initialized = False
def initialize():
    """Run the one-time inference setup; later calls return immediately.

    On the first call this invokes ``infer.init`` (with an empty
    ``language_tool_language`` and ``use_lm=True``) and then
    ``infer.init_session``, storing the returned session in the module
    global ``session``. The wall-clock duration of each step is printed.
    The module global ``initialized`` is set to True only after both steps
    have finished, so a failed setup is retried on the next call.
    """
    global session, initialized

    if not initialized:
        init_started_at = time.time()
        infer.init(language_tool_language="", use_lm=True)
        init_elapsed = time.time() - init_started_at
        print("DeepSpeech init took %.2f sec" % (init_elapsed,))

        session_started_at = time.time()
        session = infer.init_session()
        session_elapsed = time.time() - session_started_at
        print("session init took %.2f sec" % (session_elapsed,))

        initialized = True
def transcribe_file(rec_path):
    """Pre-process the WAV recording at ``rec_path`` and run inference on it.

    Steps, in order:
      1. ``initialize()`` is called so the module-level ``session`` exists.
      2. The recording is opened with ``wave.open`` as a sanity check.
      3. ``audio_utils.correct_volume`` is called with the input path and
         ``rec_path + "_volume_corrected.wav"`` as its output path.
      4. ``audio_utils.apply_bandpass_filter`` is called with that file and
         ``rec_path + "_filtered.wav"`` as its output path.
      5. ``infer.infer`` is called on the filtered path with the session.

    The two intermediate paths sit next to the input file; this function
    does not remove them afterwards.

    Args:
        rec_path: Path of the input WAV file, as a string (the intermediate
            paths are built by string concatenation).

    Returns:
        Whatever ``infer.infer`` returns for the filtered recording.

    Raises:
        Any exception raised by ``wave.open`` for a missing or malformed
        file, or propagated from the ``audio_utils`` / ``infer`` helpers.
    """
    initialize()

    # Open the recording only to fail fast on a missing or malformed WAV
    # file, then close it right away so the file handle is not leaked.
    # NOTE: a speech-presence pre-check (audio_utils.has_speech) and a
    # loudness-normalisation step (audio_utils.loud_norm) used to live here
    # and are currently disabled.
    wave.open(rec_path).close()

    # Volume correction.
    audio_wav_volume_corrected_path = rec_path + "_volume_corrected.wav"
    audio_utils.correct_volume(rec_path, audio_wav_volume_corrected_path)

    # Band-pass filter applied to the volume-corrected audio.
    audio_wav_filtered_path = rec_path + "_filtered.wav"
    audio_utils.apply_bandpass_filter(
        audio_wav_volume_corrected_path, audio_wav_filtered_path
    )

    return infer.infer(audio_wav_filtered_path, session)