-
Notifications
You must be signed in to change notification settings - Fork 1
/
test_live.py
107 lines (82 loc) · 2.51 KB
/
test_live.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from __future__ import (print_function, division, absolute_import, unicode_literals)
from builtins import *
import time
import struct
import logging
import multiprocessing
import signal
import wave
import numpy as np
import pyaudio
from yapykaldi import KaldiNNet3OnlineDecoder, KaldiNNet3OnlineModel
# Shutdown flag: the capture and recognition loops run while this is False.
FINALIZE = False

# Sample rate handed to the input stream, the WAV header and the decoder.
RATE = 16000

# Seconds the recogniser waits for a chunk on the queue before giving up.
TIMEOUT = 2

# Number of frames requested from the input stream per read.
CHUNK = 1024

# Prefix every log record with the name of the process that emitted it.
logging.basicConfig(
    level=logging.INFO,
    format='(%(processName)-9s) %(message)s',
)
def listen(q):
    """Record from the default input device until ``FINALIZE`` is set.

    Every chunk read from the stream is pushed onto ``q`` and also kept in
    memory; once the loop ends, the whole recording is written to
    ``output.wav``.

    The audio stream and the PyAudio instance are released even when a read
    or a queue ``put`` raises; in that case the exception propagates and no
    WAV file is written.

    Args:
        q: Queue-like object with a ``put`` method; receives each raw chunk
            exactly as returned by ``stream.read``.
    """
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    WAVE_OUTPUT_FILENAME = "output.wav"

    p = pyaudio.PyAudio()
    try:
        # Ask for the sample width before terminate() tears the PyAudio
        # instance down, rather than querying an already-terminated object.
        sample_width = p.get_sample_size(FORMAT)

        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            logging.info("* start listening")

            frames = []
            while not FINALIZE:
                data = stream.read(CHUNK)
                q.put(data)
                frames.append(data)

            logging.info("* stop listening")
        finally:
            # Always release the device, even if a read failed.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    logging.info("* writing data to '{}'".format(WAVE_OUTPUT_FILENAME))
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()
def recognize(q):
    """Decode audio chunks taken from ``q`` until shutdown or a queue timeout.

    Builds a ``KaldiNNet3OnlineDecoder`` for the model stored under
    ``model_dir`` and feeds it one chunk at a time, logging the decoded
    string after each chunk. The loop ends when ``FINALIZE`` becomes true,
    when no chunk arrives within ``TIMEOUT`` seconds, or when fetching or
    converting a chunk fails (the failure is logged with its traceback).

    Args:
        q: Queue yielding raw audio chunks as byte strings. Each chunk is
            interpreted as little-endian signed 16-bit samples.

    Raises:
        RuntimeError: If ``decoder.decode`` reports failure for a chunk.
    """
    # Local import keeps the block self-contained on both Python 2 and 3.
    try:
        from queue import Empty
    except ImportError:  # Python 2 without the ``queue`` alias module
        from Queue import Empty

    model_dir = "../data/kaldi-generic-en-tdnn_fl-latest"
    model = KaldiNNet3OnlineModel(model_dir)
    decoder = KaldiNNet3OnlineDecoder(model)

    # Little-endian int16, i.e. the same layout as the struct code '<h'.
    sample_dtype = np.dtype('<i2')

    while not FINALIZE:
        try:
            data = q.get(block=True, timeout=TIMEOUT)
            # Convert the whole buffer in one step; this works for any chunk
            # length instead of assuming exactly CHUNK samples.
            samples = np.frombuffer(data, dtype=sample_dtype).astype(np.float32)
        except Empty:
            logging.info(
                "No audio received for {} s; stopping recognition".format(TIMEOUT))
            break
        except Exception:
            # Still best-effort (stop instead of crashing), but no longer
            # silent: record what went wrong.
            logging.exception("Failed to read audio chunk; stopping recognition")
            break

        logging.info("Recognizing chunk")
        # NOTE(review): FINALIZE is normally still False here, so the third
        # argument (presumably a "finalize" flag) is almost never true and
        # the decoder is not explicitly finalised after the loop -- confirm
        # against the yapykaldi API.
        if not decoder.decode(RATE, samples, FINALIZE):
            raise RuntimeError("Decoding failed")

        decoded_string, _ = decoder.get_decoded_string()
        logging.info("** {}".format(decoded_string))
def handle_interrupt(sig, frame):
    """Signal handler that requests shutdown by setting ``FINALIZE``.

    Args:
        sig: Signal number supplied by the ``signal`` module (unused).
        frame: Interrupted stack frame supplied by the ``signal`` module
            (unused).
    """
    global FINALIZE

    logging.info("Handling interrupt")
    FINALIZE = True

    # Hold the interrupted code for three seconds before returning to it.
    time.sleep(3)
def main():
    """Run recognition in a child process while this process captures audio.

    A ``multiprocessing.Queue`` carries audio chunks from ``listen`` (run in
    the calling process) to ``recognize`` (run in the child). Returns once
    capture has stopped and the child process has exited.
    """
    logging.info("Starting live speech recognition")

    audio_queue = multiprocessing.Queue()

    recognizer = multiprocessing.Process(target=recognize, args=(audio_queue,))
    recognizer.start()

    # Blocks in this process until the capture loop ends.
    listen(audio_queue)

    recognizer.join()
    logging.info("Completed ASR")
if __name__ == "__main__":
    # Install the SIGINT handler before main() starts the recogniser process
    # and the capture loop, so an interrupt sets FINALIZE via the handler.
    signal.signal(signal.SIGINT, handle_interrupt)
    main()