feat!: migrate to microgenerator (#61)

GoogleCloudPlatform · Sep 24, 2020 · 1760c7a · 1760c7a
1 parent 9440035
commit 1760c7a
Show file tree

Hide file tree

Showing 28 changed files with 510 additions and 444 deletions.
diff --git a/speech/microphone/transcribe_streaming_infinite.py b/speech/microphone/transcribe_streaming_infinite.py
@@ -41,9 +41,9 @@
 SAMPLE_RATE = 16000
 CHUNK_SIZE = int(SAMPLE_RATE / 10)  # 100ms
 
-RED = '\033[0;31m'
-GREEN = '\033[0;32m'
-YELLOW = '\033[0;33m'
+RED = "\033[0;31m"
+GREEN = "\033[0;32m"
+YELLOW = "\033[0;33m"
 
 
 def get_current_time():
@@ -123,12 +123,14 @@ def generator(self):
                     if self.bridging_offset > self.final_request_end_time:
                         self.bridging_offset = self.final_request_end_time
 
-                    chunks_from_ms = round((self.final_request_end_time -
-                                            self.bridging_offset) / chunk_time)
+                    chunks_from_ms = round(
+                        (self.final_request_end_time - self.bridging_offset)
+                        / chunk_time
+                    )
 
-                    self.bridging_offset = (round((
-                        len(self.last_audio_input) - chunks_from_ms)
-                                                  * chunk_time))
+                    self.bridging_offset = round(
+                        (len(self.last_audio_input) - chunks_from_ms) * chunk_time
+                    )
 
                     for i in range(chunks_from_ms, len(self.last_audio_input)):
                         data.append(self.last_audio_input[i])
@@ -157,7 +159,7 @@ def generator(self):
                 except queue.Empty:
                     break
 
-            yield b''.join(data)
+            yield b"".join(data)
 
 
 def listen_print_loop(responses, stream):
@@ -201,35 +203,37 @@ def listen_print_loop(responses, stream):
         if result.result_end_time.nanos:
             result_nanos = result.result_end_time.nanos
 
-        stream.result_end_time = int((result_seconds * 1000)
-                                     + (result_nanos / 1000000))
+        stream.result_end_time = int((result_seconds * 1000) + (result_nanos / 1000000))
 
-        corrected_time = (stream.result_end_time - stream.bridging_offset
-                          + (STREAMING_LIMIT * stream.restart_counter))
+        corrected_time = (
+            stream.result_end_time
+            - stream.bridging_offset
+            + (STREAMING_LIMIT * stream.restart_counter)
+        )
         # Display interim results, but with a carriage return at the end of the
         # line, so subsequent lines will overwrite them.
 
         if result.is_final:
 
             sys.stdout.write(GREEN)
-            sys.stdout.write('\033[K')
-            sys.stdout.write(str(corrected_time) + ': ' + transcript + '\n')
+            sys.stdout.write("\033[K")
+            sys.stdout.write(str(corrected_time) + ": " + transcript + "\n")
 
             stream.is_final_end_time = stream.result_end_time
             stream.last_transcript_was_final = True
 
             # Exit recognition if any of the transcribed phrases could be
             # one of our keywords.
-            if re.search(r'\b(exit|quit)\b', transcript, re.I):
+            if re.search(r"\b(exit|quit)\b", transcript, re.I):
                 sys.stdout.write(YELLOW)
-                sys.stdout.write('Exiting...\n')
+                sys.stdout.write("Exiting...\n")
                 stream.closed = True
                 break
 
         else:
             sys.stdout.write(RED)
-            sys.stdout.write('\033[K')
-            sys.stdout.write(str(corrected_time) + ': ' + transcript + '\r')
+            sys.stdout.write("\033[K")
+            sys.stdout.write(str(corrected_time) + ": " + transcript + "\r")
 
             stream.last_transcript_was_final = False
 
@@ -238,37 +242,42 @@ def main():
     """start bidirectional streaming from microphone input to speech API"""
 
     client = speech.SpeechClient()
-    config = speech.types.RecognitionConfig(
-        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+    config = speech.RecognitionConfig(
+        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=SAMPLE_RATE,
-        language_code='en-US',
-        max_alternatives=1)
-    streaming_config = speech.types.StreamingRecognitionConfig(
-        config=config,
-        interim_results=True)
+        language_code="en-US",
+        max_alternatives=1,
+    )
+    streaming_config = speech.StreamingRecognitionConfig(
+        config=config, interim_results=True
+    )
 
     mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
     print(mic_manager.chunk_size)
     sys.stdout.write(YELLOW)
     sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
-    sys.stdout.write('End (ms)       Transcript Results/Status\n')
-    sys.stdout.write('=====================================================\n')
+    sys.stdout.write("End (ms)       Transcript Results/Status\n")
+    sys.stdout.write("=====================================================\n")
 
     with mic_manager as stream:
 
         while not stream.closed:
             sys.stdout.write(YELLOW)
-            sys.stdout.write('\n' + str(
-                STREAMING_LIMIT * stream.restart_counter) + ': NEW REQUEST\n')
+            sys.stdout.write(
+                "\n" + str(STREAMING_LIMIT * stream.restart_counter) + ": NEW REQUEST\n"
+            )
 
             stream.audio_input = []
             audio_generator = stream.generator()
 
-            requests = (speech.types.StreamingRecognizeRequest(
-                audio_content=content)for content in audio_generator)
+            requests = (
+                speech.StreamingRecognizeRequest(audio_content=content)
+                for content in audio_generator
+            )
 
-            responses = client.streaming_recognize(streaming_config,
-                                                   requests)
+            responses = client.streaming_recognize(
+                requests=requests, config=streaming_config
+            )
 
             # Now, put the transcription responses to use.
             listen_print_loop(responses, stream)
@@ -282,11 +291,11 @@ def main():
             stream.restart_counter = stream.restart_counter + 1
 
             if not stream.last_transcript_was_final:
-                sys.stdout.write('\n')
+                sys.stdout.write("\n")
             stream.new_stream = True
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
 
     main()
 

diff --git a/speech/microphone/transcribe_streaming_mic.py b/speech/microphone/transcribe_streaming_mic.py
@@ -32,8 +32,6 @@
 import sys
 
 from google.cloud import speech
-from google.cloud.speech import enums
-from google.cloud.speech import types
 import pyaudio
 from six.moves import queue
 
@@ -44,6 +42,7 @@
 
 class MicrophoneStream(object):
     """Opens a recording stream as a generator yielding the audio chunks."""
+
     def __init__(self, rate, chunk):
         self._rate = rate
         self._chunk = chunk
@@ -58,8 +57,10 @@ def __enter__(self):
             format=pyaudio.paInt16,
             # The API currently only supports 1-channel (mono) audio
             # https://goo.gl/z757pE
-            channels=1, rate=self._rate,
-            input=True, frames_per_buffer=self._chunk,
+            channels=1,
+            rate=self._rate,
+            input=True,
+            frames_per_buffer=self._chunk,
             # Run the audio stream asynchronously to fill the buffer object.
             # This is necessary so that the input device's buffer doesn't
             # overflow while the calling thread makes network requests, etc.
@@ -104,7 +105,7 @@ def generator(self):
                 except queue.Empty:
                     break
 
-            yield b''.join(data)
+            yield b"".join(data)
 
 
 def listen_print_loop(responses):
@@ -142,10 +143,10 @@ def listen_print_loop(responses):
         #
         # If the previous result was longer than this one, we need to print
         # some extra spaces to overwrite the previous result
-        overwrite_chars = ' ' * (num_chars_printed - len(transcript))
+        overwrite_chars = " " * (num_chars_printed - len(transcript))
 
         if not result.is_final:
-            sys.stdout.write(transcript + overwrite_chars + '\r')
+            sys.stdout.write(transcript + overwrite_chars + "\r")
             sys.stdout.flush()
 
             num_chars_printed = len(transcript)
@@ -155,8 +156,8 @@ def listen_print_loop(responses):
 
             # Exit recognition if any of the transcribed phrases could be
             # one of our keywords.
-            if re.search(r'\b(exit|quit)\b', transcript, re.I):
-                print('Exiting..')
+            if re.search(r"\b(exit|quit)\b", transcript, re.I):
+                print("Exiting..")
                 break
 
             num_chars_printed = 0
@@ -165,28 +166,33 @@ def listen_print_loop(responses):
 def main():
     # See http://g.co/cloud/speech/docs/languages
     # for a list of supported languages.
-    language_code = 'en-US'  # a BCP-47 language tag
+    language_code = "en-US"  # a BCP-47 language tag
 
     client = speech.SpeechClient()
-    config = types.RecognitionConfig(
-        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
+    config = speech.RecognitionConfig(
+        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=RATE,
-        language_code=language_code)
-    streaming_config = types.StreamingRecognitionConfig(
-        config=config,
-        interim_results=True)
+        language_code=language_code,
+    )
+    streaming_config = speech.StreamingRecognitionConfig(
+        config=config, interim_results=True
+    )
 
     with MicrophoneStream(RATE, CHUNK) as stream:
         audio_generator = stream.generator()
-        requests = (types.StreamingRecognizeRequest(audio_content=content)
-                    for content in audio_generator)
+        requests = (
+            speech.StreamingRecognizeRequest(audio_content=content)
+            for content in audio_generator
+        )
 
-        responses = client.streaming_recognize(streaming_config, requests)
+        responses = client.streaming_recognize(
+            requests=requests, config=streaming_config
+        )
 
         # Now, put the transcription responses to use.
         listen_print_loop(responses)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
 # [END speech_transcribe_streaming_mic]
diff --git a/speech/microphone/transcribe_streaming_mic_test.py b/speech/microphone/transcribe_streaming_mic_test.py
@@ -18,7 +18,7 @@
 
 import mock
 
-RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
+RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
 
 
 class MockPyAudio(object):
@@ -32,8 +32,9 @@ def open(self, stream_callback, rate, *args, **kwargs):
         self.rate = rate
         self.closed = threading.Event()
         self.stream_thread = threading.Thread(
-            target=self.stream_audio, args=(
-                self.audio_filename, stream_callback, self.closed))
+            target=self.stream_audio,
+            args=(self.audio_filename, stream_callback, self.closed),
+        )
         self.stream_thread.start()
         return self
 
@@ -47,23 +48,25 @@ def terminate(self):
         pass
 
     def stream_audio(self, audio_filename, callback, closed, num_frames=512):
-        with open(audio_filename, 'rb') as audio_file:
+        with open(audio_filename, "rb") as audio_file:
             while not closed.is_set():
                 # Approximate realtime by sleeping for the appropriate time for
                 # the requested number of frames
                 time.sleep(num_frames / float(self.rate))
                 # audio is 16-bit samples, whereas python byte is 8-bit
                 num_bytes = 2 * num_frames
-                chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
+                chunk = audio_file.read(num_bytes) or b"\0" * num_bytes
                 callback(chunk, None, None, None)
 
 
-@mock.patch.dict('sys.modules', pyaudio=mock.MagicMock(
-        PyAudio=MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))))
+@mock.patch.dict(
+    "sys.modules",
+    pyaudio=mock.MagicMock(PyAudio=MockPyAudio(os.path.join(RESOURCES, "quit.raw"))),
+)
 def test_main(capsys):
     import transcribe_streaming_mic
 
     transcribe_streaming_mic.main()
     out, err = capsys.readouterr()
 
-    assert re.search(r'quit', out, re.DOTALL | re.I)
+    assert re.search(r"quit", out, re.DOTALL | re.I)