diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt
index 88d0bd85..e4805f68 100644
--- a/samples/snippets/requirements.txt
+++ b/samples/snippets/requirements.txt
@@ -1,3 +1 @@
 google-cloud-speech==0.36.3
-pyaudio==0.2.11
-six==1.12.0
diff --git a/samples/snippets/resources/quit.raw b/samples/snippets/resources/quit.raw
deleted file mode 100644
index a01dfc45..00000000
Binary files a/samples/snippets/resources/quit.raw and /dev/null differ
diff --git a/samples/snippets/transcribe_streaming_indefinite.py b/samples/snippets/transcribe_streaming_indefinite.py
deleted file mode 100644
index f1adb224..00000000
--- a/samples/snippets/transcribe_streaming_indefinite.py
+++ /dev/null
@@ -1,223 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2018 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Google Cloud Speech API sample application using the streaming API.
-
-NOTE: This module requires the additional dependency `pyaudio`. To install
-using pip:
-
-    pip install pyaudio
-
-Example usage:
-    python transcribe_streaming_indefinite.py
-"""
-
-# [START speech_transcribe_infinite_streaming]
-from __future__ import division
-
-import time
-import re
-import sys
-
-from google.cloud import speech
-
-import pyaudio
-from six.moves import queue
-
-# Audio recording parameters
-STREAMING_LIMIT = 55000
-SAMPLE_RATE = 16000
-CHUNK_SIZE = int(SAMPLE_RATE / 10)  # 100ms
-
-
-def get_current_time():
-    return int(round(time.time() * 1000))
-
-
-def duration_to_secs(duration):
-    return duration.seconds + (duration.nanos / float(1e9))
-
-
-class ResumableMicrophoneStream:
-    """Opens a recording stream as a generator yielding the audio chunks."""
-    def __init__(self, rate, chunk_size):
-        self._rate = rate
-        self._chunk_size = chunk_size
-        self._num_channels = 1
-        self._max_replay_secs = 5
-
-        # Create a thread-safe buffer of audio data
-        self._buff = queue.Queue()
-        self.closed = True
-        self.start_time = get_current_time()
-
-        # 2 bytes in 16 bit samples
-        self._bytes_per_sample = 2 * self._num_channels
-        self._bytes_per_second = self._rate * self._bytes_per_sample
-
-        self._bytes_per_chunk = (self._chunk_size * self._bytes_per_sample)
-        self._chunks_per_second = (
-            self._bytes_per_second // self._bytes_per_chunk)
-
-    def __enter__(self):
-        self.closed = False
-
-        self._audio_interface = pyaudio.PyAudio()
-        self._audio_stream = self._audio_interface.open(
-            format=pyaudio.paInt16,
-            channels=self._num_channels,
-            rate=self._rate,
-            input=True,
-            frames_per_buffer=self._chunk_size,
-            # Run the audio stream asynchronously to fill the buffer object.
-            # This is necessary so that the input device's buffer doesn't
-            # overflow while the calling thread makes network requests, etc.
-            stream_callback=self._fill_buffer,
-        )
-
-        return self
-
-    def __exit__(self, type, value, traceback):
-        self._audio_stream.stop_stream()
-        self._audio_stream.close()
-        self.closed = True
-        # Signal the generator to terminate so that the client's
-        # streaming_recognize method will not block the process termination.
-        self._buff.put(None)
-        self._audio_interface.terminate()
-
-    def _fill_buffer(self, in_data, *args, **kwargs):
-        """Continuously collect data from the audio stream, into the buffer."""
-        self._buff.put(in_data)
-        return None, pyaudio.paContinue
-
-    def generator(self):
-        while not self.closed:
-            if get_current_time() - self.start_time > STREAMING_LIMIT:
-                self.start_time = get_current_time()
-                break
-            # Use a blocking get() to ensure there's at least one chunk of
-            # data, and stop iteration if the chunk is None, indicating the
-            # end of the audio stream.
-            chunk = self._buff.get()
-            if chunk is None:
-                return
-            data = [chunk]
-
-            # Now consume whatever other data's still buffered.
-            while True:
-                try:
-                    chunk = self._buff.get(block=False)
-                    if chunk is None:
-                        return
-                    data.append(chunk)
-                except queue.Empty:
-                    break
-
-            yield b''.join(data)
-
-
-def listen_print_loop(responses, stream):
-    """Iterates through server responses and prints them.
-
-    The responses passed is a generator that will block until a response
-    is provided by the server.
-
-    Each response may contain multiple results, and each result may contain
-    multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we
-    print only the transcription for the top alternative of the top result.
-
-    In this case, responses are provided for interim results as well. If the
-    response is an interim one, print a line feed at the end of it, to allow
-    the next result to overwrite it, until the response is a final one. For the
-    final one, print a newline to preserve the finalized transcription.
-    """
-    responses = (r for r in responses if (
-            r.results and r.results[0].alternatives))
-
-    num_chars_printed = 0
-    for response in responses:
-        if not response.results:
-            continue
-
-        # The `results` list is consecutive. For streaming, we only care about
-        # the first result being considered, since once it's `is_final`, it
-        # moves on to considering the next utterance.
-        result = response.results[0]
-        if not result.alternatives:
-            continue
-
-        # Display the transcription of the top alternative.
-        top_alternative = result.alternatives[0]
-        transcript = top_alternative.transcript
-
-        # Display interim results, but with a carriage return at the end of the
-        # line, so subsequent lines will overwrite them.
-        #
-        # If the previous result was longer than this one, we need to print
-        # some extra spaces to overwrite the previous result
-        overwrite_chars = ' ' * (num_chars_printed - len(transcript))
-
-        if not result.is_final:
-            sys.stdout.write(transcript + overwrite_chars + '\r')
-            sys.stdout.flush()
-
-            num_chars_printed = len(transcript)
-        else:
-            print(transcript + overwrite_chars)
-
-            # Exit recognition if any of the transcribed phrases could be
-            # one of our keywords.
-            if re.search(r'\b(exit|quit)\b', transcript, re.I):
-                print('Exiting..')
-                stream.closed = True
-                break
-
-            num_chars_printed = 0
-
-
-def main():
-    client = speech.SpeechClient()
-    config = speech.types.RecognitionConfig(
-        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
-        sample_rate_hertz=SAMPLE_RATE,
-        language_code='en-US',
-        max_alternatives=1,
-        enable_word_time_offsets=True)
-    streaming_config = speech.types.StreamingRecognitionConfig(
-        config=config,
-        interim_results=True)
-
-    mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
-
-    print('Say "Quit" or "Exit" to terminate the program.')
-
-    with mic_manager as stream:
-        while not stream.closed:
-            audio_generator = stream.generator()
-            requests = (speech.types.StreamingRecognizeRequest(
-                audio_content=content)
-                for content in audio_generator)
-
-            responses = client.streaming_recognize(streaming_config,
-                                                   requests)
-            # Now, put the transcription responses to use.
-            listen_print_loop(responses, stream)
-
-
-if __name__ == '__main__':
-    main()
-# [END speech_transcribe_infinite_streaming]
diff --git a/samples/snippets/transcribe_streaming_mic.py b/samples/snippets/transcribe_streaming_mic.py
deleted file mode 100644
index 3ca7b709..00000000
--- a/samples/snippets/transcribe_streaming_mic.py
+++ /dev/null
@@ -1,192 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2017 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Google Cloud Speech API sample application using the streaming API.
-
-NOTE: This module requires the additional dependency `pyaudio`. To install
-using pip:
-
-    pip install pyaudio
-
-Example usage:
-    python transcribe_streaming_mic.py
-"""
-
-# [START speech_transcribe_streaming_mic]
-from __future__ import division
-
-import re
-import sys
-
-from google.cloud import speech
-from google.cloud.speech import enums
-from google.cloud.speech import types
-import pyaudio
-from six.moves import queue
-
-# Audio recording parameters
-RATE = 16000
-CHUNK = int(RATE / 10)  # 100ms
-
-
-class MicrophoneStream(object):
-    """Opens a recording stream as a generator yielding the audio chunks."""
-    def __init__(self, rate, chunk):
-        self._rate = rate
-        self._chunk = chunk
-
-        # Create a thread-safe buffer of audio data
-        self._buff = queue.Queue()
-        self.closed = True
-
-    def __enter__(self):
-        self._audio_interface = pyaudio.PyAudio()
-        self._audio_stream = self._audio_interface.open(
-            format=pyaudio.paInt16,
-            # The API currently only supports 1-channel (mono) audio
-            # https://goo.gl/z757pE
-            channels=1, rate=self._rate,
-            input=True, frames_per_buffer=self._chunk,
-            # Run the audio stream asynchronously to fill the buffer object.
-            # This is necessary so that the input device's buffer doesn't
-            # overflow while the calling thread makes network requests, etc.
-            stream_callback=self._fill_buffer,
-        )
-
-        self.closed = False
-
-        return self
-
-    def __exit__(self, type, value, traceback):
-        self._audio_stream.stop_stream()
-        self._audio_stream.close()
-        self.closed = True
-        # Signal the generator to terminate so that the client's
-        # streaming_recognize method will not block the process termination.
-        self._buff.put(None)
-        self._audio_interface.terminate()
-
-    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
-        """Continuously collect data from the audio stream, into the buffer."""
-        self._buff.put(in_data)
-        return None, pyaudio.paContinue
-
-    def generator(self):
-        while not self.closed:
-            # Use a blocking get() to ensure there's at least one chunk of
-            # data, and stop iteration if the chunk is None, indicating the
-            # end of the audio stream.
-            chunk = self._buff.get()
-            if chunk is None:
-                return
-            data = [chunk]
-
-            # Now consume whatever other data's still buffered.
-            while True:
-                try:
-                    chunk = self._buff.get(block=False)
-                    if chunk is None:
-                        return
-                    data.append(chunk)
-                except queue.Empty:
-                    break
-
-            yield b''.join(data)
-
-
-def listen_print_loop(responses):
-    """Iterates through server responses and prints them.
-
-    The responses passed is a generator that will block until a response
-    is provided by the server.
-
-    Each response may contain multiple results, and each result may contain
-    multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we
-    print only the transcription for the top alternative of the top result.
-
-    In this case, responses are provided for interim results as well. If the
-    response is an interim one, print a line feed at the end of it, to allow
-    the next result to overwrite it, until the response is a final one. For the
-    final one, print a newline to preserve the finalized transcription.
-    """
-    num_chars_printed = 0
-    for response in responses:
-        if not response.results:
-            continue
-
-        # The `results` list is consecutive. For streaming, we only care about
-        # the first result being considered, since once it's `is_final`, it
-        # moves on to considering the next utterance.
-        result = response.results[0]
-        if not result.alternatives:
-            continue
-
-        # Display the transcription of the top alternative.
-        transcript = result.alternatives[0].transcript
-
-        # Display interim results, but with a carriage return at the end of the
-        # line, so subsequent lines will overwrite them.
-        #
-        # If the previous result was longer than this one, we need to print
-        # some extra spaces to overwrite the previous result
-        overwrite_chars = ' ' * (num_chars_printed - len(transcript))
-
-        if not result.is_final:
-            sys.stdout.write(transcript + overwrite_chars + '\r')
-            sys.stdout.flush()
-
-            num_chars_printed = len(transcript)
-
-        else:
-            print(transcript + overwrite_chars)
-
-            # Exit recognition if any of the transcribed phrases could be
-            # one of our keywords.
-            if re.search(r'\b(exit|quit)\b', transcript, re.I):
-                print('Exiting..')
-                break
-
-            num_chars_printed = 0
-
-
-def main():
-    # See http://g.co/cloud/speech/docs/languages
-    # for a list of supported languages.
-    language_code = 'en-US'  # a BCP-47 language tag
-
-    client = speech.SpeechClient()
-    config = types.RecognitionConfig(
-        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
-        sample_rate_hertz=RATE,
-        language_code=language_code)
-    streaming_config = types.StreamingRecognitionConfig(
-        config=config,
-        interim_results=True)
-
-    with MicrophoneStream(RATE, CHUNK) as stream:
-        audio_generator = stream.generator()
-        requests = (types.StreamingRecognizeRequest(audio_content=content)
-                    for content in audio_generator)
-
-        responses = client.streaming_recognize(streaming_config, requests)
-
-        # Now, put the transcription responses to use.
-        listen_print_loop(responses)
-
-
-if __name__ == '__main__':
-    main()
-# [END speech_transcribe_streaming_mic]
diff --git a/samples/snippets/transcribe_streaming_mic_test.py b/samples/snippets/transcribe_streaming_mic_test.py
deleted file mode 100644
index dd5e7ea6..00000000
--- a/samples/snippets/transcribe_streaming_mic_test.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright 2017, Google, Inc.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import re
-import threading
-import time
-
-import mock
-
-RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
-
-
-class MockPyAudio(object):
-    def __init__(self, audio_filename):
-        self.audio_filename = audio_filename
-
-    def __call__(self, *args):
-        return self
-
-    def open(self, stream_callback, rate, *args, **kwargs):
-        self.rate = rate
-        self.closed = threading.Event()
-        self.stream_thread = threading.Thread(
-            target=self.stream_audio, args=(
-                self.audio_filename, stream_callback, self.closed))
-        self.stream_thread.start()
-        return self
-
-    def close(self):
-        self.closed.set()
-
-    def stop_stream(self):
-        pass
-
-    def terminate(self):
-        pass
-
-    def stream_audio(self, audio_filename, callback, closed, num_frames=512):
-        with open(audio_filename, 'rb') as audio_file:
-            while not closed.is_set():
-                # Approximate realtime by sleeping for the appropriate time for
-                # the requested number of frames
-                time.sleep(num_frames / float(self.rate))
-                # audio is 16-bit samples, whereas python byte is 8-bit
-                num_bytes = 2 * num_frames
-                chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
-                callback(chunk, None, None, None)
-
-
-@mock.patch.dict('sys.modules', pyaudio=mock.MagicMock(
-    PyAudio=MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))))
-def test_main(capsys):
-    import transcribe_streaming_mic
-
-    transcribe_streaming_mic.main()
-    out, err = capsys.readouterr()
-
-    assert re.search(r'quit', out, re.DOTALL | re.I)