diff --git a/speech/api/grpc_auth.py b/speech/api/grpc_auth.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/speech/api/requirements-speech_grpc.txt b/speech/api/requirements-speech_grpc.txt index 65e0755501df..443a444c5dea 100644 --- a/speech/api/requirements-speech_grpc.txt +++ b/speech/api/requirements-speech_grpc.txt @@ -1,4 +1,4 @@ gcloud==0.17.0 grpcio==0.14.0 PyAudio==0.2.9 -grpc-google-cloud-speech==1.0.4 +grpc-google-cloud-speech-v1beta1==1.0.0 diff --git a/speech/api/speech_gcs.py b/speech/api/speech_gcs.py index b25956c50ae7..8aa0aeb72c0f 100644 --- a/speech/api/speech_gcs.py +++ b/speech/api/speech_gcs.py @@ -18,7 +18,7 @@ import argparse from gcloud.credentials import get_credentials -from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech +from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech from grpc.beta import implementations # Keep the request alive for this many seconds @@ -48,25 +48,23 @@ def make_channel(host, port): return implementations.secure_channel(host, port, composite_channel) -def main(input_uri, output_uri, encoding, sample_rate): +def main(input_uri, encoding, sample_rate): service = cloud_speech.beta_create_Speech_stub( make_channel('speech.googleapis.com', 443)) # The method and parameters can be inferred from the proto from which the # grpc client lib was generated. 
See: - # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto - response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest( - initial_request=cloud_speech.InitialRecognizeRequest( + # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto + response = service.SyncRecognize(cloud_speech.SyncRecognizeRequest( + config=cloud_speech.RecognitionConfig( encoding=encoding, sample_rate=sample_rate, - output_uri=output_uri, ), - audio_request=cloud_speech.AudioRequest( + audio=cloud_speech.RecognitionAudio( uri=input_uri, ) ), DEADLINE_SECS) - # This shouldn't actually print anything, since the transcription is output - # to the GCS uri specified - print(response.responses) + # Print the recognition results. + print(response.results) def _gcs_uri(text): @@ -77,11 +75,10 @@ def _gcs_uri(text): PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/' - 'google/cloud/speech/v1/cloud_speech.proto') + 'google/cloud/speech/v1beta1/cloud_speech.proto') if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('input_uri', type=_gcs_uri) - parser.add_argument('output_uri', type=_gcs_uri) parser.add_argument( '--encoding', default='FLAC', choices=[ 'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'], @@ -89,4 +86,4 @@ def _gcs_uri(text): - parser.add_argument('--sample_rate', default=16000) + parser.add_argument('--sample_rate', type=int, default=16000) args = parser.parse_args() - main(args.input_uri, args.output_uri, args.encoding, args.sample_rate) + main(args.input_uri, args.encoding, args.sample_rate) diff --git a/speech/api/speech_gcs_test.py b/speech/api/speech_gcs_test.py index 7f03ede18ebf..56f2b4a42d3e 100644 --- a/speech/api/speech_gcs_test.py +++ b/speech/api/speech_gcs_test.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import re import sys import pytest @@ -24,12 +25,11 @@ 'https://github.com/grpc/grpc/issues/282')) def test_main(cloud_config, capsys): input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket) - output_uri = 'gs://{}/speech/audio.txt'.format(cloud_config.storage_bucket) - main(input_uri, output_uri, 'FLAC', 16000) + main(input_uri, 'FLAC', 16000) out, err = capsys.readouterr() - assert '[]\n' == out + assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) def test_gcs_uri(): diff --git a/speech/api/speech_rest.py b/speech/api/speech_rest.py index 6ab5160aa8e7..e6f83fb4a6e4 100644 --- a/speech/api/speech_rest.py +++ b/speech/api/speech_rest.py @@ -40,7 +40,7 @@ def get_speech_service(): credentials.authorize(http) return discovery.build( - 'speech', 'v1', http=http, discoveryServiceUrl=DISCOVERY_URL) + 'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL) # [END authenticating] @@ -57,13 +57,13 @@ def main(speech_file): speech_content = base64.b64encode(speech.read()) service = get_speech_service() - service_request = service.speech().recognize( + service_request = service.speech().syncrecognize( body={ - 'initialRequest': { + 'config': { 'encoding': 'LINEAR16', 'sampleRate': 16000 }, - 'audioRequest': { + 'audio': { 'content': speech_content.decode('UTF-8') } }) diff --git a/speech/api/speech_streaming.py b/speech/api/speech_streaming.py index 6ecac2e2804f..560b072990fa 100644 --- a/speech/api/speech_streaming.py +++ b/speech/api/speech_streaming.py @@ -14,12 +14,14 @@ # limitations under the License. 
"""Sample that streams audio to the Google Cloud Speech API via GRPC.""" +from __future__ import division + import contextlib import re import threading from gcloud.credentials import get_credentials -from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech +from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech from google.rpc import code_pb2 from grpc.beta import implementations import pyaudio @@ -27,7 +29,7 @@ # Audio recording parameters RATE = 16000 CHANNELS = 1 -CHUNK = RATE // 10 # 100ms +CHUNK = int(RATE / 10) # 100ms # Keep the request alive for this many seconds DEADLINE_SECS = 8 * 60 * 60 @@ -43,15 +45,15 @@ def make_channel(host, port): creds = get_credentials().create_scoped([SPEECH_SCOPE]) # Add a plugin to inject the creds into the header auth_header = ( - 'Authorization', - 'Bearer ' + creds.get_access_token().access_token) + 'Authorization', + 'Bearer ' + creds.get_access_token().access_token) auth_plugin = implementations.metadata_call_credentials( - lambda _, cb: cb([auth_header], None), - name='google_creds') + lambda _, cb: cb([auth_header], None), + name='google_creds') # compose the two together for both ssl and google auth composite_channel = implementations.composite_channel_credentials( - ssl_channel, auth_plugin) + ssl_channel, auth_plugin) return implementations.secure_channel(host, port, composite_channel) @@ -75,7 +77,8 @@ def record_audio(channels, rate, chunk): def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK): - """Yields `RecognizeRequest`s constructed from a recording audio stream. + """Yields `StreamingRecognizeRequest`s constructed from a recording audio + stream. Args: stop_audio: A threading.Event object stops the recording when set. @@ -83,33 +86,31 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK): rate: The sampling rate. chunk: Buffer audio into chunks of this size before sending to the api. 
""" - with record_audio(channels, rate, chunk) as audio_stream: - # The initial request must contain metadata about the stream, so the - # server knows how to interpret it. - metadata = cloud_speech.InitialRecognizeRequest( - encoding='LINEAR16', sample_rate=rate, - # Note that setting interim_results to True means that you'll - # likely get multiple results for the same bit of audio, as the - # system re-interprets audio in the context of subsequent audio. - # However, this will give us quick results without having to tell - # the server when to finalize a piece of audio. - interim_results=True, continuous=False, - ) - data = audio_stream.read(chunk) - audio_request = cloud_speech.AudioRequest(content=data) - - yield cloud_speech.RecognizeRequest( - initial_request=metadata, - audio_request=audio_request) + # The initial request must contain metadata about the stream, so the + # server knows how to interpret it. + recognition_config = cloud_speech.RecognitionConfig( + encoding='LINEAR16', sample_rate=rate) + streaming_config = cloud_speech.StreamingRecognitionConfig( + config=recognition_config, + # Note that setting interim_results to True means that you'll likely + # get multiple results for the same bit of audio, as the system + # re-interprets audio in the context of subsequent audio. However, this + # will give us quick results without having to tell the server when to + # finalize a piece of audio. 
+ interim_results=True, single_utterance=True + ) + + yield cloud_speech.StreamingRecognizeRequest( + streaming_config=streaming_config) + with record_audio(channels, rate, chunk) as audio_stream: while not stop_audio.is_set(): data = audio_stream.read(chunk) if not data: - raise StopIteration() + return  # PEP 479: don't raise StopIteration in a generator - # Subsequent requests can all just have the content - audio_request = cloud_speech.AudioRequest(content=data) - yield cloud_speech.RecognizeRequest(audio_request=audio_request) + # Subsequent requests can all just have the content + yield cloud_speech.StreamingRecognizeRequest(audio_content=data) def listen_print_loop(recognize_stream): @@ -136,7 +137,8 @@ def main(): make_channel('speech.googleapis.com', 443)) as service: try: listen_print_loop( - service.Recognize(request_stream(stop_audio), DEADLINE_SECS)) + service.StreamingRecognize( + request_stream(stop_audio), DEADLINE_SECS)) finally: # Stop the request stream once we're done with the loop - otherwise # it'll keep going in the thread that the grpc lib makes for it..