Skip to content

Commit

Permalink
Changing cloud speech code samples to work with v1beta1 (#399)
Browse files Browse the repository at this point in the history
  • Loading branch information
xinjiez authored and Jon Wayne Parrott committed Jul 1, 2016
1 parent 15b7063 commit bfef34d
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 51 deletions.
Empty file removed speech/api/grpc_auth.py
Empty file.
2 changes: 1 addition & 1 deletion speech/api/requirements-speech_grpc.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
gcloud==0.17.0
grpcio==0.14.0
PyAudio==0.2.9
grpc-google-cloud-speech==1.0.4
grpc-google-cloud-speech-v1beta1==1.0.0
23 changes: 10 additions & 13 deletions speech/api/speech_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import argparse

from gcloud.credentials import get_credentials
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
from grpc.beta import implementations

# Keep the request alive for this many seconds
Expand Down Expand Up @@ -48,25 +48,23 @@ def make_channel(host, port):
return implementations.secure_channel(host, port, composite_channel)


def main(input_uri, output_uri, encoding, sample_rate):
def main(input_uri, encoding, sample_rate):
service = cloud_speech.beta_create_Speech_stub(
make_channel('speech.googleapis.com', 443))
# The method and parameters can be inferred from the proto from which the
# grpc client lib was generated. See:
# https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto
response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest(
initial_request=cloud_speech.InitialRecognizeRequest(
# https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
response = service.SyncRecognize(cloud_speech.SyncRecognizeRequest(
config=cloud_speech.RecognitionConfig(
encoding=encoding,
sample_rate=sample_rate,
output_uri=output_uri,
),
audio_request=cloud_speech.AudioRequest(
audio=cloud_speech.RecognitionAudio(
uri=input_uri,
)
), DEADLINE_SECS)
# This shouldn't actually print anything, since the transcription is output
# to the GCS uri specified
print(response.responses)
# Print the recognition results.
print(response.results)


def _gcs_uri(text):
Expand All @@ -77,16 +75,15 @@ def _gcs_uri(text):


PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/'
'google/cloud/speech/v1/cloud_speech.proto')
'google/cloud/speech/v1beta1/cloud_speech.proto')
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('input_uri', type=_gcs_uri)
parser.add_argument('output_uri', type=_gcs_uri)
parser.add_argument(
'--encoding', default='FLAC', choices=[
'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
help='How the audio file is encoded. See {}#L67'.format(PROTO_URL))
parser.add_argument('--sample_rate', default=16000)

args = parser.parse_args()
main(args.input_uri, args.output_uri, args.encoding, args.sample_rate)
main(args.input_uri, args.encoding, args.sample_rate)
6 changes: 3 additions & 3 deletions speech/api/speech_gcs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import sys

import pytest
Expand All @@ -24,12 +25,11 @@
'https://github.com/grpc/grpc/issues/282'))
def test_main(cloud_config, capsys):
input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket)
output_uri = 'gs://{}/speech/audio.txt'.format(cloud_config.storage_bucket)

main(input_uri, output_uri, 'FLAC', 16000)
main(input_uri, 'FLAC', 16000)

out, err = capsys.readouterr()
assert '[]\n' == out
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)


def test_gcs_uri():
Expand Down
8 changes: 4 additions & 4 deletions speech/api/speech_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def get_speech_service():
credentials.authorize(http)

return discovery.build(
'speech', 'v1', http=http, discoveryServiceUrl=DISCOVERY_URL)
'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL)
# [END authenticating]


Expand All @@ -57,13 +57,13 @@ def main(speech_file):
speech_content = base64.b64encode(speech.read())

service = get_speech_service()
service_request = service.speech().recognize(
service_request = service.speech().syncrecognize(
body={
'initialRequest': {
'config': {
'encoding': 'LINEAR16',
'sampleRate': 16000
},
'audioRequest': {
'audio': {
'content': speech_content.decode('UTF-8')
}
})
Expand Down
62 changes: 32 additions & 30 deletions speech/api/speech_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,22 @@
# limitations under the License.
"""Sample that streams audio to the Google Cloud Speech API via GRPC."""

from __future__ import division

import contextlib
import re
import threading

from gcloud.credentials import get_credentials
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
from google.rpc import code_pb2
from grpc.beta import implementations
import pyaudio

# Audio recording parameters
RATE = 16000
CHANNELS = 1
CHUNK = RATE // 10 # 100ms
CHUNK = int(RATE / 10) # 100ms

# Keep the request alive for this many seconds
DEADLINE_SECS = 8 * 60 * 60
Expand All @@ -43,15 +45,15 @@ def make_channel(host, port):
creds = get_credentials().create_scoped([SPEECH_SCOPE])
# Add a plugin to inject the creds into the header
auth_header = (
'Authorization',
'Bearer ' + creds.get_access_token().access_token)
'Authorization',
'Bearer ' + creds.get_access_token().access_token)
auth_plugin = implementations.metadata_call_credentials(
lambda _, cb: cb([auth_header], None),
name='google_creds')
lambda _, cb: cb([auth_header], None),
name='google_creds')

# compose the two together for both ssl and google auth
composite_channel = implementations.composite_channel_credentials(
ssl_channel, auth_plugin)
ssl_channel, auth_plugin)

return implementations.secure_channel(host, port, composite_channel)

Expand All @@ -75,41 +77,40 @@ def record_audio(channels, rate, chunk):


def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
"""Yields `RecognizeRequest`s constructed from a recording audio stream.
"""Yields `StreamingRecognizeRequest`s constructed from a recording audio
stream.
Args:
stop_audio: A threading.Event object that stops the recording when set.
channels: How many audio channels to record.
rate: The sampling rate.
chunk: Buffer audio into chunks of this size before sending to the api.
"""
with record_audio(channels, rate, chunk) as audio_stream:
# The initial request must contain metadata about the stream, so the
# server knows how to interpret it.
metadata = cloud_speech.InitialRecognizeRequest(
encoding='LINEAR16', sample_rate=rate,
# Note that setting interim_results to True means that you'll
# likely get multiple results for the same bit of audio, as the
# system re-interprets audio in the context of subsequent audio.
# However, this will give us quick results without having to tell
# the server when to finalize a piece of audio.
interim_results=True, continuous=False,
)
data = audio_stream.read(chunk)
audio_request = cloud_speech.AudioRequest(content=data)

yield cloud_speech.RecognizeRequest(
initial_request=metadata,
audio_request=audio_request)
# The initial request must contain metadata about the stream, so the
# server knows how to interpret it.
recognition_config = cloud_speech.RecognitionConfig(
encoding='LINEAR16', sample_rate=rate)
streaming_config = cloud_speech.StreamingRecognitionConfig(
config=recognition_config,
# Note that setting interim_results to True means that you'll likely
# get multiple results for the same bit of audio, as the system
# re-interprets audio in the context of subsequent audio. However, this
# will give us quick results without having to tell the server when to
# finalize a piece of audio.
interim_results=True, single_utterance=True
)

yield cloud_speech.StreamingRecognizeRequest(
streaming_config=streaming_config)

with record_audio(channels, rate, chunk) as audio_stream:
while not stop_audio.is_set():
data = audio_stream.read(chunk)
if not data:
raise StopIteration()
# Subsequent requests can all just have the content
audio_request = cloud_speech.AudioRequest(content=data)

yield cloud_speech.RecognizeRequest(audio_request=audio_request)
# Subsequent requests can all just have the content
yield cloud_speech.StreamingRecognizeRequest(audio_content=data)


def listen_print_loop(recognize_stream):
Expand All @@ -136,7 +137,8 @@ def main():
make_channel('speech.googleapis.com', 443)) as service:
try:
listen_print_loop(
service.Recognize(request_stream(stop_audio), DEADLINE_SECS))
service.StreamingRecognize(
request_stream(stop_audio), DEADLINE_SECS))
finally:
# Stop the request stream once we're done with the loop - otherwise
# it'll keep going in the thread that the grpc lib makes for it.
Expand Down

0 comments on commit bfef34d

Please sign in to comment.