Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changing cloud speech code samples to work with v1beta1 #399

Merged
merged 4 commits into from
Jul 1, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file removed speech/api/grpc_auth.py
Empty file.
2 changes: 1 addition & 1 deletion speech/api/requirements-speech_grpc.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
gcloud==0.17.0
grpcio==0.14.0
PyAudio==0.2.9
grpc-google-cloud-speech==1.0.4
grpc-google-cloud-speech-v1beta1==1.0.0
23 changes: 10 additions & 13 deletions speech/api/speech_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import argparse

from gcloud.credentials import get_credentials
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
from grpc.beta import implementations

# Keep the request alive for this many seconds
Expand Down Expand Up @@ -48,25 +48,23 @@ def make_channel(host, port):
return implementations.secure_channel(host, port, composite_channel)


def main(input_uri, output_uri, encoding, sample_rate):
def main(input_uri, encoding, sample_rate):
service = cloud_speech.beta_create_Speech_stub(
make_channel('speech.googleapis.com', 443))
# The method and parameters can be inferred from the proto from which the
# grpc client lib was generated. See:
# https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto
response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest(
initial_request=cloud_speech.InitialRecognizeRequest(
# https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
response = service.SyncRecognize(cloud_speech.SyncRecognizeRequest(
config=cloud_speech.RecognitionConfig(
encoding=encoding,
sample_rate=sample_rate,
output_uri=output_uri,
),
audio_request=cloud_speech.AudioRequest(
audio=cloud_speech.RecognitionAudio(
uri=input_uri,
)
), DEADLINE_SECS)
# This shouldn't actually print anything, since the transcription is output
# to the GCS uri specified
print(response.responses)
# Print the recognition results.
print(response.results)


def _gcs_uri(text):
Expand All @@ -77,16 +75,15 @@ def _gcs_uri(text):


PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/'
'google/cloud/speech/v1/cloud_speech.proto')
'google/cloud/speech/v1beta1/cloud_speech.proto')
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('input_uri', type=_gcs_uri)
parser.add_argument('output_uri', type=_gcs_uri)
parser.add_argument(
'--encoding', default='FLAC', choices=[
'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main point of this sample was to demonstrate having an output URI on GCS (thus '_gcs' in the filename). Is that not supported anymore?

help='How the audio file is encoded. See {}#L67'.format(PROTO_URL))
parser.add_argument('--sample_rate', default=16000)

args = parser.parse_args()
main(args.input_uri, args.output_uri, args.encoding, args.sample_rate)
main(args.input_uri, args.encoding, args.sample_rate)
6 changes: 3 additions & 3 deletions speech/api/speech_gcs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import sys

import pytest
Expand All @@ -24,12 +25,11 @@
'https://github.com/grpc/grpc/issues/282'))
def test_main(cloud_config, capsys):
input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket)
output_uri = 'gs://{}/speech/audio.txt'.format(cloud_config.storage_bucket)

main(input_uri, output_uri, 'FLAC', 16000)
main(input_uri, 'FLAC', 16000)

out, err = capsys.readouterr()
assert '[]\n' == out
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)


def test_gcs_uri():
Expand Down
8 changes: 4 additions & 4 deletions speech/api/speech_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def get_speech_service():
credentials.authorize(http)

return discovery.build(
'speech', 'v1', http=http, discoveryServiceUrl=DISCOVERY_URL)
'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL)
# [END authenticating]


Expand All @@ -57,13 +57,13 @@ def main(speech_file):
speech_content = base64.b64encode(speech.read())

service = get_speech_service()
service_request = service.speech().recognize(
service_request = service.speech().syncrecognize(
body={
'initialRequest': {
'config': {
'encoding': 'LINEAR16',
'sampleRate': 16000
},
'audioRequest': {
'audio': {
'content': speech_content.decode('UTF-8')
}
})
Expand Down
62 changes: 32 additions & 30 deletions speech/api/speech_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,22 @@
# limitations under the License.
"""Sample that streams audio to the Google Cloud Speech API via GRPC."""

from __future__ import division

import contextlib
import re
import threading

from gcloud.credentials import get_credentials
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
from google.rpc import code_pb2
from grpc.beta import implementations
import pyaudio

# Audio recording parameters
RATE = 16000
CHANNELS = 1
CHUNK = RATE // 10 # 100ms
CHUNK = int(RATE / 10) # 100ms

# Keep the request alive for this many seconds
DEADLINE_SECS = 8 * 60 * 60
Expand All @@ -43,15 +45,15 @@ def make_channel(host, port):
creds = get_credentials().create_scoped([SPEECH_SCOPE])
# Add a plugin to inject the creds into the header
auth_header = (
'Authorization',
'Bearer ' + creds.get_access_token().access_token)
'Authorization',
'Bearer ' + creds.get_access_token().access_token)
auth_plugin = implementations.metadata_call_credentials(
lambda _, cb: cb([auth_header], None),
name='google_creds')
lambda _, cb: cb([auth_header], None),
name='google_creds')

# compose the two together for both ssl and google auth
composite_channel = implementations.composite_channel_credentials(
ssl_channel, auth_plugin)
ssl_channel, auth_plugin)

return implementations.secure_channel(host, port, composite_channel)

Expand All @@ -75,41 +77,40 @@ def record_audio(channels, rate, chunk):


def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
"""Yields `RecognizeRequest`s constructed from a recording audio stream.
"""Yields `StreamingRecognizeRequest`s constructed from a recording audio
stream.
Args:
stop_audio: A threading.Event object that stops the recording when set.
channels: How many audio channels to record.
rate: The sampling rate.
chunk: Buffer audio into chunks of this size before sending to the api.
"""
with record_audio(channels, rate, chunk) as audio_stream:
# The initial request must contain metadata about the stream, so the
# server knows how to interpret it.
metadata = cloud_speech.InitialRecognizeRequest(
encoding='LINEAR16', sample_rate=rate,
# Note that setting interim_results to True means that you'll
# likely get multiple results for the same bit of audio, as the
# system re-interprets audio in the context of subsequent audio.
# However, this will give us quick results without having to tell
# the server when to finalize a piece of audio.
interim_results=True, continuous=False,
)
data = audio_stream.read(chunk)
audio_request = cloud_speech.AudioRequest(content=data)

yield cloud_speech.RecognizeRequest(
initial_request=metadata,
audio_request=audio_request)
# The initial request must contain metadata about the stream, so the
# server knows how to interpret it.
recognition_config = cloud_speech.RecognitionConfig(
encoding='LINEAR16', sample_rate=rate)
streaming_config = cloud_speech.StreamingRecognitionConfig(
config=recognition_config,
# Note that setting interim_results to True means that you'll likely
# get multiple results for the same bit of audio, as the system
# re-interprets audio in the context of subsequent audio. However, this
# will give us quick results without having to tell the server when to
# finalize a piece of audio.
interim_results=True, single_utterance=True
)

yield cloud_speech.StreamingRecognizeRequest(
streaming_config=streaming_config)

with record_audio(channels, rate, chunk) as audio_stream:
while not stop_audio.is_set():
data = audio_stream.read(chunk)
if not data:
raise StopIteration()
# Subsequent requests can all just have the content
audio_request = cloud_speech.AudioRequest(content=data)

yield cloud_speech.RecognizeRequest(audio_request=audio_request)
# Subsequent requests can all just have the content
yield cloud_speech.StreamingRecognizeRequest(audio_content=data)


def listen_print_loop(recognize_stream):
Expand All @@ -136,7 +137,8 @@ def main():
make_channel('speech.googleapis.com', 443)) as service:
try:
listen_print_loop(
service.Recognize(request_stream(stop_audio), DEADLINE_SECS))
service.StreamingRecognize(
request_stream(stop_audio), DEADLINE_SECS))
finally:
# Stop the request stream once we're done with the loop - otherwise
# it'll keep going in the thread that the grpc lib makes for it.
Expand Down