diff --git a/.travis.yml b/.travis.yml index 8b43097f2c0b..0566d6b8ab6c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,8 +16,13 @@ env: - GOOGLE_CLIENT_SECRETS=${TRAVIS_BUILD_DIR}/testing/resources/client-secrets.json - GAE_ROOT=${HOME}/.cache/ - secure: Orp9Et2TIwCG/Hf59aa0NUDF1pNcwcS4TFulXX175918cFREOzf/cNZNg+Ui585ZRFjbifZdc858tVuCVd8XlxQPXQgp7bwB7nXs3lby3LYg4+HD83Gaz7KOWxRLWVor6IVn8OxeCzwl6fJkdmffsTTO9csC4yZ7izHr+u7hiO4= +addons: + apt: + packages: + - portaudio19-dev before_install: - pip install --upgrade pip wheel virtualenv + # for speech api sample - openssl aes-256-cbc -k "$secrets_password" -in secrets.tar.enc -out secrets.tar -d - tar xvf secrets.tar install: diff --git a/nox.py b/nox.py index 6772a97d4bff..c330541ff2a1 100644 --- a/nox.py +++ b/nox.py @@ -86,6 +86,11 @@ def session_tests(session, interpreter, extra_pytest_args=None): # allows users to run a particular test instead of all of them. for sample in (session.posargs or collect_sample_dirs('.', SESSION_TESTS_BLACKLIST)): + # Install additional dependencies if they exist + dirname = sample if os.path.isdir(sample) else os.path.dirname(sample) + for reqfile in list_files(dirname, 'requirements*.txt'): + session.install('-r', reqfile) + session.run( 'py.test', sample, *pytest_args, diff --git a/speech/api/README.md b/speech/api/README.md index 3179f0d71076..e0c31dab2877 100644 --- a/speech/api/README.md +++ b/speech/api/README.md @@ -37,10 +37,36 @@ See the [Cloud Platform Auth Guide](https://cloud.google.com/docs/authentication#developer_workflow) for more information. 
+### Install the dependencies + +* If you're running the `speechrest.py` sample: + + ```sh + $ pip install -r requirements-speechrest.txt + ``` + +* If you're running the `speech_streaming.py` sample: + + ```sh + $ pip install -r requirements-speech_streaming.txt + ``` + ## Run the example -```sh -$ python speechrest.py resources/audio.raw -``` +* To run the `speechrest.py` sample: + + ```sh + $ python speechrest.py resources/audio.raw + ``` + + You should see a response with the transcription result. + +* To run the `speech_streaming.py` sample: + + ```sh + $ python speech_streaming.py + ``` -You should see a response with the transcription result. + The sample will run in a continuous loop, printing the data and metadata + it receives from the Speech API, which includes alternative transcriptions + of what it hears, and a confidence score. Say "exit" to exit the loop. diff --git a/speech/api/requirements-speech_streaming.txt b/speech/api/requirements-speech_streaming.txt new file mode 100644 index 000000000000..8240e5fd7f15 --- /dev/null +++ b/speech/api/requirements-speech_streaming.txt @@ -0,0 +1,4 @@ +gcloud==0.12.0 +grpcio==0.13.1 +PyAudio==0.2.9 +grpc-google-cloud-speech==1.0.0 diff --git a/speech/api/requirements-speechrest.txt b/speech/api/requirements-speechrest.txt new file mode 100644 index 000000000000..c3b2784ce876 --- /dev/null +++ b/speech/api/requirements-speechrest.txt @@ -0,0 +1 @@ +google-api-python-client==1.5.0 diff --git a/speech/api/resources/quit.raw b/speech/api/resources/quit.raw new file mode 100644 index 000000000000..a01dfc45a597 Binary files /dev/null and b/speech/api/resources/quit.raw differ diff --git a/speech/api/speech_streaming.py b/speech/api/speech_streaming.py new file mode 100644 index 000000000000..a860e37bf931 --- /dev/null +++ b/speech/api/speech_streaming.py @@ -0,0 +1,120 @@ +#!/usr/bin/python + +import contextlib +import threading + +from gcloud.credentials import get_credentials +from google.cloud.speech.v1.cloud_speech_pb2 
from google.cloud.speech.v1.cloud_speech_pb2 import *  # noqa
from google.rpc import code_pb2
from grpc.beta import implementations
import pyaudio

# Audio recording parameters
RATE = 16000
CHANNELS = 1
CHUNK = RATE // 10  # 100ms

# Keep the request alive for this many seconds
DEADLINE_SECS = 8 * 60 * 60
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'


def make_channel(host, port):
    """Creates an SSL channel with auth credentials from the environment.

    Args:
        host: Host name of the API endpoint to connect to.
        port: Port of the API endpoint to connect to.

    Returns:
        The secure channel built from the SSL credentials composed with the
        bearer-token call credentials.
    """
    # In order to make an https call, use an ssl channel with defaults
    ssl_channel = implementations.ssl_channel_credentials(None, None, None)

    # Grab application default credentials from the environment
    creds = get_credentials().create_scoped([SPEECH_SCOPE])
    # Add a plugin to inject the creds into the header. The access token is
    # fetched once, here; the same header is handed out on every call.
    auth_header = (
        'Authorization',
        'Bearer ' + creds.get_access_token().access_token)
    auth_plugin = implementations.metadata_call_credentials(
        lambda _, cb: cb([auth_header], None),
        name='google_creds')

    # compose the two together for both ssl and google auth
    composite_channel = implementations.composite_channel_credentials(
        ssl_channel, auth_plugin)

    return implementations.secure_channel(host, port, composite_channel)


@contextlib.contextmanager
def record_audio(channels, rate, chunk):
    """Opens a recording stream in a context manager.

    Args:
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Number of frames per buffer for the input stream.

    Yields:
        The opened 16-bit input stream.
    """
    audio_interface = pyaudio.PyAudio()
    try:
        audio_stream = audio_interface.open(
            format=pyaudio.paInt16, channels=channels, rate=rate,
            input=True, frames_per_buffer=chunk,
        )
        try:
            yield audio_stream
        finally:
            # An exception raised inside the `with` body is re-raised at the
            # `yield`; without `finally` the stream would never be released.
            audio_stream.stop_stream()
            audio_stream.close()
    finally:
        # Also runs when `open()` itself fails, so the interface never leaks.
        audio_interface.terminate()


def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `RecognizeRequest`s constructed from a recording audio stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    with record_audio(channels, rate, chunk) as audio_stream:
        # The initial request must contain metadata about the stream, so the
        # server knows how to interpret it.
        metadata = InitialRecognizeRequest(
            encoding='LINEAR16', sample_rate=rate)
        audio_request = AudioRequest(content=audio_stream.read(chunk))

        yield RecognizeRequest(
            initial_request=metadata,
            audio_request=audio_request)

        while not stop_audio.is_set():
            # Subsequent requests can all just have the content
            audio_request = AudioRequest(content=audio_stream.read(chunk))

            yield RecognizeRequest(audio_request=audio_request)


def listen_print_loop(recognize_stream):
    """Prints streamed recognition results until an exit keyword is heard.

    Args:
        recognize_stream: Iterable of responses; each is read for its
            `error` and `results` fields.

    Raises:
        RuntimeError: If a response carries an error code other than OK.
    """
    for resp in recognize_stream:
        if resp.error.code != code_pb2.OK:
            raise RuntimeError('Server error: ' + resp.error.message)

        # Display the transcriptions & their alternatives
        for result in resp.results:
            print(result.alternatives)

        # Exit recognition if any of the transcribed phrases could be
        # one of our keywords.
        if any(alt.confidence > .5 and
               (alt.transcript.strip() in ('exit', 'quit'))
               for result in resp.results
               for alt in result.alternatives):
            print('Exiting..')
            return


def main():
    """Streams recorded audio to the Speech API and prints what it hears."""
    stop_audio = threading.Event()
    with beta_create_Speech_stub(
            make_channel('speech.googleapis.com', 443)) as service:
        try:
            listen_print_loop(
                service.Recognize(request_stream(stop_audio), DEADLINE_SECS))
        finally:
            # Stop the request stream once we're done with the loop - otherwise
            # it'll keep going in the thread that the grpc lib makes for it..
            stop_audio.set()


if __name__ == '__main__':
    main()
# Copyright 2016, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import io
import re
import sys

import pytest

import speech_streaming


class MockAudioStream(object):
    """File-backed stand-in for an audio stream, padded with silence.

    NOTE(review): `test_main` below patches in `mock_audio_stream(...)`, not
    this class - confirm whether this class is still needed.
    """

    def __init__(self, audio_filename, trailing_silence_secs=10):
        self.audio_filename = audio_filename
        # Bytes literal: io.BytesIO rejects text on Python 3. Two zero bytes
        # make one silent 16-bit sample.
        self.silence = io.BytesIO(b'\0\0' * speech_streaming.RATE *
                                  trailing_silence_secs)

    def __enter__(self):
        # Raw audio is binary data; text mode could decode or translate it.
        self.audio_file = open(self.audio_filename, 'rb')
        return self

    def __exit__(self, *args):
        self.audio_file.close()

    def __call__(self, *args):
        # Calling the instance returns the instance itself, whatever the args.
        return self

    def read(self, num_frames):
        """Returns the next `num_frames` samples, then silence once at EOF."""
        # audio is 16-bit samples, whereas python byte is 8-bit
        num_bytes = 2 * num_frames
        chunk = self.audio_file.read(num_bytes) or self.silence.read(num_bytes)
        return chunk


def mock_audio_stream(filename):
    """Builds a replacement for `speech_streaming.record_audio`.

    The returned context manager ignores its arguments and yields
    `filename` opened for binary reading.
    """
    @contextlib.contextmanager
    def fake_record_audio(channels, rate, chunk):
        with open(filename, 'rb') as audio_file:
            yield audio_file

    return fake_record_audio


@pytest.mark.skipif(
    sys.version_info >= (3, 0), reason="can't get grpc lib to work in python3")
def test_main(resource, monkeypatch, capsys):
    """Runs the sample against a recorded "quit" and checks the transcript."""
    monkeypatch.setattr(
        speech_streaming, 'record_audio',
        mock_audio_stream(resource('quit.raw')))
    monkeypatch.setattr(speech_streaming, 'DEADLINE_SECS', 5)

    speech_streaming.main()
    out, err = capsys.readouterr()

    assert re.search(r'transcript.*"quit"', out, re.DOTALL | re.I)