Skip to content

Commit

Permalink
Add stability information to streaming results. Fixes #2702.
Browse files Browse the repository at this point in the history
  • Loading branch information
daspecster committed Nov 10, 2016
1 parent 1abfcbe commit 9f2c34e
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 32 deletions.
28 changes: 18 additions & 10 deletions docs/speech-usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,10 @@ speech data to possible text alternatives on the fly.
... sample = client.sample(content=stream,
... encoding=speech.Encoding.LINEAR16,
... sample_rate=16000)
... alternatives = list(client.streaming_recognize(sample))
>>> print(alternatives[0].transcript)
... results = list(client.streaming_recognize(sample))
>>> print(results[0].alternatives[0].transcript)
'hello'
>>> print(alternatives[0].confidence)
>>> print(results[0].alternatives[0].confidence)
0.973458576
Expand All @@ -196,10 +196,10 @@ See: `Single Utterance`_
... sample_rate=16000)
... responses = client.streaming_recognize(sample,
... single_utterance=True)
... alternatives = list(responses)
>>> print(alternatives[0].transcript)
... results = list(responses)
>>> print(results[0].alternatives[0].transcript)
hello
>>> print(alternatives[0].confidence)
>>> print(results[0].alternatives[0].confidence)
0.96523453546
Expand All @@ -214,20 +214,28 @@ If ``interim_results`` is set to :data:`True`, interim results
... sample = client.sample(content=stream,
... encoding=speech.Encoding.LINEAR16,
... sample_rate=16000)
... for alternatives in client.streaming_recognize(sample,
... interim_results=True):
... for results in client.streaming_recognize(sample,
... interim_results=True):
... print('=' * 20)
... print(alternatives[0].transcript)
... print(alternatives[0].confidence)
... print(results.alternatives[0].transcript)
... print(results.alternatives[0].confidence)
... print(results.is_final)
... print(results.stability)
====================
'he'
None
False
0.113245
====================
'hell'
None
False
0.132454
====================
'hello'
0.973458576
True
0.982345
.. _Single Utterance: https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognitionconfig
Expand Down
71 changes: 69 additions & 2 deletions speech/google/cloud/speech/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,7 @@ def streaming_recognize(self, sample, language_code=None,
for response in responses:
for result in response.results:
if result.is_final or interim_results:
yield [Alternative.from_pb(alternative)
for alternative in result.alternatives]
yield StreamingSpeechResult.from_pb(result)

def sync_recognize(self, sample, language_code=None,
max_alternatives=None, profanity_filter=None,
Expand Down Expand Up @@ -299,6 +298,74 @@ def sync_recognize(self, sample, language_code=None,
profanity_filter, speech_context)


class StreamingSpeechResult(object):
    """Streaming speech result representation.

    :type alternatives: list
    :param alternatives: List of protobuf speech alternatives. Each entry is
                         converted with ``Alternative.from_pb`` when the
                         instance is created.

    :type is_final: bool
    :param is_final: Boolean indicator of results finality.

    :type stability: float
    :param stability: 0.0-1.0 stability score for the results returned.
    """
    def __init__(self, alternatives, is_final=False, stability=0.0):
        # Convert eagerly so the ``alternatives`` property always exposes
        # ``Alternative`` instances rather than raw protobuf messages.
        self._alternatives = [Alternative.from_pb(alternative)
                              for alternative in alternatives]
        self._is_final = is_final
        self._stability = stability

    @classmethod
    def from_pb(cls, response):
        """Factory: construct instance of ``StreamingSpeechResult``.

        :type response: :class:`~google.cloud.grpc.speech.v1beta1\
                        .cloud_speech_pb2.StreamingRecognitionResult`
        :param response: Instance of ``StreamingRecognitionResult`` protobuf.
                         Its ``alternatives``, ``is_final`` and ``stability``
                         fields are read.

        :rtype: :class:`~google.cloud.speech.client.StreamingSpeechResult`
        :returns: Instance of ``StreamingSpeechResult``.
        """
        alternatives = response.alternatives
        is_final = response.is_final
        stability = response.stability
        return cls(alternatives=alternatives, is_final=is_final,
                   stability=stability)

    @property
    def alternatives(self):
        """List of alternative transcripts.

        :rtype: list of :class:`~google.cloud.speech.alternative.Alternative`
        :returns: List of ``Alternative`` instances.
        """
        return self._alternatives

    @property
    def is_final(self):
        """Boolean indicator of result finality.

        :rtype: bool
        :returns: True if this result is final and no more processing will
                  occur. False if more processing will be done and the
                  results may change.
        """
        return self._is_final

    @property
    def stability(self):
        """Result stability indicator.

        :rtype: float
        :returns: 0.0-1.0 value indicating the stability of the current
                  results.
        """
        return self._stability


class _JSONSpeechAPI(object):
"""Speech API for interacting with the JSON/REST version of the API.
Expand Down
38 changes: 27 additions & 11 deletions speech/unit_tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def _make_result(alternatives=()):
)


def _make_streaming_result(alternatives=(), is_final=True):
def _make_streaming_result(alternatives=(), is_final=True, stability=1.0):
from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2

return cloud_speech_pb2.StreamingRecognitionResult(
Expand All @@ -39,6 +39,7 @@ def _make_streaming_result(alternatives=(), is_final=True):
) for alternative in alternatives
],
is_final=is_final,
stability=stability,
)


Expand Down Expand Up @@ -476,6 +477,7 @@ def test_stream_recognize_interim_results(self):

from google.cloud.speech import _gax
from google.cloud.speech.encoding import Encoding
from google.cloud.speech.client import StreamingSpeechResult

stream = BytesIO(b'Some audio data...')
credentials = _Credentials()
Expand All @@ -491,11 +493,13 @@ def test_stream_recognize_interim_results(self):
'confidence': 0.0123456,
}]
first_response = _make_streaming_response(
_make_streaming_result([], is_final=False))
_make_streaming_result([], is_final=False, stability=0.122435))
second_response = _make_streaming_response(
_make_streaming_result(alternatives, is_final=False))
_make_streaming_result(alternatives, is_final=False,
stability=0.1432343))
last_response = _make_streaming_response(
_make_streaming_result(alternatives, is_final=True))
_make_streaming_result(alternatives, is_final=True,
stability=0.9834534))
responses = [first_response, second_response, last_response]

channel_args = []
Expand All @@ -521,15 +525,27 @@ def speech_api(channel=None):

results = list(client.streaming_recognize(sample,
interim_results=True))
self.assertEqual(results[0], [])
self.assertEqual(results[1][0].transcript,

self.assertIsInstance(results[0], StreamingSpeechResult)
self.assertEqual(results[0].alternatives, [])
self.assertFalse(results[0].is_final)
self.assertEqual(results[0].stability, 0.122435)
self.assertEqual(results[1].stability, 0.1432343)
self.assertFalse(results[1].is_final)
self.assertEqual(results[1].alternatives[0].transcript,
alternatives[0]['transcript'])
self.assertEqual(results[1][0].confidence,
self.assertEqual(results[1].alternatives[0].confidence,
alternatives[0]['confidence'])
self.assertEqual(results[1][1].transcript,
self.assertEqual(results[1].alternatives[1].transcript,
alternatives[1]['transcript'])
self.assertEqual(results[1][1].confidence,
self.assertEqual(results[1].alternatives[1].confidence,
alternatives[1]['confidence'])
self.assertTrue(results[2].is_final)
self.assertEqual(results[2].stability, 0.9834534)
self.assertEqual(results[2].alternatives[0].transcript,
alternatives[0]['transcript'])
self.assertEqual(results[2].alternatives[0].confidence,
alternatives[0]['confidence'])

def test_stream_recognize(self):
from io import BytesIO
Expand Down Expand Up @@ -582,9 +598,9 @@ def speech_api(channel=None):

results = list(client.streaming_recognize(sample))
self.assertEqual(len(results), 1)
self.assertEqual(results[0][0].transcript,
self.assertEqual(results[0].alternatives[0].transcript,
alternatives[0]['transcript'])
self.assertEqual(results[0][0].confidence,
self.assertEqual(results[0].alternatives[0].confidence,
alternatives[0]['confidence'])

def test_stream_recognize_no_results(self):
Expand Down
18 changes: 9 additions & 9 deletions system_tests/speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,15 @@ def _make_streaming_request(self, file_obj, single_utterance=True,
single_utterance=single_utterance,
interim_results=interim_results)

def _check_results(self, results, num_results=1):
self.assertEqual(len(results), num_results)
top_result = results[0]
def _check_results(self, alternatives, num_results=1):
self.assertEqual(len(alternatives), num_results)
top_result = alternatives[0]
self.assertIsInstance(top_result, Alternative)
self.assertEqual(top_result.transcript,
'hello ' + self.ASSERT_TEXT)
self.assertGreater(top_result.confidence, 0.90)
if num_results == 2:
second_alternative = results[1]
second_alternative = alternatives[1]
self.assertIsInstance(second_alternative, Alternative)
self.assertEqual(second_alternative.transcript, self.ASSERT_TEXT)
self.assertIsNone(second_alternative.confidence)
Expand Down Expand Up @@ -192,7 +192,7 @@ def test_stream_recognize(self):

with open(AUDIO_FILE, 'rb') as file_obj:
for results in self._make_streaming_request(file_obj):
self._check_results(results)
self._check_results(results.alternatives)

def test_stream_recognize_interim_results(self):
if not Config.USE_GAX:
Expand All @@ -207,12 +207,12 @@ def test_stream_recognize_interim_results(self):
interim_results=True)
responses = list(recognize)
for response in responses:
if response[0].transcript:
self.assertIn(response[0].transcript,
if response.alternatives[0].transcript:
self.assertIn(response.alternatives[0].transcript,
extras + self.ASSERT_TEXT)

self.assertGreater(len(responses), 5)
self._check_results(responses[-1])
self._check_results(responses[-1].alternatives)

def test_stream_recognize_single_utterance(self):
if not Config.USE_GAX:
Expand All @@ -221,4 +221,4 @@ def test_stream_recognize_single_utterance(self):
with open(AUDIO_FILE, 'rb') as file_obj:
for results in self._make_streaming_request(
file_obj, single_utterance=False):
self._check_results(results)
self._check_results(results.alternatives)

0 comments on commit 9f2c34e

Please sign in to comment.