Add stability information to streaming results. Fixes #2702.

googleapis · Nov 10, 2016 · 9f2c34e · 9f2c34e
1 parent 1abfcbe
commit 9f2c34e
Show file tree

Hide file tree

Showing 4 changed files with 123 additions and 32 deletions.
diff --git a/docs/speech-usage.rst b/docs/speech-usage.rst
@@ -171,10 +171,10 @@ speech data to possible text alternatives on the fly.
     ...     sample = client.sample(content=stream,
     ...                            encoding=speech.Encoding.LINEAR16,
     ...                            sample_rate=16000)
-    ...     alternatives = list(client.streaming_recognize(sample))
-    >>> print(alternatives[0].transcript)
+    ...     results = list(client.streaming_recognize(sample))
+    >>> print(results[0].alternatives[0].transcript)
     'hello'
-    >>> print(alternatives[0].confidence)
+    >>> print(results[0].alternatives[0].confidence)
     0.973458576
 
 
@@ -196,10 +196,10 @@ See: `Single Utterance`_
     ...                            sample_rate=16000)
     ...     responses = client.streaming_recognize(sample,
     ...                                            single_utterance=True)
-    ...     alternatives = list(responses)
-    >>> print(alternatives[0].transcript)
+    ...     results = list(responses)
+    >>> print(results[0].alternatives[0].transcript)
     hello
-    >>> print(alternatives[0].confidence)
+    >>> print(results[0].alternatives[0].confidence)
     0.96523453546
 
 
@@ -214,20 +214,28 @@ If ``interim_results`` is set to :data:`True`, interim results
     ...     sample = client.sample(content=stream,
     ...                            encoding=speech.Encoding.LINEAR16,
     ...                            sample_rate=16000)
-    ...     for alternatives in client.streaming_recognize(sample,
-    ...                                                    interim_results=True):
+    ...     for result in client.streaming_recognize(sample,
+    ...                                              interim_results=True):
     ...         print('=' * 20)
-    ...         print(alternatives[0].transcript)
-    ...         print(alternatives[0].confidence)
+    ...         print(result.alternatives[0].transcript)
+    ...         print(result.alternatives[0].confidence)
+    ...         print(result.is_final)
+    ...         print(result.stability)
     ====================
     'he'
     None
+    False
+    0.113245
     ====================
     'hell'
     None
+    False
+    0.132454
     ====================
     'hello'
     0.973458576
+    True
+    0.982345
 
 
 .. _Single Utterance: https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognitionconfig

diff --git a/speech/google/cloud/speech/client.py b/speech/google/cloud/speech/client.py
@@ -242,8 +242,7 @@ def streaming_recognize(self, sample, language_code=None,
         for response in responses:
             for result in response.results:
                 if result.is_final or interim_results:
-                    yield [Alternative.from_pb(alternative)
-                           for alternative in result.alternatives]
+                    yield StreamingSpeechResult.from_pb(result)
 
     def sync_recognize(self, sample, language_code=None,
                        max_alternatives=None, profanity_filter=None,
@@ -299,6 +298,74 @@ def sync_recognize(self, sample, language_code=None,
                                   profanity_filter, speech_context)
 
 
+class StreamingSpeechResult(object):
+    """Streaming speech result representation.
+
+    :type alternatives: list
+    :param alternatives: List of protobuf speech alternatives.
+
+    :type is_final: bool
+    :param is_final: Boolean indicator of results finality.
+
+    :type stability: float
+    :param stability: 0.0-1.0 stability score for the results returned.
+
+    :rtype: :class:`~google.cloud.speech.client.StreamingSpeechResult`
+    :returns: Instance of ``StreamingSpeechResult``.
+    """
+    def __init__(self, alternatives, is_final=False, stability=0.0):
+        self._alternatives = [Alternative.from_pb(alternative)
+                              for alternative in alternatives]
+        self._is_final = is_final
+        self._stability = stability
+
+    @classmethod
+    def from_pb(cls, response):
+        """Factory: construct instance of ``StreamingSpeechResult``.
+
+        :type response: :class:`~google.cloud.grpc.speech.v1beta1\
+                               .cloud_speech_pb2.StreamingRecognitionResult`
+        :param response: Instance of ``StreamingRecognitionResult`` protobuf.
+
+        :rtype: :class:`~google.cloud.speech.client.StreamingSpeechResult`
+        :returns: Instance of ``StreamingSpeechResult``.
+        """
+        alternatives = response.alternatives
+        is_final = response.is_final
+        stability = response.stability
+        return cls(alternatives=alternatives, is_final=is_final,
+                   stability=stability)
+
+    @property
+    def alternatives(self):
+        """List of alternative transcripts.
+
+        :rtype: list of :class:`~google.cloud.speech.alternative.Alternative`
+        :returns: List of ``Alternative`` instances.
+        """
+        return self._alternatives
+
+    @property
+    def is_final(self):
+        """Boolean indicator of result finality.
+
+        :rtype: bool
+        :returns: True if this result is final and no more processing will
+                  occur. False if more processing will be done and results
+                  may change.
+        """
+        return self._is_final
+
+    @property
+    def stability(self):
+        """Result stability indicator.
+
+        :rtype: float
+        :returns: 0.0-1.0 value indicating the stability of the current result.
+        """
+        return self._stability
+
+
 class _JSONSpeechAPI(object):
     """Speech API for interacting with the JSON/REST version of the API.
 

diff --git a/speech/unit_tests/test_client.py b/speech/unit_tests/test_client.py
@@ -28,7 +28,7 @@ def _make_result(alternatives=()):
     )
 
 
-def _make_streaming_result(alternatives=(), is_final=True):
+def _make_streaming_result(alternatives=(), is_final=True, stability=1.0):
     from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2
 
     return cloud_speech_pb2.StreamingRecognitionResult(
@@ -39,6 +39,7 @@ def _make_streaming_result(alternatives=(), is_final=True):
             ) for alternative in alternatives
         ],
         is_final=is_final,
+        stability=stability,
     )
 
 
@@ -476,6 +477,7 @@ def test_stream_recognize_interim_results(self):
 
         from google.cloud.speech import _gax
         from google.cloud.speech.encoding import Encoding
+        from google.cloud.speech.client import StreamingSpeechResult
 
         stream = BytesIO(b'Some audio data...')
         credentials = _Credentials()
@@ -491,11 +493,13 @@ def test_stream_recognize_interim_results(self):
             'confidence': 0.0123456,
         }]
         first_response = _make_streaming_response(
-            _make_streaming_result([], is_final=False))
+            _make_streaming_result([], is_final=False, stability=0.122435))
         second_response = _make_streaming_response(
-            _make_streaming_result(alternatives, is_final=False))
+            _make_streaming_result(alternatives, is_final=False,
+                                   stability=0.1432343))
         last_response = _make_streaming_response(
-            _make_streaming_result(alternatives, is_final=True))
+            _make_streaming_result(alternatives, is_final=True,
+                                   stability=0.9834534))
         responses = [first_response, second_response, last_response]
 
         channel_args = []
@@ -521,15 +525,27 @@ def speech_api(channel=None):
 
         results = list(client.streaming_recognize(sample,
                                                   interim_results=True))
-        self.assertEqual(results[0], [])
-        self.assertEqual(results[1][0].transcript,
+
+        self.assertIsInstance(results[0], StreamingSpeechResult)
+        self.assertEqual(results[0].alternatives, [])
+        self.assertFalse(results[0].is_final)
+        self.assertEqual(results[0].stability, 0.122435)
+        self.assertEqual(results[1].stability, 0.1432343)
+        self.assertFalse(results[1].is_final)
+        self.assertEqual(results[1].alternatives[0].transcript,
                          alternatives[0]['transcript'])
-        self.assertEqual(results[1][0].confidence,
+        self.assertEqual(results[1].alternatives[0].confidence,
                          alternatives[0]['confidence'])
-        self.assertEqual(results[1][1].transcript,
+        self.assertEqual(results[1].alternatives[1].transcript,
                          alternatives[1]['transcript'])
-        self.assertEqual(results[1][1].confidence,
+        self.assertEqual(results[1].alternatives[1].confidence,
                          alternatives[1]['confidence'])
+        self.assertTrue(results[2].is_final)
+        self.assertEqual(results[2].stability, 0.9834534)
+        self.assertEqual(results[2].alternatives[0].transcript,
+                         alternatives[0]['transcript'])
+        self.assertEqual(results[2].alternatives[0].confidence,
+                         alternatives[0]['confidence'])
 
     def test_stream_recognize(self):
         from io import BytesIO
@@ -582,9 +598,9 @@ def speech_api(channel=None):
 
         results = list(client.streaming_recognize(sample))
         self.assertEqual(len(results), 1)
-        self.assertEqual(results[0][0].transcript,
+        self.assertEqual(results[0].alternatives[0].transcript,
                          alternatives[0]['transcript'])
-        self.assertEqual(results[0][0].confidence,
+        self.assertEqual(results[0].alternatives[0].confidence,
                          alternatives[0]['confidence'])
 
     def test_stream_recognize_no_results(self):

diff --git a/system_tests/speech.py b/system_tests/speech.py
@@ -127,15 +127,15 @@ def _make_streaming_request(self, file_obj, single_utterance=True,
                                           single_utterance=single_utterance,
                                           interim_results=interim_results)
 
-    def _check_results(self, results, num_results=1):
-        self.assertEqual(len(results), num_results)
-        top_result = results[0]
+    def _check_results(self, alternatives, num_results=1):
+        self.assertEqual(len(alternatives), num_results)
+        top_result = alternatives[0]
         self.assertIsInstance(top_result, Alternative)
         self.assertEqual(top_result.transcript,
                          'hello ' + self.ASSERT_TEXT)
         self.assertGreater(top_result.confidence, 0.90)
         if num_results == 2:
-            second_alternative = results[1]
+            second_alternative = alternatives[1]
             self.assertIsInstance(second_alternative, Alternative)
             self.assertEqual(second_alternative.transcript, self.ASSERT_TEXT)
             self.assertIsNone(second_alternative.confidence)
@@ -192,7 +192,7 @@ def test_stream_recognize(self):
 
         with open(AUDIO_FILE, 'rb') as file_obj:
             for results in self._make_streaming_request(file_obj):
-                self._check_results(results)
+                self._check_results(results.alternatives)
 
     def test_stream_recognize_interim_results(self):
         if not Config.USE_GAX:
@@ -207,12 +207,12 @@ def test_stream_recognize_interim_results(self):
                                                      interim_results=True)
             responses = list(recognize)
             for response in responses:
-                if response[0].transcript:
-                    self.assertIn(response[0].transcript,
+                if response.alternatives[0].transcript:
+                    self.assertIn(response.alternatives[0].transcript,
                                   extras + self.ASSERT_TEXT)
 
             self.assertGreater(len(responses), 5)
-            self._check_results(responses[-1])
+            self._check_results(responses[-1].alternatives)
 
     def test_stream_recognize_single_utterance(self):
         if not Config.USE_GAX:
@@ -221,4 +221,4 @@ def test_stream_recognize_single_utterance(self):
         with open(AUDIO_FILE, 'rb') as file_obj:
             for results in self._make_streaming_request(
                     file_obj, single_utterance=False):
-                self._check_results(results)
+                self._check_results(results.alternatives)