Merge pull request #33 from richtier/non-fixed-profile

Allow ASR profile and audio format to be specified
richtier · Feb 3, 2019 · b423d0d · b423d0d
2 parents 61b7276 + 7fd925e
commit b423d0d
Show file tree

Hide file tree

Showing 9 changed files with 119 additions and 23 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -1,2 +1,20 @@
 [run]
-omit =.venv/*,venv/*,tests/*,setup.py,alexa_client/refreshtoken/serve.py,alexa_client/alexa_client/device.py,alexa_client/demo/*
+omit =
+    .venv/*
+    venv/*
+    tests/*
+    setup.py
+    alexa_client/refreshtoken/serve.py
+    alexa_client/alexa_client/device.py
+    alexa_client/demo/*
+    *__init__*
+
+
+[report]
+exclude_lines =
+    # Have to re-enable the standard pragma
+    pragma: no cover
+
+    # Don't complain if tests don't hit defensive assertion code:
+    raise AssertionError
+    raise NotImplementedError
diff --git a/README.md b/README.md
@@ -25,14 +25,14 @@ pip install alexa_client[demo]
 ```py
 from alexa_client import AlexaClient
 
-alexa_client = AlexaClient(
+client = AlexaClient(
     client_id='my-client-id',
     secret='my-secret',
     refresh_token='my-refresh-token',
 )
-alexa_client.connect()  # authenticate and other handshaking steps
+client.connect()  # authenticate and other handshaking steps
 with open('./tests/resources/alexa_what_time_is_it.wav', 'rb') as f:
-    for i, directive in enumerate(alexa_client.send_audio_file(f)):
+    for i, directive in enumerate(client.send_audio_file(f)):
         if directive.name in ['Speak', 'Play']:
             with open(f'./output_{i}.mp3', 'wb') as f:
                 f.write(directive.audio_attachment)
@@ -62,7 +62,7 @@ stream = p.open(
     stream_callback=callback,
 )
 
-alexa_client = AlexaClient(
+client = AlexaClient(
     client_id='my-client-id',
     secret='my-secret',
     refresh_token='my-refresh-token',
@@ -72,8 +72,8 @@ buffer = io.BytesIO()
 try:
     stream.start_stream()
     print('listening. Press CTRL + C to exit.')
-    alexa_client.connect()
-    for i, directive in enumerate(alexa_client.send_audio_file(buffer)):
+    client.connect()
+    for i, directive in enumerate(client.send_audio_file(buffer)):
         if directive.name in ['Speak', 'Play']:
             with open(f'./output_{i}.mp3', 'wb') as f:
                 f.write(directive.audio_attachment)
@@ -100,9 +100,9 @@ This can be achieved by passing the same dialog request ID to multiple `send_aud
 from alexa_client.alexa_client import helpers
 
 dialog_request_id = helpers.generate_unique_id()
-directives_one = alexa_client.send_audio_file(audio_one, dialog_request_id=dialog_request_id)
-directives_two = alexa_client.send_audio_file(audio_two, dialog_request_id=dialog_request_id)
-directives_three = alexa_client.send_audio_file(audio_three, dialog_request_id=dialog_request_id)
+directives_one = client.send_audio_file(audio_one, dialog_request_id=dialog_request_id)
+directives_two = client.send_audio_file(audio_two, dialog_request_id=dialog_request_id)
+directives_three = client.send_audio_file(audio_three, dialog_request_id=dialog_request_id)
 
 ```
 
@@ -116,6 +116,35 @@ python -m alexa_client.demo.streaming_microphone \
     --refresh-token="{enter-refresh-token-here}"
 ```
 
+### ASR Profiles
+Automatic Speech Recognition (ASR) profiles are optimized for user speech from varying distances. By default CLOSE_TALK is used, but a different profile can be specified:
+
+```
+from alexa_client import constants
+
+client.send_audio_file(
+    audio_file=audio_file,
+    distance_profile=constants.NEAR_FIELD,  # or constants.FAR_FIELD
+)
+```
+
+### Audio format
+
+By default PCM audio format is assumed, but OPUS can be specified:
+
+```
+from alexa_client import constants
+
+client.send_audio_file(
+    audio_file=audio_file,
+    audio_format=constants.OPUS,
+)
+```
+
+When PCM format is specified the audio should be 16bit Linear PCM (LPCM16), 16kHz sample rate, single-channel, and little endian.
+
+When OPUS format is specified the audio should be 16bit Opus, 16kHz sample rate, 32k bit rate, and little endian.
+
 ## Authentication
 
 To use AVS you must first have a [developer account](http://developer.amazon.com). Then register your product [here](https://developer.amazon.com/avs/home.html#/avs/products/new). Choose "Application" under "Is your product an app or a device"?
@@ -154,21 +183,18 @@ Follow the on-screen instructions shown at `http://localhost:9000` in your web b
 On completion Amazon will return your `refresh_token` - which you will require to [send audio](#file-audio) or [recorded voice](#microphone-audio).
 
 ## Streaming audio to AVS
-`alexa_client.send_audio_file` streaming uploads a file-like object to AVS for great latency. The file-like object can be an actual file on your filesystem, an in-memory BytesIo buffer containing audio from your microphone, or even audio streaming from [your browser over a websocket in real-time](https://github.com/richtier/alexa-browser-client).
-
-AVS requires the audio data to be 16bit Linear PCM (LPCM16), 16kHz sample rate, single-channel, and little endian.
+`AlexaClient.send_audio_file` stream-uploads a file-like object to AVS for low latency. The file-like object can be an actual file on your filesystem, an in-memory BytesIO buffer containing audio from your microphone, or even audio streaming from [your browser over a websocket in real-time](https://github.com/richtier/alexa-browser-client).
 
 ## Persistent AVS connection
 
-Calling `alexa_client.connect()` creates a persistent connection to AVS. A thread runs that pings AVS after 4 minutes of no request being made to AVS. This prevents the connection getting forcefully closed due to inactivity.
+Calling `AlexaClient.connect` creates a persistent connection to AVS. A thread runs that pings AVS after 4 minutes of no request being made to AVS. This prevents the connection getting forcefully closed due to inactivity.
 
 ## Unit test ##
 
 To run the unit tests, call the following commands:
 
 ```sh
 git clone git@github.com:richtier/alexa-voice-service-client.git
-pip install -e .[test]
 make test_requirements
 make test
 ```

diff --git a/alexa_client/__init__.py b/alexa_client/__init__.py
@@ -1,5 +1,7 @@
 from alexa_client.alexa_client.client import AlexaClient
+from alexa_client.alexa_client import constants
 
 __all__ = [
-    'AlexaClient'
+    'AlexaClient',
+    'constants',
 ]
diff --git a/alexa_client/alexa_client/client.py b/alexa_client/alexa_client/client.py
@@ -1,7 +1,7 @@
 import warnings
 
 from alexa_client.alexa_client import (
-    authentication, connection, device, helpers, ping
+    authentication, connection, constants, device, helpers, ping
 )
 
 
@@ -49,7 +49,10 @@ def synchronise_device_state(self):
                 device_state=self.device_manager.get_device_state(),
             )
 
-    def send_audio_file(self, audio_file, dialog_request_id=None):
+    def send_audio_file(
+        self, audio_file, dialog_request_id=None,
+        distance_profile=constants.CLOSE_TALK, audio_format=constants.PCM
+    ):
         dialog_request_id = dialog_request_id or helpers.generate_unique_id()
         with self.ping_manager.update_ping_deadline():
             headers = self.authentication_manager.get_headers()
@@ -58,6 +61,8 @@ def send_audio_file(self, audio_file, dialog_request_id=None):
                 device_state=self.device_manager.get_device_state(),
                 audio_file=audio_file,
                 dialog_request_id=dialog_request_id,
+                distance_profile=distance_profile,
+                audio_format=audio_format,
             )
 
     def ping(self):

diff --git a/alexa_client/alexa_client/connection.py b/alexa_client/alexa_client/connection.py
@@ -75,7 +75,7 @@ def synchronise_device_state(self, device_state, authentication_headers):
 
     def send_audio_file(
         self, audio_file, device_state, authentication_headers,
-        dialog_request_id
+        dialog_request_id, distance_profile, audio_format
     ):
         """
         Send audio to AVS
@@ -97,8 +97,8 @@ def send_audio_file(
                     'dialogRequestId': dialog_request_id,
                 },
                 'payload': {
-                    'profile': 'CLOSE_TALK',
-                    'format': 'AUDIO_L16_RATE_16000_CHANNELS_1'
+                    'profile': distance_profile,
+                    'format': audio_format
                 }
             }
         }

diff --git a/alexa_client/alexa_client/constants.py b/alexa_client/alexa_client/constants.py
@@ -0,0 +1,9 @@
+# Automatic Speech Recognition (ASR) profiles optimized for user speech from
+# varying distances
+CLOSE_TALK = 'CLOSE_TALK'  # 0 to 2.5 ft
+NEAR_FIELD = 'NEAR_FIELD'  # 0 to 5 ft
+FAR_FIELD = 'FAR_FIELD'  # 0 to 20+ ft
+
+# format of captured audio
+PCM = 'AUDIO_L16_RATE_16000_CHANNELS_1'
+OPUS = 'OPUS'
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 
 setup(
     name='alexa_client',
-    version='1.3.0',
+    version='1.4.0',
     packages=find_packages(exclude=["tests.*", "tests"]),
     url='https://github.com/richtier/alexa-voice-service-client',
     license='MIT',

diff --git a/tests/alexa_client/test_client.py b/tests/alexa_client/test_client.py
@@ -4,6 +4,7 @@
 import pytest
 
 from alexa_client.alexa_client.client import AlexaClient
+from alexa_client.alexa_client import constants
 
 
 @pytest.fixture
@@ -83,6 +84,31 @@ def test_client_send_audio_file(client):
         device_state={'device': 'state'},
         authentication_headers={'auth': 'value'},
         dialog_request_id='dialog-id',
+        distance_profile=constants.CLOSE_TALK,
+        audio_format=constants.PCM,
+    )
+    assert client.ping_manager.update_ping_deadline.call_count == 1
+
+
+def test_client_send_audio_file_non_defaults(client):
+    client.authentication_manager.get_headers.return_value = {'auth': 'value'}
+    client.device_manager.get_device_state.return_value = {'device': 'state'}
+
+    audio_file = BytesIO(b'things')
+    client.send_audio_file(
+        audio_file,
+        dialog_request_id='dialog-id',
+        distance_profile=constants.FAR_FIELD,
+        audio_format=constants.OPUS,
+    )
+
+    assert client.connection_manager.send_audio_file.call_args == mock.call(
+        audio_file=audio_file,
+        device_state={'device': 'state'},
+        authentication_headers={'auth': 'value'},
+        dialog_request_id='dialog-id',
+        distance_profile=constants.FAR_FIELD,
+        audio_format=constants.OPUS,
     )
     assert client.ping_manager.update_ping_deadline.call_count == 1
 

diff --git a/tests/alexa_client/test_connection.py b/tests/alexa_client/test_connection.py
@@ -5,7 +5,7 @@
 import pytest
 from requests.exceptions import HTTPError
 
-from alexa_client.alexa_client import connection
+from alexa_client.alexa_client import connection, constants
 from tests.alexa_client.helpers import parse_multipart, TestConnectionMixin
 from tests.alexa_client import fixtures
 
@@ -140,6 +140,8 @@ def test_send_audio_file(
             authentication_headers=authentication_headers,
             audio_file=audio_file,
             dialog_request_id='dialogue-id',
+            distance_profile=constants.CLOSE_TALK,
+            audio_format=constants.PCM,
         )
 
     headers = dict(list(manager.connection.recent_stream.headers.items()))
@@ -206,6 +208,8 @@ def test_speak_and_play_response_200(
         device_state=device_state,
         authentication_headers=authentication_headers,
         dialog_request_id='dialogue-id',
+        distance_profile=constants.CLOSE_TALK,
+        audio_format=constants.PCM,
     ))
     assert len(directives) == 3
 
@@ -252,6 +256,8 @@ def test_parse_speak_response_200(
         device_state=device_state,
         authentication_headers=authentication_headers,
         dialog_request_id='dialogue-id',
+        distance_profile=constants.CLOSE_TALK,
+        audio_format=constants.PCM,
     )
     for directive in directives:
         assert directive.get_content_id(directive.directive) == (
@@ -273,6 +279,8 @@ def test_send_audio_204_response(
         authentication_headers=authentication_headers,
         audio_file=audio_file,
         dialog_request_id='dialogue-id',
+        distance_profile=constants.CLOSE_TALK,
+        audio_format=constants.PCM,
     )
 
     assert response is None
@@ -293,6 +301,8 @@ def test_send_audio_non_200_response(
             authentication_headers=authentication_headers,
             audio_file=audio_file,
             dialog_request_id='dialogue-id',
+            distance_profile=constants.CLOSE_TALK,
+            audio_format=constants.PCM,
         )