Skip to content
This repository has been archived by the owner on Dec 28, 2021. It is now read-only.

Commit

Permalink
Merge pull request #33 from richtier/non-fixed-profile
Browse files Browse the repository at this point in the history
Allow ASR profile and audio format to be specified
  • Loading branch information
Richard Tier authored Feb 3, 2019
2 parents 61b7276 + 7fd925e commit b423d0d
Show file tree
Hide file tree
Showing 9 changed files with 119 additions and 23 deletions.
20 changes: 19 additions & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
@@ -1,2 +1,20 @@
[run]
omit =.venv/*,venv/*,tests/*,setup.py,alexa_client/refreshtoken/serve.py,alexa_client/alexa_client/device.py,alexa_client/demo/*
omit =
.venv/*
venv/*
tests/*
setup.py
alexa_client/refreshtoken/serve.py
alexa_client/alexa_client/device.py
alexa_client/demo/*
*__init__*


[report]
exclude_lines =
# Have to re-enable the standard pragma
pragma: no cover

# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError
54 changes: 40 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ pip install alexa_client[demo]
```py
from alexa_client import AlexaClient

alexa_client = AlexaClient(
client = AlexaClient(
client_id='my-client-id',
secret='my-secret',
refresh_token='my-refresh-token',
)
alexa_client.connect() # authenticate and other handshaking steps
client.connect() # authenticate and other handshaking steps
with open('./tests/resources/alexa_what_time_is_it.wav', 'rb') as f:
for i, directive in enumerate(alexa_client.send_audio_file(f)):
for i, directive in enumerate(client.send_audio_file(f)):
if directive.name in ['Speak', 'Play']:
with open(f'./output_{i}.mp3', 'wb') as f:
f.write(directive.audio_attachment)
Expand Down Expand Up @@ -62,7 +62,7 @@ stream = p.open(
stream_callback=callback,
)

alexa_client = AlexaClient(
client = AlexaClient(
client_id='my-client-id',
secret='my-secret',
refresh_token='my-refresh-token',
Expand All @@ -72,8 +72,8 @@ buffer = io.BytesIO()
try:
stream.start_stream()
print('listening. Press CTRL + C to exit.')
alexa_client.connect()
for i, directive in enumerate(alexa_client.send_audio_file(buffer)):
client.connect()
for i, directive in enumerate(client.send_audio_file(buffer)):
if directive.name in ['Speak', 'Play']:
with open(f'./output_{i}.mp3', 'wb') as f:
f.write(directive.audio_attachment)
Expand All @@ -100,9 +100,9 @@ This can be achieved by passing the same dialog request ID to multiple `send_aud
from alexa_client.alexa_client import helpers

dialog_request_id = helpers.generate_unique_id()
directives_one = alexa_client.send_audio_file(audio_one, dialog_request_id=dialog_request_id)
directives_two = alexa_client.send_audio_file(audio_two, dialog_request_id=dialog_request_id)
directives_three = alexa_client.send_audio_file(audio_three, dialog_request_id=dialog_request_id)
directives_one = client.send_audio_file(audio_one, dialog_request_id=dialog_request_id)
directives_two = client.send_audio_file(audio_two, dialog_request_id=dialog_request_id)
directives_three = client.send_audio_file(audio_three, dialog_request_id=dialog_request_id)

```

Expand All @@ -116,6 +116,35 @@ python -m alexa_client.demo.streaming_microphone \
--refresh-token="{enter-refresh-token-here}"
```

### ASR Profiles
Automatic Speech Recognition (ASR) profiles are optimized for user speech from varying distances. By default `CLOSE_TALK` is used, but a different profile can be specified:

```
from alexa_client import constants
client.send_audio_file(
audio_file=audio_file,
distance_profile=constants.NEAR_FIELD, # or constants.FAR_FIELD
)
```

### Audio format

By default PCM audio format is assumed, but OPUS can be specified:

```
from alexa_client import constants
client.send_audio_file(
audio_file=audio_file,
audio_format=constants.OPUS,
)
```

When PCM format is specified the audio should be 16bit Linear PCM (LPCM16), 16kHz sample rate, single-channel, and little endian.

When OPUS format is specified the audio should be 16bit Opus, 16kHz sample rate, 32k bit rate, and little endian.

## Authentication

To use AVS you must first have a [developer account](http://developer.amazon.com). Then register your product [here](https://developer.amazon.com/avs/home.html#/avs/products/new). Choose "Application" under "Is your product an app or a device"?
Expand Down Expand Up @@ -154,21 +183,18 @@ Follow the on-screen instructions shown at `http://localhost:9000` in your web b
On completion Amazon will return your `refresh_token` - which you will require to [send audio](#file-audio) or [recorded voice](#microphone-audio).

## Streaming audio to AVS
`alexa_client.send_audio_file` streaming uploads a file-like object to AVS for great latency. The file-like object can be an actual file on your filesystem, an in-memory BytesIo buffer containing audio from your microphone, or even audio streaming from [your browser over a websocket in real-time](https://github.com/richtier/alexa-browser-client).

AVS requires the audio data to be 16bit Linear PCM (LPCM16), 16kHz sample rate, single-channel, and little endian.
`AlexaClient.send_audio_file` stream-uploads a file-like object to AVS to keep latency low. The file-like object can be an actual file on your filesystem, an in-memory `BytesIO` buffer containing audio from your microphone, or even audio streaming from [your browser over a websocket in real-time](https://github.com/richtier/alexa-browser-client).

## Persistent AVS connection

Calling `alexa_client.connect()` creates a persistent connection to AVS. A thread runs that pings AVS after 4 minutes of no request being made to AVS. This prevents the connection getting forcefully closed due to inactivity.
Calling `AlexaClient.connect` creates a persistent connection to AVS. A thread runs that pings AVS after 4 minutes of no request being made to AVS. This prevents the connection getting forcefully closed due to inactivity.

## Unit test ##

To run the unit tests, call the following commands:

```sh
git clone git@github.com:richtier/alexa-voice-service-client.git
pip install -e .[test]
make test_requirements
make test
```
Expand Down
4 changes: 3 additions & 1 deletion alexa_client/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from alexa_client.alexa_client.client import AlexaClient
from alexa_client.alexa_client import constants

__all__ = [
'AlexaClient'
'AlexaClient',
'constants',
]
9 changes: 7 additions & 2 deletions alexa_client/alexa_client/client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import warnings

from alexa_client.alexa_client import (
authentication, connection, device, helpers, ping
authentication, connection, constants, device, helpers, ping
)


Expand Down Expand Up @@ -49,7 +49,10 @@ def synchronise_device_state(self):
device_state=self.device_manager.get_device_state(),
)

def send_audio_file(self, audio_file, dialog_request_id=None):
def send_audio_file(
self, audio_file, dialog_request_id=None,
distance_profile=constants.CLOSE_TALK, audio_format=constants.PCM
):
dialog_request_id = dialog_request_id or helpers.generate_unique_id()
with self.ping_manager.update_ping_deadline():
headers = self.authentication_manager.get_headers()
Expand All @@ -58,6 +61,8 @@ def send_audio_file(self, audio_file, dialog_request_id=None):
device_state=self.device_manager.get_device_state(),
audio_file=audio_file,
dialog_request_id=dialog_request_id,
distance_profile=distance_profile,
audio_format=audio_format,
)

def ping(self):
Expand Down
6 changes: 3 additions & 3 deletions alexa_client/alexa_client/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def synchronise_device_state(self, device_state, authentication_headers):

def send_audio_file(
self, audio_file, device_state, authentication_headers,
dialog_request_id
dialog_request_id, distance_profile, audio_format
):
"""
Send audio to AVS
Expand All @@ -97,8 +97,8 @@ def send_audio_file(
'dialogRequestId': dialog_request_id,
},
'payload': {
'profile': 'CLOSE_TALK',
'format': 'AUDIO_L16_RATE_16000_CHANNELS_1'
'profile': distance_profile,
'format': audio_format
}
}
}
Expand Down
9 changes: 9 additions & 0 deletions alexa_client/alexa_client/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Automatic Speech Recognition (ASR) profiles optimized for user speech from
# varying distances. One of these values is passed as `distance_profile` to
# `send_audio_file` and sent as the `profile` field of the request payload.
# CLOSE_TALK is the default used by the client.
CLOSE_TALK = 'CLOSE_TALK' # 0 to 2.5 ft
NEAR_FIELD = 'NEAR_FIELD' # 0 to 5 ft
FAR_FIELD = 'FAR_FIELD' # 0 to 20+ ft

# Format of captured audio. One of these values is passed as `audio_format`
# to `send_audio_file` and sent as the `format` field of the request payload.
# PCM is the default used by the client.
# PCM: 16bit Linear PCM (LPCM16), 16kHz sample rate, single-channel,
# little endian.
PCM = 'AUDIO_L16_RATE_16000_CHANNELS_1'
# OPUS: 16bit Opus, 16kHz sample rate, 32k bit rate, little endian.
OPUS = 'OPUS'
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name='alexa_client',
version='1.3.0',
version='1.4.0',
packages=find_packages(exclude=["tests.*", "tests"]),
url='https://github.com/richtier/alexa-voice-service-client',
license='MIT',
Expand Down
26 changes: 26 additions & 0 deletions tests/alexa_client/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from alexa_client.alexa_client.client import AlexaClient
from alexa_client.alexa_client import constants


@pytest.fixture
Expand Down Expand Up @@ -83,6 +84,31 @@ def test_client_send_audio_file(client):
device_state={'device': 'state'},
authentication_headers={'auth': 'value'},
dialog_request_id='dialog-id',
distance_profile=constants.CLOSE_TALK,
audio_format=constants.PCM,
)
assert client.ping_manager.update_ping_deadline.call_count == 1


def test_client_send_audio_file_non_defaults(client):
    """Explicit profile and format are forwarded to the connection manager.

    Calls ``send_audio_file`` with FAR_FIELD / OPUS instead of the defaults
    and checks the connection manager receives exactly those values, and that
    the ping deadline is refreshed once.
    """
    auth_headers = {'auth': 'value'}
    device_state = {'device': 'state'}
    client.authentication_manager.get_headers.return_value = auth_headers
    client.device_manager.get_device_state.return_value = device_state

    audio_file = BytesIO(b'things')
    client.send_audio_file(
        audio_file,
        dialog_request_id='dialog-id',
        distance_profile=constants.FAR_FIELD,
        audio_format=constants.OPUS,
    )

    expected_call = mock.call(
        audio_file=audio_file,
        device_state={'device': 'state'},
        authentication_headers={'auth': 'value'},
        dialog_request_id='dialog-id',
        distance_profile=constants.FAR_FIELD,
        audio_format=constants.OPUS,
    )
    actual_call = client.connection_manager.send_audio_file.call_args
    assert actual_call == expected_call
    assert client.ping_manager.update_ping_deadline.call_count == 1

Expand Down
12 changes: 11 additions & 1 deletion tests/alexa_client/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest
from requests.exceptions import HTTPError

from alexa_client.alexa_client import connection
from alexa_client.alexa_client import connection, constants
from tests.alexa_client.helpers import parse_multipart, TestConnectionMixin
from tests.alexa_client import fixtures

Expand Down Expand Up @@ -140,6 +140,8 @@ def test_send_audio_file(
authentication_headers=authentication_headers,
audio_file=audio_file,
dialog_request_id='dialogue-id',
distance_profile=constants.CLOSE_TALK,
audio_format=constants.PCM,
)

headers = dict(list(manager.connection.recent_stream.headers.items()))
Expand Down Expand Up @@ -206,6 +208,8 @@ def test_speak_and_play_response_200(
device_state=device_state,
authentication_headers=authentication_headers,
dialog_request_id='dialogue-id',
distance_profile=constants.CLOSE_TALK,
audio_format=constants.PCM,
))
assert len(directives) == 3

Expand Down Expand Up @@ -252,6 +256,8 @@ def test_parse_speak_response_200(
device_state=device_state,
authentication_headers=authentication_headers,
dialog_request_id='dialogue-id',
distance_profile=constants.CLOSE_TALK,
audio_format=constants.PCM,
)
for directive in directives:
assert directive.get_content_id(directive.directive) == (
Expand All @@ -273,6 +279,8 @@ def test_send_audio_204_response(
authentication_headers=authentication_headers,
audio_file=audio_file,
dialog_request_id='dialogue-id',
distance_profile=constants.CLOSE_TALK,
audio_format=constants.PCM,
)

assert response is None
Expand All @@ -293,6 +301,8 @@ def test_send_audio_non_200_response(
authentication_headers=authentication_headers,
audio_file=audio_file,
dialog_request_id='dialogue-id',
distance_profile=constants.CLOSE_TALK,
audio_format=constants.PCM,
)


Expand Down

0 comments on commit b423d0d

Please sign in to comment.