Skip to content

Commit

Permalink
Merge branch 'feature/signal_process_on_mini_board' into 'master'
Browse files Browse the repository at this point in the history
add AEC, AGC and NS signal process on mini-board

See merge request adf/esp-adf-internal!363
  • Loading branch information
jason-mao committed Nov 14, 2019
2 parents da1dabc + 8453034 commit 94c68a7
Show file tree
Hide file tree
Showing 17 changed files with 843 additions and 131 deletions.
1 change: 1 addition & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ before_script:
- sed -i "s%https://github.com/espressif/esp-idf%${GITLAB_SSH_SERVER}/idf/esp-idf.git%" .gitmodules
# replace submodule esp-adf-libs to internal repository to speedup cloning
- sed -i "s%https://github.com/espressif/esp-adf-libs%${GITLAB_SSH_SERVER}/adf/esp-adf-libs.git%" .gitmodules
- sed -i "s%https://github.com/espressif/esp-sr.git%${GITLAB_SSH_SERVER}/speech-recognition-internal/esp_sr_public.git%" .gitmodules
- git submodule update --init
# (the same regular expressions that are used to set these are used in 'only:' sections below)
- source esp-idf/tools/ci/configure_ci_environment.sh
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "components/esp-adf-libs"]
path = components/esp-adf-libs
url = https://github.com/espressif/esp-adf-libs
[submodule "components/esp-sr"]
path = components/esp-sr
url = https://github.com/espressif/esp-sr.git
602 changes: 602 additions & 0 deletions components/audio_stream/algorithm_stream.c

Large diffs are not rendered by default.

150 changes: 150 additions & 0 deletions components/audio_stream/include/algorithm_stream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* ESPRESSIF MIT License
*
* Copyright (c) 2019 <ESPRESSIF SYSTEMS (SHANGHAI) CO., LTD>
*
* Permission is hereby granted for use on all ESPRESSIF SYSTEMS products, in which case,
* it is free of charge, to any person obtaining a copy of this software and associated
* documentation files (the "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the Software is furnished
* to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or
* substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/

#ifndef _ALGORITHM_STREAM_H_
#define _ALGORITHM_STREAM_H_

#include "audio_element.h"

/* Default value of `task_core` in ALGORITHM_STREAM_CFG_DEFAULT() */
#define ALGORITHM_STREAM_PINNED_TO_CORE 0
/* Default value of `task_prio` in ALGORITHM_STREAM_CFG_DEFAULT().
 * NOTE(review): the name says "PERIOD" but the value is assigned to the task priority field. */
#define ALGORITHM_STREAM_TASK_PERIOD 5
/* Default value of `task_stack` in ALGORITHM_STREAM_CFG_DEFAULT() */
#define ALGORITHM_STREAM_TASK_STACK_SIZE (5 * 1024)


/*
// AEC: Acoustic Echo Cancellation
// AGC: Automatic Gain Control
// WWE: Wake Word Engine
// NS: Noise Suppression
+-----------+
| |
| TYPE 1 |
| |
+-----------------------------------------------+-----------+---------------------------------------------------+
| |
| reference signal |
| +-----------+ +-----------+ +-----------\ +-----------+ +-----------+ +-----------+ |
| | | | | | \ | | | | | | |
| | I2S read |--->| Resample |--->| Data split |--->| AEC |--->| NS |--->| AGC | |
| | | | | | / | | | | | | |
| +-----------+ +-----------+ +-----------/ +-----------+ +-----------+ +-----------+ |
| record signal |
| |
+---------------------------------------------------------------------------------------------------------------+
+-----------+
| |
| TYPE 2 |
| |
+-----------------------------------------------+-----------+---------------------------------------------------+
| |
| |
| +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ |
| | | | | | | | | | | | | |
| | I2S read |--->| Resample |--->| rec signal|--->| AEC |--->| NS |--->| AGC | |
| | | | | | | | | | | | | |
| +-----------+ +-----------+ +-----------+ +-----^-----+ +-----------+ +-----------+ |
| | |
| +-----------+ +-----------+ +-----------+ | |
| | | | | | | | |
| | input_rb |--->| Resample |--->| ref signal|----------+ |
| | | | | | | |
| +-----------+ +-----------+ +-----------+ |
| |
+---------------------------------------------------------------------------------------------------------------+
*/

/**
 * @brief Input methods supported by the algorithm stream
 *
 * @note When type 2 is used, the algorithm modules can be combined arbitrarily;
 *       use the `algo_mask` configuration parameter to select them.
 */
typedef enum {
    /** Type 1 is used by default on the mini-board: the reference signal and the
     *  recording signal are read from the left channel and the right channel of
     *  the same I2S, respectively. */
    ALGORITHM_STREAM_INPUT_TYPE1 = 1,

    /** Type 2 reads the record signal from I2S; when data is written, it should be
     *  copied as the reference signal and fed to the algorithm element through the
     *  multiple input buffer. */
    ALGORITHM_STREAM_INPUT_TYPE2 = 2,
} algorithm_stream_input_type_t;

/**
 * @brief Bit flags selecting the algorithms to be used
 *
 * Each flag occupies its own bit, so several flags can be OR-ed together.
 */
typedef enum {
    ALGORITHM_STREAM_USE_AEC = 0x01, /*!< Use AEC (bit 0) */
    ALGORITHM_STREAM_USE_AGC = 0x02, /*!< Use AGC (bit 1) */
    ALGORITHM_STREAM_USE_NS  = 0x04  /*!< Use NS (bit 2) */
} algorithm_stream_mask_t;

/**
 * @brief Algorithm stream configurations
 */
typedef struct {
    algorithm_stream_input_type_t input_type; /*!< Input type of stream */
    int task_stack; /*!< Task stack size */
    int task_prio; /*!< Task priority */
    int task_core; /*!< The core on which the task is created */
    int rec_ch; /*!< Channel number of record signal */
    int ref_ch; /*!< Channel number of reference signal */
    int ref_sample_rate; /*!< Sample rate of reference signal */
    int rec_sample_rate; /*!< Sample rate of record signal */
    int rec_linear_factor; /*!< The linear amplification factor of record signal */
    int ref_linear_factor; /*!< The linear amplification factor of reference signal */
    int8_t algo_mask; /*!< Algorithms to use, as a bitwise OR of algorithm_stream_mask_t flags */
} algorithm_stream_cfg_t;

/**
 * @brief Default algorithm stream configuration
 *
 * Expands to a brace-enclosed initializer for algorithm_stream_cfg_t:
 * input type 1, 16 kHz record and reference sample rates, one channel each,
 * and AEC, AGC and NS all enabled.
 *
 * The designated initializers are listed in the declaration order of
 * algorithm_stream_cfg_t. C accepts any order, but C++ compilers require
 * designators to follow declaration order, so keeping them sorted lets the
 * macro be used from C++ sources as well.
 */
#define ALGORITHM_STREAM_CFG_DEFAULT() { \
    .input_type = ALGORITHM_STREAM_INPUT_TYPE1, \
    .task_stack = ALGORITHM_STREAM_TASK_STACK_SIZE, \
    .task_prio = ALGORITHM_STREAM_TASK_PERIOD, \
    .task_core = ALGORITHM_STREAM_PINNED_TO_CORE, \
    .rec_ch = 1, \
    .ref_ch = 1, \
    .ref_sample_rate = 16000, \
    .rec_sample_rate = 16000, \
    .rec_linear_factor = 1, \
    .ref_linear_factor = 3, \
    .algo_mask = (ALGORITHM_STREAM_USE_AEC | ALGORITHM_STREAM_USE_AGC | ALGORITHM_STREAM_USE_NS), \
}

/**
 * @brief Initialize algorithm stream
 *
 * @param config The algorithm stream configuration
 *               (ALGORITHM_STREAM_CFG_DEFAULT() provides default values)
 *
 * @return The audio element handle
 */
audio_element_handle_t algo_stream_init(algorithm_stream_cfg_t *config);

/**
 * @brief Set the ring buffer used to input the reference signal
 *
 * @note If input type 2 (ALGORITHM_STREAM_INPUT_TYPE2) is chosen, call this
 *       function to set the ring buffer that inputs reference data.
 *
 * @param algo_handle Handle of algorithm stream
 * @param input_rb Ring buffer handle to be set
 *
 * @return
 *     - ESP_OK on success
 *     - ESP_FAIL on failure
 */
esp_err_t algo_stream_set_multi_input_rb(audio_element_handle_t algo_handle, ringbuf_handle_t input_rb);

#endif
1 change: 1 addition & 0 deletions components/esp-sr
Submodule esp-sr added at eef4ae
6 changes: 2 additions & 4 deletions docs/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ INPUT = \
../../components/audio_stream/include/i2s_stream.h \
../../components/audio_stream/include/raw_stream.h \
../../components/audio_stream/include/spiffs_stream.h \
../../components/audio_stream/include/algorithm_stream.h \
## ESP Codec
../../components/esp-adf-libs/esp_codec/include/codec/esp_decoder.h \
../../components/esp-adf-libs/esp_codec/include/codec/audio_type_def.h \
Expand Down Expand Up @@ -67,10 +68,7 @@ INPUT = \
../../components/esp-adf-libs/esp_codec/include/codec/equalizer.h \
../../components/esp-adf-libs/esp_codec/include/codec/filter_resample.h \
../../components/esp-adf-libs/esp_codec/include/codec/audio_sonic.h \
## Speech Recognitions
../../components/esp-adf-libs/esp_sr/include/esp_sr_iface.h \
../../components/esp-adf-libs/esp_sr/include/esp_sr_models.h \
../../components/esp-adf-libs/esp_sr/include/esp_vad.h \
## Speech Recognition
../../components/esp-adf-libs/recorder_engine/include/recorder_engine.h \
## ESP Audio
../../components/esp-adf-libs/esp_audio/include/audio_def.h \
Expand Down
2 changes: 1 addition & 1 deletion docs/en/api-reference/speech-recognition/esp_vad.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ Implementation of the voice activity detection API is demonstrated in :example:`
API Reference
-------------

.. include:: /_build/inc/esp_vad.inc
For the latest API reference please refer to `Espressif Speech recognition repository <https://github.com/espressif/esp-sr>`_.
Original file line number Diff line number Diff line change
Expand Up @@ -35,30 +35,35 @@ A code snippet below demonstrates how to initialize the model, determine the num

.. code-block:: c
#include "esp_sr_iface.h"
#include "esp_sr_models.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "rec_eng_helper.h"
static const sr_model_iface_t *model = &sr_model_wakenet3_quantized;
esp_wn_iface_t *wakenet;
model_coeff_getter_t *model_coeff_getter;
model_iface_data_t *model_data;
// Initialize wakeNet model data
static model_iface_data_t *model_data = model->create(DET_MODE_90);
get_wakenet_iface(&wakenet);
get_wakenet_coeff(&model_coeff_getter);
model_data = wakenet->create(model_coeff_getter, DET_MODE_90);
// Set parameters of buffer
int audio_chunksize = model->get_samp_chunksize(model_data);
int frequency = model->get_samp_rate(model_data);
int16_t *buffer = malloc(audio_chunksize sizeof(int16_t));
int audio_chunksize = wakenet->get_samp_chunksize(model_data);
int frequency = wakenet->get_samp_rate(model_data);
int16_t *buffer = malloc(audio_chunksize * sizeof(int16_t));
// Get voice data feed to buffer
...
// Detect
int r = model->detect(model_data, buffer);
int r = wakenet->detect(model_data, buffer);
if (r > 0) {
printf("Detection triggered output %d.\n", r);
}
// Destroy model
model->destroy(model_data)
wakenet->destroy(model_data);
Application Example
Expand All @@ -70,5 +75,4 @@ Implementation of the speech recognition API is demonstrated in :example:`speech
API Reference
-------------

.. include:: /_build/inc/esp_sr_iface.inc

For the latest API reference please refer to `Espressif Speech recognition repository <https://github.com/espressif/esp-sr>`_.
5 changes: 2 additions & 3 deletions docs/en/api-reference/speech-recognition/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Speech Recognition
******************

The ESP-ADF comes complete with :doc:`wakeup word libraries <wakeup-word-libs>` and :doc:`speech recognition interface <esp_sr_iface>` to recognize voice wakeup commands. Most of currently implemented wakeup commands are in Chinese with one command "Alexa" in English.
The ESP-ADF comes complete with a :doc:`speech recognition interface <esp_wn_iface>` to recognize voice wakeup commands. Most of the currently implemented wakeup commands are in Chinese, with one command, "Hi Jeson", in English.

The functions provided in this section also include automatic speech detection, also known as :doc:`voice activity detection (VAD) <esp_vad>`, and a :doc:`speech recording engine <recorder_engine>`.

Expand All @@ -12,7 +12,6 @@ The Speech Recognition API is designed to easy integrate with existing :doc:`../
:caption: In This Section
:maxdepth: 1

wakeup-word-libs
esp_sr_iface
esp_wn_iface
esp_vad
recorder_engine
72 changes: 0 additions & 72 deletions docs/en/api-reference/speech-recognition/wakeup-word-libs.rst

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.. include:: ../../../en/api-reference/speech-recognition/esp_wn_iface.rst

This file was deleted.

Loading

0 comments on commit 94c68a7

Please sign in to comment.