Merge branch 'feature/signal_process_on_mini_board' into 'master'

add AEC, AGC and NS signal process on mini-board See merge request adf/esp-adf-internal!363
espressif · Nov 14, 2019 · 94c68a7 · 94c68a7
2 parents da1dabc + 8453034
commit 94c68a7
Show file tree

Hide file tree

Showing 17 changed files with 843 additions and 131 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -35,6 +35,7 @@ before_script:
   - sed -i "s%https://github.com/espressif/esp-idf%${GITLAB_SSH_SERVER}/idf/esp-idf.git%" .gitmodules
   # replace submodule esp-adf-libs to internal repository to speedup cloning
   - sed -i "s%https://github.com/espressif/esp-adf-libs%${GITLAB_SSH_SERVER}/adf/esp-adf-libs.git%" .gitmodules
+  - sed -i "s%https://github.com/espressif/esp-sr.git%${GITLAB_SSH_SERVER}/speech-recognition-internal/esp_sr_public.git%" .gitmodules
   - git submodule update --init
   # (the same regular expressions are used to set these are used in 'only:' sections below
   - source esp-idf/tools/ci/configure_ci_environment.sh

diff --git a/.gitmodules b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "components/esp-adf-libs"]
 	path = components/esp-adf-libs
 	url = https://github.com/espressif/esp-adf-libs
+[submodule "components/esp-sr"]
+	path = components/esp-sr
+	url = https://github.com/espressif/esp-sr.git
diff --git a/components/audio_stream/algorithm_stream.c b/components/audio_stream/algorithm_stream.c
diff --git a/components/audio_stream/include/algorithm_stream.h b/components/audio_stream/include/algorithm_stream.h
@@ -0,0 +1,150 @@
+/*
+ * ESPRESSIF MIT License
+ *
+ * Copyright (c) 2019 <ESPRESSIF SYSTEMS (SHANGHAI) CO., LTD>
+ *
+ * Permission is hereby granted for use on all ESPRESSIF SYSTEMS products, in which case,
+ * it is free of charge, to any person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
+ * to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or
+ * substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _ALGORITHM_STREAM_H_
+#define _ALGORITHM_STREAM_H_
+
+#include "audio_element.h"
+
+#define ALGORITHM_STREAM_PINNED_TO_CORE   0 /*!< Default value of task_core in ALGORITHM_STREAM_CFG_DEFAULT() */
+#define ALGORITHM_STREAM_TASK_PERIOD      5 /*!< Default value of task_prio in ALGORITHM_STREAM_CFG_DEFAULT() */
+#define ALGORITHM_STREAM_TASK_STACK_SIZE (5 * 1024) /*!< Default value of task_stack in ALGORITHM_STREAM_CFG_DEFAULT() */
+
+
+/*
+
+//  AEC: Acoustic Echo Cancellation
+//  AGC: Automatic Gain Control
+//  WWE: Wake Word Engine
+//  NS:  Noise Suppression
+                                                +-----------+
+                                                |           |
+                                                |  TYPE 1   |
+                                                |           |
++-----------------------------------------------+-----------+---------------------------------------------------+
+|                                                                                                               |
+|                                       reference signal                                                        |
+|     +-----------+    +-----------+    +-----------\      +-----------+    +-----------+    +-----------+      |
+|     |           |    |           |    |            \     |           |    |           |    |           |      |
+|     | I2S read  |--->| Resample  |--->| Data split  |--->|    AEC    |--->|    NS     |--->|    AGC    |      |
+|     |           |    |           |    |            /     |           |    |           |    |           |      |
+|     +-----------+    +-----------+    +-----------/      +-----------+    +-----------+    +-----------+      |
+|                                       record signal                                                           |
+|                                                                                                               |
++---------------------------------------------------------------------------------------------------------------+
+
+                                                +-----------+
+                                                |           |
+                                                |  TYPE 2   |
+                                                |           |
++-----------------------------------------------+-----------+---------------------------------------------------+
+|                                                                                                               |
+|                                                                                                               |
+|     +-----------+    +-----------+    +-----------+    +-----------+    +-----------+    +-----------+        |
+|     |           |    |           |    |           |    |           |    |           |    |           |        |
+|     | I2S read  |--->| Resample  |--->| rec signal|--->|    AEC    |--->|    NS     |--->|    AGC    |        |
+|     |           |    |           |    |           |    |           |    |           |    |           |        |
+|     +-----------+    +-----------+    +-----------+    +-----^-----+    +-----------+    +-----------+        |
+|                                                              |                                                |
+|     +-----------+    +-----------+    +-----------+          |                                                |
+|     |           |    |           |    |           |          |                                                |
+|     | input_rb  |--->| Resample  |--->| ref signal|----------+                                                |
+|     |           |    |           |    |           |                                                           |
+|     +-----------+    +-----------+    +-----------+                                                           |
+|                                                                                                               |
++---------------------------------------------------------------------------------------------------------------+
+
+*/
+
+/**
+ * @brief Input methods supported by the algorithm stream
+ */
+typedef enum {
+    ALGORITHM_STREAM_INPUT_TYPE1 = 1, /*!< Type 1 (the default, used by the mini-board): the reference signal and the recorded signal are read from the left channel and the right channel, respectively, of the same I2S */
+    ALGORITHM_STREAM_INPUT_TYPE2 = 2, /*!< Type 2: the recorded signal is read from I2S; when data is written, that data should be copied as the reference signal and passed to the algorithm element through the multiple-input ring buffer (see algo_stream_set_multi_input_rb()). */
+} algorithm_stream_input_type_t;      /*!< With type 2, any combination of the algorithm modules may be selected; use the algo_mask field of algorithm_stream_cfg_t to configure it. */
+
+/**
+ * @brief Bit flags selecting the algorithms to be used; combine them with bitwise OR
+ */
+typedef enum {
+    ALGORITHM_STREAM_USE_AEC = (0x1 << 0), /*!< Use AEC (Acoustic Echo Cancellation) */
+    ALGORITHM_STREAM_USE_AGC = (0x1 << 1), /*!< Use AGC (Automatic Gain Control) */
+    ALGORITHM_STREAM_USE_NS  = (0x1 << 2)  /*!< Use NS (Noise Suppression) */
+} algorithm_stream_mask_t;
+
+/**
+ * @brief Algorithm stream configuration
+ */
+typedef struct {
+    algorithm_stream_input_type_t input_type;   /*!< Input type of the stream */
+    int task_stack;                             /*!< Task stack size */
+    int task_prio;                              /*!< Task priority */
+    int task_core;                              /*!< The core on which the task is to be created */
+    int rec_ch;                                 /*!< Channel number of the recorded signal */
+    int ref_ch;                                 /*!< Channel number of the reference signal */
+    int ref_sample_rate;                        /*!< Sample rate of the reference signal */
+    int rec_sample_rate;                        /*!< Sample rate of the recorded signal */
+    int rec_linear_factor;                      /*!< Linear amplification factor of the recorded signal */
+    int ref_linear_factor;                      /*!< Linear amplification factor of the reference signal */
+    int8_t algo_mask;                           /*!< Algorithms to use: bitwise OR of algorithm_stream_mask_t values */
+} algorithm_stream_cfg_t;
+
+#define ALGORITHM_STREAM_CFG_DEFAULT() { /* default initializer for algorithm_stream_cfg_t */ \
+    .input_type = ALGORITHM_STREAM_INPUT_TYPE1,           \
+    .task_core  = ALGORITHM_STREAM_PINNED_TO_CORE,        \
+    .task_prio  = ALGORITHM_STREAM_TASK_PERIOD, /* task priority, despite the macro's name */ \
+    .task_stack = ALGORITHM_STREAM_TASK_STACK_SIZE,       \
+    .ref_ch     = 1,                                      \
+    .rec_ch     = 1,                                      \
+    .ref_sample_rate    = 16000,                          \
+    .rec_sample_rate    = 16000,                          \
+    .rec_linear_factor = 1,                               \
+    .ref_linear_factor = 3,                               \
+    .algo_mask = (ALGORITHM_STREAM_USE_AEC | ALGORITHM_STREAM_USE_AGC | ALGORITHM_STREAM_USE_NS), /* all three algorithms enabled */ \
+}
+
+/**
+ * @brief      Initialize the algorithm stream
+ *
+ * @param      config   The algorithm stream configuration
+ *
+ * @return     The audio element handle
+ */
+audio_element_handle_t algo_stream_init(algorithm_stream_cfg_t *config);
+
+/**
+ * @brief      Set the ring buffer used to input the reference signal
+ *
+ * @note       If input type 2 is chosen, call this function to set the ring buffer through which reference data is input.
+ *
+ * @param      algo_handle   Handle of the algorithm stream
+ * @param      input_rb      Ring buffer handle to be set
+ *
+ * @return     ESP_OK   success
+ *             ESP_FAIL fail
+ */
+esp_err_t algo_stream_set_multi_input_rb(audio_element_handle_t algo_handle, ringbuf_handle_t input_rb);
+
+#endif
diff --git a/components/esp-adf-libs b/components/esp-adf-libs
diff --git a/components/esp-sr b/components/esp-sr
diff --git a/docs/Doxyfile b/docs/Doxyfile
@@ -33,6 +33,7 @@ INPUT = \
     ../../components/audio_stream/include/i2s_stream.h \
     ../../components/audio_stream/include/raw_stream.h \
     ../../components/audio_stream/include/spiffs_stream.h \
+    ../../components/audio_stream/include/algorithm_stream.h \
     ## ESP Codec
     ../../components/esp-adf-libs/esp_codec/include/codec/esp_decoder.h \
     ../../components/esp-adf-libs/esp_codec/include/codec/audio_type_def.h \
@@ -67,10 +68,7 @@ INPUT = \
     ../../components/esp-adf-libs/esp_codec/include/codec/equalizer.h \
     ../../components/esp-adf-libs/esp_codec/include/codec/filter_resample.h \
     ../../components/esp-adf-libs/esp_codec/include/codec/audio_sonic.h \
-    ## Speech Recognitions
-    ../../components/esp-adf-libs/esp_sr/include/esp_sr_iface.h \
-    ../../components/esp-adf-libs/esp_sr/include/esp_sr_models.h \
-    ../../components/esp-adf-libs/esp_sr/include/esp_vad.h \
+    ## Speech Recognition
     ../../components/esp-adf-libs/recorder_engine/include/recorder_engine.h \
     ## ESP Audio
     ../../components/esp-adf-libs/esp_audio/include/audio_def.h \

diff --git a/docs/en/api-reference/speech-recognition/esp_vad.rst b/docs/en/api-reference/speech-recognition/esp_vad.rst
@@ -15,4 +15,4 @@ Implementation of the voice activity detection API is demonstrated in :example:`
 API Reference
 -------------
 
-.. include:: /_build/inc/esp_vad.inc
+For the latest API reference please refer to `Espressif Speech recognition repository <https://github.com/espressif/esp-sr>`_.
diff --git a/...rence/speech-recognition/esp_sr_iface.rst → ...rence/speech-recognition/esp_wn_iface.rst b/...rence/speech-recognition/esp_sr_iface.rst → ...rence/speech-recognition/esp_wn_iface.rst
@@ -35,30 +35,35 @@ A code snippet below demonstrates how to initialize the model, determine the num
 
 .. code-block:: c
 
-    #include "esp_sr_iface.h"
-    #include "esp_sr_models.h"
+    #include "esp_wn_iface.h"
+    #include "esp_wn_models.h"
+    #include "rec_eng_helper.h"
 
-    static const sr_model_iface_t *model = &sr_model_wakenet3_quantized;
+    esp_wn_iface_t *wakenet;
+    model_coeff_getter_t *model_coeff_getter;
+    model_iface_data_t *model_data;
 
     // Initialize wakeNet model data
-    static model_iface_data_t *model_data = model->create(DET_MODE_90);
+    get_wakenet_iface(&wakenet);
+    get_wakenet_coeff(&model_coeff_getter);
+    model_data = wakenet->create(model_coeff_getter, DET_MODE_90);
 
     // Set parameters of buffer
-    int audio_chunksize = model->get_samp_chunksize(model_data);
-    int frequency = model->get_samp_rate(model_data);
-    int16_t *buffer = malloc(audio_chunksize sizeof(int16_t));
+    int audio_chunksize = wakenet->get_samp_chunksize(model_data);
+    int frequency = wakenet->get_samp_rate(model_data);
+    int16_t *buffer = malloc(audio_chunksize * sizeof(int16_t));
 
     // Get voice data feed to buffer
     ...
 
     // Detect
-    int r = model->detect(model_data, buffer);
+    int r = wakenet->detect(model_data, buffer);
     if (r > 0) {
         printf("Detection triggered output %d.\n",  r);
     }
     
     // Destroy model
-    model->destroy(model_data)
+    wakenet->destroy(model_data);
 
 
 Application Example
@@ -70,5 +75,4 @@ Implementation of the speech recognition API is demonstrated in :example:`speech
 API Reference
 -------------
 
-.. include:: /_build/inc/esp_sr_iface.inc
-
+For the latest API reference please refer to `Espressif Speech recognition repository <https://github.com/espressif/esp-sr>`_.
diff --git a/docs/en/api-reference/speech-recognition/index.rst b/docs/en/api-reference/speech-recognition/index.rst
@@ -2,7 +2,7 @@
 Speech Recognition
 ******************
 
-The ESP-ADF comes complete with :doc:`wakeup word libraries <wakeup-word-libs>` and :doc:`speech recognition interface <esp_sr_iface>` to recognize voice wakeup commands. Most of currently implemented wakeup commands are in Chinese with one command "Alexa" in English. 
+The ESP-ADF comes complete with a :doc:`speech recognition interface <esp_wn_iface>` to recognize voice wakeup commands. Most of the currently implemented wakeup commands are in Chinese, with one command, "Hi Jeson", in English.
 
 Provided in this section functions also include automatic speech detection, also known as :doc:`voice activity detection (VAD) <esp_vad>`, and :doc:`speech recording engine <recorder_engine>`.
 
@@ -12,7 +12,6 @@ The Speech Recognition API is designed to easy integrate with existing :doc:`../
     :caption: In This Section
     :maxdepth: 1
 
-    wakeup-word-libs
-    esp_sr_iface
+    esp_wn_iface
     esp_vad
     recorder_engine
diff --git a/docs/en/api-reference/speech-recognition/wakeup-word-libs.rst b/docs/en/api-reference/speech-recognition/wakeup-word-libs.rst
diff --git a/docs/zh_CN/api-reference/speech-recognition/esp_sr_iface.rst b/docs/zh_CN/api-reference/speech-recognition/esp_sr_iface.rst
diff --git a/docs/zh_CN/api-reference/speech-recognition/esp_wn_iface.rst b/docs/zh_CN/api-reference/speech-recognition/esp_wn_iface.rst
@@ -0,0 +1 @@
+.. include:: ../../../en/api-reference/speech-recognition/esp_wn_iface.rst
diff --git a/docs/zh_CN/api-reference/speech-recognition/wakeup-word-libs.rst b/docs/zh_CN/api-reference/speech-recognition/wakeup-word-libs.rst