diff --git a/Include/arm_nnsupportfunctions.h b/Include/arm_nnsupportfunctions.h index 26d096e9..c010c295 100644 --- a/Include/arm_nnsupportfunctions.h +++ b/Include/arm_nnsupportfunctions.h @@ -21,8 +21,8 @@ * Title: arm_nnsupportfunctions.h * Description: Public header file of support functions for CMSIS NN Library * - * $Date: 14 February 2024 - * $Revision: V.20.1.0 + * $Date: 10 April 2024 + * $Revision: V.20.2.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -1278,11 +1278,21 @@ __STATIC_FORCEINLINE int32_t arm_nn_divide_by_power_of_two(const int32_t dividen /** * @brief Requantize a given value. + * @details Essentially returns (val * multiplier)/(2 ^ shift) with different rounding depending on whether + * CMSIS_NN_USE_SINGLE_ROUNDING is defined. * @param[in] val Value to be requantized - * @param[in] multiplier multiplier. Range {NN_Q31_MIN + 1, Q32_MAX} - * @param[in] shift left or right shift for 'val * multiplier' + * @param[in] multiplier Multiplier. Range {NN_Q31_MIN + 1, Q32_MAX} + * @param[in] shift Shift. 
Range: {-31, 30} + * Default branch: + * If shift is positive left shift 'val * multiplier' with shift + * If shift is negative right shift 'val * multiplier' with abs(shift) + * Single round branch: + * Input for total_shift in divide by '2 ^ total_shift' * - * @return Returns (val * multiplier)/(2 ^ shift) + * @return Default branch: + * Returns (val * multiplier) with rounding divided by (2 ^ shift) with rounding + * Single round branch: + * Returns (val * multiplier)/(2 ^ (31 - shift)) with rounding * */ __STATIC_FORCEINLINE int32_t arm_nn_requantize(const int32_t val, const int32_t multiplier, const int32_t shift) @@ -1394,7 +1404,7 @@ __STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve(const int32x4_t di * @param[in] multiplier multiplier * @param[in] shift shift * - * @return Returns (val * multiplier)/(2 ^ shift) + * @return Returns (val * multiplier)/(2 ^ shift) with different rounding. See arm_nn_requantize for details. * */ __STATIC_FORCEINLINE int32x4_t arm_requantize_mve(const int32x4_t val, const int32_t multiplier, const int32_t shift) diff --git a/README.md b/README.md index 51840024..a218bb66 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ performance and minimize the memory footprint of neural networks on Arm Cortex-M ## Supported Framework The library follows the [int8](https://www.tensorflow.org/lite/performance/quantization_spec) and int16 quantization specification of TensorFlow Lite for Microcontrollers. +This means CMSIS-NN is bit-exact with TensorFlow Lite reference kernels. In some cases TFL and TFLM reference kernels may not be bit-exact. In that case CMSIS-NN follows TFLM reference kernels. The unit test readme provides an [overview](https://github.com/ARM-software/CMSIS-NN/blob/main/Tests/UnitTest/README.md#tests-depending-on-tflm-interpreter). ## Branches and Tags There is a single branch called 'main'. @@ -96,6 +97,8 @@ you may need to specify '-fomit-frame-pointer'. 
The compiler option *'-fno-builtin'* does not utilize optimized implementations of e.g. memcpy and memset, which are heavily used by CMSIS-NN. It can significantly downgrade performance. So this should be avoided. The compiler option *'-ffreestanding'* should also be avoided as it enables '-fno-builtin' implicitly. +Another option is to enable CMSIS_NN_USE_SINGLE_ROUNDING. This may affect the output. If this is enabled, the equivalent flag should also be enabled in TFL/TFLM. + ### Supported Compilers * CMSIS-NN is tested on Arm Compiler 6 and on Arm GNU Toolchain. * IAR compiler is not tested and there can be compilation and/or performance issues. diff --git a/Tests/UnitTest/README.md b/Tests/UnitTest/README.md index 3509c835..3b7cafd3 100644 --- a/Tests/UnitTest/README.md +++ b/Tests/UnitTest/README.md @@ -128,13 +128,22 @@ When adding a new test data set, new c files should be added or existing c files The steps to add a new unit test are as follows. Add a new test test in the load_all_testdatasets() function. Run the generate script with that new test set as input. Add the new generated header files to an existing or new unit test. ### Tests depending on TFLM interpreter -#### SVDF INT8 -This test is depending on tflite_micro for its reference data. This is because the operator is only supported by TFLM. -#### LSTM -This test is depending on tflite_micro for its reference data. This is because the operator differs between TFLM and TFLite. - -Note that tflite_micro interpreter is currently only supported for SVDF and LSTM. +If TFL and TFLM reference kernels differ, CMSIS-NN aims to be bit-exact to TFLM reference kernels. Hence those operators depend on the tflite_micro interpreter. + +Operator bit-exactness compatibility: + +| Operator | TFL bit-exact | TFLM bit-exact | Notes +| --- | --- | --- | --- +| convolution | x | x | +| fully_connected | x | x | +| lstm | | x | +| svdf | | x | Operator is only fully supported by TFLM. 
+| softmax | x | x | +| avgpool | x | x | +| maxpool | x | x | +| add | x | x | +| mul | x | x | ### Refactoring of generate_test_data.py Test data generation is in progress of incrementally moving over to the cleaned up scripts placed in `RefactoredTestGen`.