diff --git a/Include/arm_nnsupportfunctions.h b/Include/arm_nnsupportfunctions.h
index 26d096e9..c010c295 100644
--- a/Include/arm_nnsupportfunctions.h
+++ b/Include/arm_nnsupportfunctions.h
@@ -21,8 +21,8 @@
  * Title:        arm_nnsupportfunctions.h
  * Description:  Public header file of support functions for CMSIS NN Library
  *
- * $Date:        14 February 2024
- * $Revision:    V.20.1.0
+ * $Date:        10 April 2024
+ * $Revision:    V.20.2.0
  *
  * Target :  Arm(R) M-Profile Architecture
  * -------------------------------------------------------------------- */
@@ -1278,11 +1278,21 @@ __STATIC_FORCEINLINE int32_t arm_nn_divide_by_power_of_two(const int32_t dividen
 
 /**
  * @brief           Requantize a given value.
+ * @details         Essentially returns (val * multiplier)/(2 ^ shift) with different rounding depending on
+ *                  whether CMSIS_NN_USE_SINGLE_ROUNDING is defined or not.
  * @param[in]       val         Value to be requantized
- * @param[in]       multiplier  multiplier. Range {NN_Q31_MIN + 1, Q32_MAX}
- * @param[in]       shift       left or right shift for 'val * multiplier'
+ * @param[in]       multiplier  Multiplier. Range {NN_Q31_MIN + 1, Q32_MAX}
+ * @param[in]       shift       Shift. Range: {-31, 30}
+ *                              Default branch:
+ *                                  If shift is positive, left shift 'val * multiplier' by shift
+ *                                  If shift is negative, right shift 'val * multiplier' by abs(shift)
+ *                              Single round branch:
+ *                                  Input for total_shift in divide by '2 ^ total_shift'
  *
- * @return          Returns (val * multiplier)/(2 ^ shift)
+ * @return          Default branch:
+ *                      Returns (val * multiplier) with rounding divided by (2 ^ shift) with rounding
+ *                  Single round branch:
+ *                      Returns (val * multiplier)/(2 ^ (31 - shift)) with rounding
  *
  */
 __STATIC_FORCEINLINE int32_t arm_nn_requantize(const int32_t val, const int32_t multiplier, const int32_t shift)
@@ -1394,7 +1404,7 @@ __STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve(const int32x4_t di
  * @param[in]       multiplier  multiplier
  * @param[in]       shift       shift
  *
- * @return          Returns (val * multiplier)/(2 ^ shift)
+ * @return          Returns (val * multiplier)/(2 ^ shift) with different rounding. See arm_nn_requantize for details.
  *
  */
 __STATIC_FORCEINLINE int32x4_t arm_requantize_mve(const int32x4_t val, const int32_t multiplier, const int32_t shift)
diff --git a/README.md b/README.md
index 51840024..a218bb66 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@ performance and minimize the memory footprint of neural networks on Arm Cortex-M
 
 ## Supported Framework
 The library follows the [int8](https://www.tensorflow.org/lite/performance/quantization_spec) and int16 quantization specification of TensorFlow Lite for Microcontrollers.
+This means CMSIS-NN is bit-exact with TensorFlow Lite reference kernels. In some cases TFL and TFLM reference kernels may not be bit-exact. In that case, CMSIS-NN follows the TFLM reference kernels. The unit test readme provides an [overview](https://github.com/ARM-software/CMSIS-NN/blob/main/Tests/UnitTest/README.md#tests-depending-on-tflm-interpreter).
 
 ## Branches and Tags
 There is a single branch called 'main'.
@@ -96,6 +97,8 @@ you may need to specify '-fomit-frame-pointer'.
 
 The compiler option *'-fno-builtin'* does not utilize optimized implementations of e.g. memcpy and memset, which are heavily used by CMSIS-NN. It can significantly downgrade performance. So this should be avoided. The compiler option *'-ffreestanding'* should also be avoided as it enables '-fno-builtin' implicitly.
 
+Another option is to enable CMSIS_NN_USE_SINGLE_ROUNDING. This may affect the output. If it is enabled, the equivalent flag should also be enabled in TFL/TFLM.
+
 ### Supported Compilers
 * CMSIS-NN is tested on Arm Compiler 6 and on Arm GNU Toolchain.
 * IAR compiler is not tested and there can be compilation and/or performance issues.
diff --git a/Tests/UnitTest/README.md b/Tests/UnitTest/README.md
index 3509c835..3b7cafd3 100644
--- a/Tests/UnitTest/README.md
+++ b/Tests/UnitTest/README.md
@@ -128,13 +128,22 @@ When adding a new test data set, new c files should be added or existing c files
 The steps to add a new unit test are as follows. Add a new test test in the load_all_testdatasets() function. Run the generate script with that new test set as input. Add the new generated header files to an existing or new unit test.
 
 ### Tests depending on TFLM interpreter
-#### SVDF INT8
-This test is depending on tflite_micro for its reference data. This is because the operator is only supported by TFLM.
 
-#### LSTM
-This test is depending on tflite_micro for its reference data. This is because the operator differs between TFLM and TFLite.
-
-Note that tflite_micro interpreter is currently only supported for SVDF and LSTM.
+If TFL and TFLM reference kernels differ, CMSIS-NN aims to be bit-exact to TFLM reference kernels. Hence those operators depend on the tflite_micro interpreter.
+
+Operator bit-exactness compatibility:
+
+| Operator        |  TFL bit-exact  | TFLM bit-exact |  Notes
+| ---             | ---             | ---       | ---
+| convolution     |   x             |  x        |
+| fully_connected |   x             |  x        |
+| lstm            |                 |  x        |
+| svdf            |                 |  x        | Operator is only fully supported by TFLM.
+| softmax         |   x             |  x        |
+| avgpool         |   x             |  x        |
+| maxpool         |   x             |  x        |
+| add             |   x             |  x        |
+| mul             |   x             |  x        |
 
 ### Refactoring of generate_test_data.py
 Test data generation is in progress of incrementally moving over to the cleaned up scripts placed in `RefactoredTestGen`.