Add int8 padding operator and unit tests

mansnils · Oct 18, 2024 · 25eb94f · 25eb94f
1 parent 5f8f1a9
commit 25eb94f
Show file tree

Hide file tree

Showing 22 changed files with 532 additions and 51 deletions.
diff --git a/ARM.CMSIS-NN.pdsc b/ARM.CMSIS-NN.pdsc
@@ -149,6 +149,7 @@
         <file category="source" name="Source/SoftmaxFunctions/arm_softmax_s8_s16.c"/>
         <file category="source" name="Source/SoftmaxFunctions/arm_softmax_s16.c"/>
         <file category="source" name="Source/SoftmaxFunctions/arm_softmax_u8.c"/>
+        <file category="source" name="Source/PadFunctions/arm_pad_s8.c"/>
       </files>
     </component>
   </components>

diff --git a/Include/arm_nnfunctions.h b/Include/arm_nnfunctions.h
@@ -21,8 +21,8 @@
  * Title:        arm_nnfunctions.h
  * Description:  Public header file for CMSIS NN Library
  *
- * $Date:        08 October 2024
- * $Revision:    V.17.1.0
+ * $Date:        17 October 2024
+ * $Revision:    V.17.2.0
  *
  * Target :  Arm(R) M-Profile Architecture
  * -------------------------------------------------------------------- */
@@ -2780,6 +2780,31 @@ arm_cmsis_nn_status arm_batch_matmul_s16(const cmsis_nn_context *ctx,
                                          const cmsis_nn_dims *output_dims,
                                          int16_t *output);
 
+/**
+ * @defgroup Pad Pad Layer Functions:
+ *
+ */
+
+/**
+ * @brief Expands the size of the input by adding constant values before and after the data, in all dimensions.
+ *
+ * @param[in]   input                      Pointer to input data
+ * @param[out]  output                     Pointer to output data, sized for the padded tensor
+ * @param[in]   pad_value                  Value to pad with
+ * @param[in]   input_size                 Input tensor dimensions
+ * @param[in]   pre_pad                    Padding to apply before data in each dimension
+ * @param[in]   post_pad                   Padding to apply after data in each dimension
+ *
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ */
+arm_cmsis_nn_status arm_pad_s8(const int8_t *input,
+                               int8_t *output,
+                               const int8_t pad_value,
+                               const cmsis_nn_dims *input_size,
+                               const cmsis_nn_dims *pre_pad,
+                               const cmsis_nn_dims *post_pad);
+
 /**
  * @brief Elementwise binary minimum with 8bit data.
  *

diff --git a/README.md b/README.md
@@ -40,6 +40,7 @@ Examples are Cortex-M55 or Cortex-M85 configured with MVE.
 | Softmax         | Yes         | Yes        | N/A        | Yes         | Yes          | N/A          | Yes         | No           | N/A          |
 | LSTM            | Yes         | Yes        | No         | Yes         | Yes          | No           | Yes         | Yes          | No           |
 | SVDF            | Yes         | No         | No         | Yes         | No           | No           | Yes         | No           | No           |
+| Pad             | Yes         | No         | N/A        | No          | No           | N/A          | Yes         | No           | N/A          |
 
 * int4 weights + int8 activations
 
@@ -91,7 +92,7 @@ cmake .. -DCMAKE_TOOLCHAIN_FILE=</path/to/ethos-u-core-platform>/cmake/toolchain
 ```
 
 ### Compiler Options
-Default optimization level is set at Ofast. This can be overwritten with CMake on command line by using <nobr>*"-DCMSIS_OPTIMIZATION_LEVEL"*</nobr>. Please change according to project needs. 
+Default optimization level is set at Ofast. This can be overwritten with CMake on command line by using <nobr>*"-DCMSIS_OPTIMIZATION_LEVEL"*</nobr>. Please change according to project needs.
 Just bear in mind this can impact performance. With only optimization level -O0, *ARM_MATH_AUTOVECTORIZE* needs to be defined for processors with Helium
 Technology.
 

diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt
@@ -32,6 +32,7 @@ option(BASICMATHSNN         "Basic Maths for NN"    ON)
 option(RESHAPE              "Reshape"               ON)
 option(SVDF                 "SVDF"                  ON)
 option(LSTM                 "LSTM"                  ON)
+option(PAD                  "Pad"                   ON)
 
 # Always needed if any other module above is on.
 option(NNSUPPORT            "NN Support"            ON)
@@ -81,6 +82,10 @@ if (RESHAPE)
   add_subdirectory(ReshapeFunctions)
 endif()
 
+if (PAD)
+  add_subdirectory(PadFunctions)
+endif()
+
 # Keep NNSUPPORT at the end
 if (NNSUPPORT)
   add_subdirectory(NNSupportFunctions)

diff --git a/Source/PadFunctions/CMakeLists.txt b/Source/PadFunctions/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Add every int8 (*_s8.c) source in this directory to the cmsis-nn target; the glob is resolved at configure time.
+file(GLOB SRC "./*_s8.c")
+target_sources(cmsis-nn PRIVATE ${SRC})
diff --git a/Source/PadFunctions/arm_pad_s8.c b/Source/PadFunctions/arm_pad_s8.c
@@ -0,0 +1,117 @@
+
+/*
+ * SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_pad_s8.c
+ * Description:  Pad a s8 vector
+ *
+ * $Date:        19 Sep 2024
+ * $Revision:    V.1.0.0
+ *
+ * Target :  Arm(R) M-Profile Architecture
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nn_types.h"
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+/**
+ *  @ingroup Public
+ */
+
+/**
+ * @addtogroup Pad
+ * @{
+ */
+
+/*
+ * Basic s8 pad function: fills the pre/post padding of every dimension with pad_value (memset) and
+ * copies the input data in between (memcpy), walking the output buffer front to back.
+ * Refer header file for details.
+ * The dims arguments are dereferenced without validation and the return value is always success.
+ */
+
+arm_cmsis_nn_status arm_pad_s8(const int8_t *input,
+                               int8_t *output,
+                               const int8_t pad_value,
+                               const cmsis_nn_dims *input_size,
+                               const cmsis_nn_dims *pre_pad,
+                               const cmsis_nn_dims *post_pad)
+{
+    /* Padded extent per dimension. Positional initializer: assumes cmsis_nn_dims is declared as {n, h, w, c}. */
+    const cmsis_nn_dims output_size = {pre_pad->n + input_size->n + post_pad->n,
+                                       pre_pad->h + input_size->h + post_pad->h,
+                                       pre_pad->w + input_size->w + post_pad->w,
+                                       pre_pad->c + input_size->c + post_pad->c};
+
+    const int32_t batch_block_size = output_size.h * output_size.w * output_size.c; /* elements per output batch */
+    const int32_t row_block_size = output_size.w * output_size.c;                   /* elements per output row */
+    const int32_t col_block_size = output_size.c;                                   /* elements per output pixel */
+    /* Whole batches of padding placed before the first input batch. */
+    arm_memset_s8(output, pad_value, batch_block_size * pre_pad->n);
+    output += batch_block_size * pre_pad->n;
+    for (int32_t b = 0; b < input_size->n; b++)
+    {
+        /* Whole rows of padding placed before the first input row of this batch. */
+        arm_memset_s8(output, pad_value, row_block_size * pre_pad->h);
+        output += row_block_size * pre_pad->h;
+        for (int32_t y = 0; y < input_size->h; y++)
+        {
+            /* Whole pixels of padding placed before the first input pixel of this row. */
+            arm_memset_s8(output, pad_value, col_block_size * pre_pad->w);
+            output += col_block_size * pre_pad->w;
+            if (input_size->c == output_size.c) /* pre_pad->c + post_pad->c is zero: copy the whole input row at once */
+            {
+                arm_memcpy_s8(output, input, input_size->w * input_size->c);
+                output += input_size->w * input_size->c;
+                input += input_size->w * input_size->c;
+            }
+            else
+            {
+                for (int32_t x = 0; x < input_size->w; x++)
+                {
+                    /* Per pixel: leading channel padding, the input channels, then trailing channel padding. */
+                    arm_memset_s8(output, pad_value, pre_pad->c);
+                    output += pre_pad->c;
+
+                    arm_memcpy_s8(output, input, input_size->c);
+                    output += input_size->c;
+                    input += input_size->c;
+
+                    arm_memset_s8(output, pad_value, post_pad->c);
+                    output += post_pad->c;
+                }
+            }
+            /* Whole pixels of padding placed after the last input pixel of this row. */
+            arm_memset_s8(output, pad_value, col_block_size * post_pad->w);
+            output += col_block_size * post_pad->w;
+        }
+        /* Whole rows of padding placed after the last input row of this batch. */
+        arm_memset_s8(output, pad_value, row_block_size * post_pad->h);
+        output += row_block_size * post_pad->h;
+    }
+    arm_memset_s8(output, pad_value, batch_block_size * post_pad->n); /* trailing batches of padding */
+
+    return ARM_CMSIS_NN_SUCCESS;
+}
+
+/**
+ * @} end of Pad group
+ */
diff --git a/Tests/UnitTest/CMakeLists.txt b/Tests/UnitTest/CMakeLists.txt
@@ -109,6 +109,7 @@ add_subdirectory(TestCases/test_arm_transpose_conv_s8)
 add_subdirectory(TestCases/test_arm_lstm_unidirectional_s16)
 add_subdirectory(TestCases/test_arm_batch_matmul_s8)
 add_subdirectory(TestCases/test_arm_batch_matmul_s16)
+add_subdirectory(TestCases/test_arm_pad_s8)
 
 set(MAKE_CMD "python3")
 set(MAKE_CMD_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/unittest_targets.py")

diff --git a/Tests/UnitTest/README.md b/Tests/UnitTest/README.md
@@ -23,7 +23,7 @@ If in a virtual environment just start by upgrading pip.
 pip install --upgrade pip
 ```
 
-After upgrading pip, the requirements file found in Tests/UnitTests can be installed. This contains all 
+After upgrading pip, the requirements file found in Tests/UnitTests can be installed. This contains all
 python modules required to run all of the scripts. This will install tensorflow and keras to allow the use of
 the generate_test_data.py script. If you have version specific requirements, it is recommended to install this
 requirements.txt in a virtual environment.
@@ -74,11 +74,11 @@ The easiest way to run the unit tests on Corstone-300 is to use the build_and_ru
 
 Sample usage:
 ```
-./build_and_run_tests.sh -c cortex-m3,cortex-m7,cortex-m55 -o '-Ofast' 
+./build_and_run_tests.sh -c cortex-m3,cortex-m7,cortex-m55 -o '-Ofast'
 ```
 By default the script will download and target gcc. To use arm compiler ensure that arm compilers folder is located in path, export CC and use the -a option on the script.
 
-Downloaded dependencies including python venv can be found in Tests/UnitTests/downloads. Test elfs can be found in Tests/UnitTests/build-($cpu) directories. 
+Downloaded dependencies including python venv can be found in Tests/UnitTests/downloads. Test elfs can be found in Tests/UnitTests/build-($cpu) directories.
 
 Otherwise, you can build it manually:
 
@@ -150,6 +150,7 @@ Operator bit-exactness compability:
 | add             |   x             |  x        |
 | mul             |   x             |  x        |
 | batch matmul    |   x             |  x        |
+| pad             |   x             |  x        |
 | minimum         |   x             |  x        |
 | maximum         |   x             |  x        |
 
@@ -178,6 +179,7 @@ Current progress:
 | add             |  x   |     |
 | mul             |  x   |     |
 | batch matmul    |      |  x  |
+| pad             |      |  x  |
 | minimum         |      |  x  |
 | maximum         |      |  x  |
 

diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/op_pad.py b/Tests/UnitTest/RefactoredTestGen/Lib/op_pad.py
@@ -0,0 +1,69 @@
+# SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import Lib.op_utils
+import tensorflow as tf
+import math
+import numpy as np
+
+from tensorflow.lite.python.interpreter import Interpreter
+from tensorflow.lite.python.interpreter import OpResolverType
+import tf_keras as keras
+
+class Op_pad(Lib.op_utils.Op_type):
+
+    def get_shapes(params):
+        shapes = {}
+        shapes["input_tensor"] = (params["input_n"], params["input_h"], params["input_w"], params["input_c"])
+        shapes["representational_dataset"] = shapes["input_tensor"]
+
+        return shapes
+
+    def generate_keras_model(shapes, params):
+
+        model = keras.models.Sequential()
+        model.add(keras.layers.InputLayer(input_shape=shapes["input_tensor"][1:]))
+
+        if (params["pre_pad_n"] == params["post_pad_n"] == params["pre_pad_h"] == params["post_pad_h"] == 0):
+            model.add(keras.layers.ZeroPadding2D(padding=((params["pre_pad_w"], params["post_pad_w"]), (params["pre_pad_c"], params["post_pad_c"])), data_format="channels_first"))
+        elif (params["pre_pad_n"] == params["post_pad_n"] == params["pre_pad_c"] == params["post_pad_c"] == 0):
+            model.add(keras.layers.ZeroPadding2D(padding=((params["pre_pad_h"], params["post_pad_h"]), (params["pre_pad_w"], params["post_pad_w"])), data_format="channels_last"))
+        else:
+            raise ValueError(f"Keras can only generate padding for (h,w) or (w,c), the others must be zero.")
+
+        return model
+
+    def generate_data_tflite(tflite_fname, params):
+        tensors = {}
+        effective_scales = {}
+        scales = {}
+        generated_params = {}
+
+        generated_params["pad_value"] = -128
+
+        interpreter = Interpreter(str(tflite_fname), experimental_op_resolver_type=OpResolverType.BUILTIN_REF)
+        interpreter.allocate_tensors()
+
+        output_details = interpreter.get_output_details()
+        output_n = output_details[0]['shape'][3]
+        output_h = output_details[0]['shape'][2]
+        output_w = output_details[0]['shape'][1]
+        output_c = output_details[0]['shape'][0]
+
+        generated_params["output_size"] = output_n * output_h * output_w * output_c;
+
+        return Lib.op_utils.Generated_data(generated_params, tensors, scales, effective_scales)
+
diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/test.py b/Tests/UnitTest/RefactoredTestGen/Lib/test.py
@@ -20,6 +20,7 @@
 import Lib.op_batch_matmul
 import Lib.op_fully_connected
 import Lib.op_pooling
+import Lib.op_pad
 import Lib.op_maximum_minimum
 import tensorflow as tf
 import numpy as np
@@ -186,6 +187,8 @@ def get_op_type(op_type_string):
         return Lib.op_fully_connected.Op_fully_connected
     elif op_type_string == "avgpool" or op_type_string == "maxpool":
         return Lib.op_pooling.Op_pooling
+    if op_type_string == "pad":
+        return Lib.op_pad.Op_pad
     elif op_type_string == "maximum_minimum":
         return Lib.op_maximum_minimum.Op_maximum_minimum
     else: