From 25eb94f7bd673bd131c71c4e3b8d6f04e427724c Mon Sep 17 00:00:00 2001 From: Adrian Lundell <36153706+AdrianLundell@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:55:05 +0200 Subject: [PATCH] Add int8 padding operator and unit tests --- ARM.CMSIS-NN.pdsc | 1 + Include/arm_nnfunctions.h | 29 +++- README.md | 3 +- Source/CMakeLists.txt | 5 + Source/PadFunctions/CMakeLists.txt | 20 +++ Source/PadFunctions/arm_pad_s8.c | 117 ++++++++++++++++ Tests/UnitTest/CMakeLists.txt | 1 + Tests/UnitTest/README.md | 8 +- .../UnitTest/RefactoredTestGen/Lib/op_pad.py | 69 ++++++++++ Tests/UnitTest/RefactoredTestGen/Lib/test.py | 3 + .../UnitTest/RefactoredTestGen/test_plan.json | 127 +++++++++++------- .../TestData/pad_int8_1/config_data.h | 17 +++ .../TestData/pad_int8_1/input_tensor.h | 6 + .../TestCases/TestData/pad_int8_1/output.h | 9 ++ .../TestCases/TestData/pad_int8_1/test_data.h | 3 + .../TestData/pad_int8_2/config_data.h | 17 +++ .../TestData/pad_int8_2/input_tensor.h | 6 + .../TestCases/TestData/pad_int8_2/output.h | 9 ++ .../TestCases/TestData/pad_int8_2/test_data.h | 3 + .../TestCases/test_arm_pad_s8/CMakeLists.txt | 23 ++++ .../Unity/unity_test_arm_pad_s8.c | 49 +++++++ .../test_arm_pad_s8/test_arm_pad_s8.c | 58 ++++++++ 22 files changed, 532 insertions(+), 51 deletions(-) create mode 100644 Source/PadFunctions/CMakeLists.txt create mode 100644 Source/PadFunctions/arm_pad_s8.c create mode 100644 Tests/UnitTest/RefactoredTestGen/Lib/op_pad.py create mode 100644 Tests/UnitTest/TestCases/TestData/pad_int8_1/config_data.h create mode 100644 Tests/UnitTest/TestCases/TestData/pad_int8_1/input_tensor.h create mode 100644 Tests/UnitTest/TestCases/TestData/pad_int8_1/output.h create mode 100644 Tests/UnitTest/TestCases/TestData/pad_int8_1/test_data.h create mode 100644 Tests/UnitTest/TestCases/TestData/pad_int8_2/config_data.h create mode 100644 Tests/UnitTest/TestCases/TestData/pad_int8_2/input_tensor.h create mode 100644 Tests/UnitTest/TestCases/TestData/pad_int8_2/output.h create mode 100644 Tests/UnitTest/TestCases/TestData/pad_int8_2/test_data.h create mode 100644 Tests/UnitTest/TestCases/test_arm_pad_s8/CMakeLists.txt create mode 100644 Tests/UnitTest/TestCases/test_arm_pad_s8/Unity/unity_test_arm_pad_s8.c create mode 100644 Tests/UnitTest/TestCases/test_arm_pad_s8/test_arm_pad_s8.c diff --git a/ARM.CMSIS-NN.pdsc b/ARM.CMSIS-NN.pdsc index 48cc75c2..e967fcc9 100644 --- a/ARM.CMSIS-NN.pdsc +++ b/ARM.CMSIS-NN.pdsc @@ -149,6 +149,7 @@ + diff --git a/Include/arm_nnfunctions.h b/Include/arm_nnfunctions.h index 26b61422..2d89094a 100644 --- a/Include/arm_nnfunctions.h +++ b/Include/arm_nnfunctions.h @@ -21,8 +21,8 @@ * Title: arm_nnfunctions.h * Description: Public header file for CMSIS NN Library * - * $Date: 08 October 2024 - * $Revision: V.17.1.0 + * $Date: 17 October 2024 + * $Revision: V.17.2.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -2780,6 +2780,31 @@ arm_cmsis_nn_status arm_batch_matmul_s16(const cmsis_nn_context *ctx, const cmsis_nn_dims *output_dims, int16_t *output); +/** + * @defgroup Pad Pad Layer Functions: + * + */ + +/** + * @brief Expands the size of the input by adding constant values before and after the data, in all dimensions. 
+ * + * @param[in] input Pointer to input data + * @param[out] output Pointer to output data + * @param[in] pad_value Value to pad with + * @param[in] input_size Input tensor dimensions + * @param[in] pre_pad Padding to apply before data in each dimension + * @param[in] post_pad Padding to apply after data in each dimension + * + * @return The function returns ARM_CMSIS_NN_SUCCESS + * + */ +arm_cmsis_nn_status arm_pad_s8(const int8_t *input, + int8_t *output, + const int8_t pad_value, + const cmsis_nn_dims *input_size, + const cmsis_nn_dims *pre_pad, + const cmsis_nn_dims *post_pad); + /** * @brief Elementwise binary minimum with 8bit data. * diff --git a/README.md b/README.md index eb059b0e..cba13fd9 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ Examples are Cortex-M55 or Cortex-M85 configured with MVE. | Softmax | Yes | Yes | N/A | Yes | Yes | N/A | Yes | No | N/A | | LSTM | Yes | Yes | No | Yes | Yes | No | Yes | Yes | No | | SVDF | Yes | No | No | Yes | No | No | Yes | No | No | +| Pad | Yes | No | N/A | No | No | N/A | Yes | No | N/A | * int4 weights + int8 activations @@ -91,7 +92,7 @@ cmake .. -DCMAKE_TOOLCHAIN_FILE=/cmake/toolchain ``` ### Compiler Options -Default optimization level is set at Ofast. This can be overwritten with CMake on command line by using *"-DCMSIS_OPTIMIZATION_LEVEL"*. Please change according to project needs. +Default optimization level is set at Ofast. This can be overwritten with CMake on command line by using *"-DCMSIS_OPTIMIZATION_LEVEL"*. Please change according to project needs. Just bear in mind this can impact performance. With only optimization level -O0, *ARM_MATH_AUTOVECTORIZE* needs to be defined for processors with Helium Technology. diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt index a12f3856..440aa610 100644 --- a/Source/CMakeLists.txt +++ b/Source/CMakeLists.txt @@ -32,6 +32,7 @@ option(BASICMATHSNN "Basic Maths for NN" ON) option(RESHAPE "Reshape" ON) option(SVDF "SVDF" ON) option(LSTM "LSTM" ON) +option(PAD "Pad" ON) # Always needed if any other module above is on. option(NNSUPPORT "NN Support" ON) @@ -81,6 +82,10 @@ if (RESHAPE) add_subdirectory(ReshapeFunctions) endif() +if (PAD) + add_subdirectory(PadFunctions) +endif() + # Keep NNSUPPORT at the end if (NNSUPPORT) add_subdirectory(NNSupportFunctions) diff --git a/Source/PadFunctions/CMakeLists.txt b/Source/PadFunctions/CMakeLists.txt new file mode 100644 index 00000000..5da8af95 --- /dev/null +++ b/Source/PadFunctions/CMakeLists.txt @@ -0,0 +1,20 @@ +# +# SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +file(GLOB SRC "./*_s8.c") +target_sources(cmsis-nn PRIVATE ${SRC}) diff --git a/Source/PadFunctions/arm_pad_s8.c b/Source/PadFunctions/arm_pad_s8.c new file mode 100644 index 00000000..5f71ae26 --- /dev/null +++ b/Source/PadFunctions/arm_pad_s8.c @@ -0,0 +1,117 @@ + +/* + * SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Project: CMSIS NN Library + * Title: arm_pad_s8.c + * Description: Pad a s8 vector + * + * $Date: 19 Sep 2024 + * $Revision: V.1.0.0 + * + * Target : Arm(R) M-Profile Architecture + * + * -------------------------------------------------------------------- */ + +#include "arm_nn_types.h" +#include "arm_nnfunctions.h" +#include "arm_nnsupportfunctions.h" +/** + * @ingroup Public + */ + +/** + * @addtogroup Pad + * @{ + */ + +/* + * Basic s8 pad function. + * + * Refer header file for details. + * + */ + +arm_cmsis_nn_status arm_pad_s8(const int8_t *input, + int8_t *output, + const int8_t pad_value, + const cmsis_nn_dims *input_size, + const cmsis_nn_dims *pre_pad, + const cmsis_nn_dims *post_pad) +{ + + const cmsis_nn_dims output_size = {pre_pad->n + input_size->n + post_pad->n, + pre_pad->h + input_size->h + post_pad->h, + pre_pad->w + input_size->w + post_pad->w, + pre_pad->c + input_size->c + post_pad->c}; + + const int32_t batch_block_size = output_size.h * output_size.w * output_size.c; + const int32_t row_block_size = output_size.w * output_size.c; + const int32_t col_block_size = output_size.c; + + arm_memset_s8(output, pad_value, batch_block_size * pre_pad->n); + output += batch_block_size * pre_pad->n; + for (int32_t b = 0; b < input_size->n; b++) + { + + arm_memset_s8(output, pad_value, row_block_size * pre_pad->h); + output += row_block_size * pre_pad->h; + for (int32_t y = 0; y < input_size->h; y++) + { + + arm_memset_s8(output, pad_value, col_block_size * pre_pad->w); + output += col_block_size * pre_pad->w; + if (input_size->c == output_size.c) + { + arm_memcpy_s8(output, input, input_size->w * input_size->c); + output += input_size->w * input_size->c; + input += input_size->w * input_size->c; + } + else + { + for (int32_t x = 0; x < input_size->w; x++) + { + + arm_memset_s8(output, pad_value, pre_pad->c); + output += pre_pad->c; + + arm_memcpy_s8(output, input, input_size->c); + output += input_size->c; + input += input_size->c; + + arm_memset_s8(output, pad_value, post_pad->c); + output += post_pad->c; + } + } + + arm_memset_s8(output, pad_value, col_block_size * post_pad->w); + output += col_block_size * post_pad->w; + } + + arm_memset_s8(output, pad_value, row_block_size * post_pad->h); + output += row_block_size * post_pad->h; + } + arm_memset_s8(output, pad_value, batch_block_size * post_pad->n); + + return ARM_CMSIS_NN_SUCCESS; +} + +/** + * @} end of Pad group + */ diff --git a/Tests/UnitTest/CMakeLists.txt b/Tests/UnitTest/CMakeLists.txt index 
a333bcc4..495d17db 100644 --- a/Tests/UnitTest/CMakeLists.txt +++ b/Tests/UnitTest/CMakeLists.txt @@ -109,6 +109,7 @@ add_subdirectory(TestCases/test_arm_transpose_conv_s8) add_subdirectory(TestCases/test_arm_lstm_unidirectional_s16) add_subdirectory(TestCases/test_arm_batch_matmul_s8) add_subdirectory(TestCases/test_arm_batch_matmul_s16) +add_subdirectory(TestCases/test_arm_pad_s8) set(MAKE_CMD "python3") set(MAKE_CMD_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/unittest_targets.py") diff --git a/Tests/UnitTest/README.md b/Tests/UnitTest/README.md index 01ff3eb1..427fbfa5 100644 --- a/Tests/UnitTest/README.md +++ b/Tests/UnitTest/README.md @@ -23,7 +23,7 @@ If in a virtual environment just start by upgrading pip. pip install --upgrade pip ``` -After upgrading pip, the requirements file found in Tests/UnitTests can be installed. This contains all +After upgrading pip, the requirements file found in Tests/UnitTests can be installed. This contains all python modules required to run all of the scripts. This will install tensorflow and keras to allow the use of the generate_test_data.py script. If you have version specific requirements, it is reccomended to install this requirements.txt in a virtual environment. @@ -74,11 +74,11 @@ The easiest way to run the unit tests on Corstone-300 is to use the build_and_ru Sample usage: ``` -./build_and_run_tests.sh -c cortex-m3,cortex-m7,cortex-m55 -o '-Ofast' +./build_and_run_tests.sh -c cortex-m3,cortex-m7,cortex-m55 -o '-Ofast' ``` By default the script will download and target gcc. To use arm compiler ensure that arm compilers folder is located in path, export CC and use the -a option on the script. -Downloaded dependencies including python venv can be found in Tests/UnitTests/downloads. Test elfs can be found in Tests/UnitTests/build-($cpu) directories. +Downloaded dependencies including python venv can be found in Tests/UnitTests/downloads. Test elfs can be found in Tests/UnitTests/build-($cpu) directories. Otherwise, you can build it manually: @@ -150,6 +150,7 @@ Operator bit-exactness compability: | add | x | x | | mul | x | x | | batch matmul | x | x | +| pad | x | x | | minimum | x | x | | maximum | x | x | @@ -178,6 +179,7 @@ Current progress: | add | x | | | mul | x | | | batch matmul | | x | +| pad | | x | | minimum | | x | | maximum | | x | diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/op_pad.py b/Tests/UnitTest/RefactoredTestGen/Lib/op_pad.py new file mode 100644 index 00000000..0ee5e6a7 --- /dev/null +++ b/Tests/UnitTest/RefactoredTestGen/Lib/op_pad.py @@ -0,0 +1,69 @@ +# SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import Lib.op_utils +import tensorflow as tf +import math +import numpy as np + +from tensorflow.lite.python.interpreter import Interpreter +from tensorflow.lite.python.interpreter import OpResolverType +import tf_keras as keras + +class Op_pad(Lib.op_utils.Op_type): + + def get_shapes(params): + shapes = {} + shapes["input_tensor"] = (params["input_n"], params["input_h"], params["input_w"], params["input_c"]) + shapes["representational_dataset"] = shapes["input_tensor"] + + return shapes + + def generate_keras_model(shapes, params): + + model = keras.models.Sequential() + model.add(keras.layers.InputLayer(input_shape=shapes["input_tensor"][1:])) + + if (params["pre_pad_n"] == params["post_pad_n"] == params["pre_pad_h"] == params["post_pad_h"] == 0): + model.add(keras.layers.ZeroPadding2D(padding=((params["pre_pad_w"], params["post_pad_w"]), (params["pre_pad_c"], params["post_pad_c"])), data_format="channels_first")) + elif (params["pre_pad_n"] == params["post_pad_n"] == params["pre_pad_c"] == params["post_pad_c"] == 0): + model.add(keras.layers.ZeroPadding2D(padding=((params["pre_pad_h"], params["post_pad_h"]), (params["pre_pad_w"], params["post_pad_w"])), data_format="channels_last")) + else: + raise ValueError(f"Keras can only generate padding for (h,w) or (w,c), the others must be zero.") + + return model + + def generate_data_tflite(tflite_fname, params): + tensors = {} + effective_scales = {} + scales = {} + generated_params = {} + + generated_params["pad_value"] = -128 + + interpreter = Interpreter(str(tflite_fname), experimental_op_resolver_type=OpResolverType.BUILTIN_REF) + interpreter.allocate_tensors() + + output_details = interpreter.get_output_details() + output_n = output_details[0]['shape'][3] + output_h = output_details[0]['shape'][2] + output_w = output_details[0]['shape'][1] + output_c = output_details[0]['shape'][0] + + generated_params["output_size"] = output_n * output_h * output_w * output_c; + + return Lib.op_utils.Generated_data(generated_params, tensors, scales, effective_scales) + diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/test.py b/Tests/UnitTest/RefactoredTestGen/Lib/test.py index e1a72cc2..f9eda303 100644 --- a/Tests/UnitTest/RefactoredTestGen/Lib/test.py +++ b/Tests/UnitTest/RefactoredTestGen/Lib/test.py @@ -20,6 +20,7 @@ import Lib.op_batch_matmul import Lib.op_fully_connected import Lib.op_pooling +import Lib.op_pad import Lib.op_maximum_minimum import tensorflow as tf import numpy as np @@ -186,6 +187,8 @@ def get_op_type(op_type_string): return Lib.op_fully_connected.Op_fully_connected elif op_type_string == "avgpool" or op_type_string == "maxpool": return Lib.op_pooling.Op_pooling + if op_type_string == "pad": + return Lib.op_pad.Op_pad elif op_type_string == "maximum_minimum": return Lib.op_maximum_minimum.Op_maximum_minimum else: diff --git a/Tests/UnitTest/RefactoredTestGen/test_plan.json b/Tests/UnitTest/RefactoredTestGen/test_plan.json index dd269014..5c153876 100644 --- a/Tests/UnitTest/RefactoredTestGen/test_plan.json +++ b/Tests/UnitTest/RefactoredTestGen/test_plan.json @@ -820,7 +820,7 @@ "out_ch" : 22, "generate_bias": true, "per_channel_quant": true - } + } ] }, { @@ -841,8 +841,8 @@ "stride_w": 9, "stride_h": 5, "pad" : "SAME", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": -128 }, {"name" : "avgpooling_1", "batch_size" : 1, @@ -855,8 +855,8 @@ "stride_w": 1, "stride_h": 2, "pad" : "VALID", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": 
-128 }, {"name" : "avgpooling_2", "batch_size" : 1, @@ -869,8 +869,8 @@ "stride_w": 1, "stride_h": 2, "pad" : "SAME", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": -128 }, {"name" : "avgpooling_3", "batch_size" : 1, @@ -883,8 +883,8 @@ "stride_w": 2, "stride_h": 1, "pad" : "VALID", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": -128 }, {"name" : "avgpooling_4", "batch_size" : 3, @@ -897,8 +897,8 @@ "stride_w": 1, "stride_h": 3, "pad" : "SAME", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": -128 }, {"name" : "avgpooling_5", "batch_size" : 1, @@ -911,8 +911,8 @@ "stride_w": 1, "stride_h": 1, "pad" : "SAME", - "activation_max": 6, - "activation_min": 0 + "activation_max": 6, + "activation_min": 0 } ] }, @@ -934,8 +934,8 @@ "stride_w": 2, "stride_h": 1, "pad" : "SAME", - "activation_max": 32767, - "activation_min": -32768 + "activation_max": 32767, + "activation_min": -32768 }, {"name" : "avgpooling_int16_1", "batch_size" : 3, @@ -948,8 +948,8 @@ "stride_w": 2, "stride_h": 1, "pad" : "VALID", - "activation_max": 32767, - "activation_min": -32768 + "activation_max": 32767, + "activation_min": -32768 }, {"name" : "avgpooling_int16_2", "batch_size" : 1, @@ -962,8 +962,8 @@ "stride_w": 2, "stride_h": 1, "pad" : "VALID", - "activation_max": 32767, - "activation_min": -32768 + "activation_max": 32767, + "activation_min": -32768 }, {"name" : "avgpooling_int16_3", "batch_size" : 2, @@ -976,8 +976,8 @@ "stride_w": 9, "stride_h": 5, "pad" : "SAME", - "activation_max": 32767, - "activation_min": -32768 + "activation_max": 32767, + "activation_min": -32768 } ] }, @@ -999,8 +999,8 @@ "stride_w": 9, "stride_h": 5, "pad" : "SAME", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": -128 }, {"name" : "maxpooling_1", "batch_size" : 1, @@ -1013,8 +1013,8 @@ "stride_w": 1, "stride_h": 2, "pad" : "VALID", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": -128 }, {"name" : "maxpooling_2", "batch_size" : 1, @@ -1027,8 +1027,8 @@ "stride_w": 1, "stride_h": 2, "pad" : "SAME", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": -128 }, {"name" : "maxpooling_3", "batch_size" : 1, @@ -1041,8 +1041,8 @@ "stride_w": 2, "stride_h": 1, "pad" : "VALID", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": -128 }, {"name" : "maxpooling_4", "batch_size" : 1, @@ -1055,8 +1055,8 @@ "stride_w": 1, "stride_h": 3, "pad" : "SAME", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": -128 }, {"name" : "maxpooling_5", "batch_size" : 1, @@ -1069,10 +1069,10 @@ "stride_w": 1, "stride_h": 1, "pad" : "SAME", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": -128 }, - {"name" : "maxpooling_6", + {"name" : "maxpooling_6", "batch_size" : 1, "input_n" : 1, "input_w" : 1, @@ -1083,10 +1083,10 @@ "stride_w": 1, "stride_h": 3, "pad" : "SAME", - "activation_max": 127, - "activation_min": -128 + "activation_max": 127, + "activation_min": -128 }, - {"name" : "maxpooling_7", + {"name" : "maxpooling_7", "batch_size" : 1, "input_n" : 1, "input_w" : 4, @@ -1097,8 +1097,8 @@ "stride_w": 2, "stride_h": 2, "pad" : "VALID", - "activation_max": 6, - "activation_min": 0 + "activation_max": 6, + "activation_min": 0 } ] }, @@ -1120,8 +1120,8 @@ "stride_w": 2, 
"stride_h": 2, "pad" : "VALID", - "activation_max": 32767, - "activation_min": -32768 + "activation_max": 32767, + "activation_min": -32768 }, {"name" : "maxpool_int16_1", "batch_size" : 2, @@ -1134,8 +1134,8 @@ "stride_w": 2, "stride_h": 1, "pad" : "SAME", - "activation_max": 30000, - "activation_min": -30000 + "activation_max": 30000, + "activation_min": -30000 }, {"name" : "maxpool_int16_2", "batch_size" : 1, @@ -1148,8 +1148,45 @@ "stride_w": 1, "stride_h": 1, "pad" : "VALID", - "activation_max": 30000, - "activation_min": -30000 + "activation_max": 30000, + "activation_min": -30000 + } + ] +}, +{ + "suite_name" : "test_arm_pad_s8", + "op_type" : "pad", + "input_data_type": "int8_t", + "interpreter": "tensorflow", + "tflite_generator": "keras", + "tests" : [ + {"name" : "pad_int8_1", + "input_n" : 1, + "input_w" : 2, + "input_h" : 2, + "input_c" : 2, + "pre_pad_n": 0, + "pre_pad_h": 0, + "pre_pad_w": 1, + "pre_pad_c": 1, + "post_pad_n": 0, + "post_pad_h": 0, + "post_pad_w": 2, + "post_pad_c": 2 + }, + {"name" : "pad_int8_2", + "input_n" : 1, + "input_w" : 2, + "input_h" : 2, + "input_c" : 2, + "pre_pad_n": 0, + "pre_pad_h": 2, + "pre_pad_w": 2, + "pre_pad_c": 0, + "post_pad_n": 0, + "post_pad_h": 1, + "post_pad_w": 1, + "post_pad_c": 0 } ] }, diff --git a/Tests/UnitTest/TestCases/TestData/pad_int8_1/config_data.h b/Tests/UnitTest/TestCases/TestData/pad_int8_1/config_data.h new file mode 100644 index 00000000..07e73115 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/pad_int8_1/config_data.h @@ -0,0 +1,17 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#define PAD_INT8_1_INPUT_N 1 +#define PAD_INT8_1_INPUT_W 2 +#define PAD_INT8_1_INPUT_H 2 +#define PAD_INT8_1_INPUT_C 2 +#define PAD_INT8_1_PRE_PAD_N 0 +#define PAD_INT8_1_PRE_PAD_H 0 +#define PAD_INT8_1_PRE_PAD_W 1 +#define PAD_INT8_1_PRE_PAD_C 1 +#define PAD_INT8_1_POST_PAD_N 0 +#define PAD_INT8_1_POST_PAD_H 0 +#define PAD_INT8_1_POST_PAD_W 2 +#define PAD_INT8_1_POST_PAD_C 2 +#define PAD_INT8_1_PAD_VALUE -128 +#define PAD_INT8_1_OUTPUT_SIZE 50 diff --git a/Tests/UnitTest/TestCases/TestData/pad_int8_1/input_tensor.h b/Tests/UnitTest/TestCases/TestData/pad_int8_1/input_tensor.h new file mode 100644 index 00000000..502aef2a --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/pad_int8_1/input_tensor.h @@ -0,0 +1,6 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include + +const int8_t pad_int8_1_input_tensor[8] = {-102, 10, 30, 95, 76, 9, 79, -121}; diff --git a/Tests/UnitTest/TestCases/TestData/pad_int8_1/output.h b/Tests/UnitTest/TestCases/TestData/pad_int8_1/output.h new file mode 100644 index 00000000..89558d03 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/pad_int8_1/output.h @@ -0,0 +1,9 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int8_t pad_int8_1_output[50] = {-128, -128, -128, -128, -128, -128, -102, 10, -128, -128, -128, 30, 95, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, 76, 9, -128, -128, -128, 79, -121, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128}; diff --git a/Tests/UnitTest/TestCases/TestData/pad_int8_1/test_data.h b/Tests/UnitTest/TestCases/TestData/pad_int8_1/test_data.h new file mode 100644 index 00000000..0e46bdee --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/pad_int8_1/test_data.h @@ -0,0 +1,3 @@ +#include "config_data.h" +#include "input_tensor.h" +#include "output.h" diff --git a/Tests/UnitTest/TestCases/TestData/pad_int8_2/config_data.h b/Tests/UnitTest/TestCases/TestData/pad_int8_2/config_data.h new file mode 100644 index 00000000..c7303ef8 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/pad_int8_2/config_data.h @@ -0,0 +1,17 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#define PAD_INT8_2_INPUT_N 1 +#define PAD_INT8_2_INPUT_W 2 +#define PAD_INT8_2_INPUT_H 2 +#define PAD_INT8_2_INPUT_C 2 +#define PAD_INT8_2_PRE_PAD_N 0 +#define PAD_INT8_2_PRE_PAD_H 2 +#define PAD_INT8_2_PRE_PAD_W 2 +#define PAD_INT8_2_PRE_PAD_C 0 +#define PAD_INT8_2_POST_PAD_N 0 +#define PAD_INT8_2_POST_PAD_H 1 +#define PAD_INT8_2_POST_PAD_W 1 +#define PAD_INT8_2_POST_PAD_C 0 +#define PAD_INT8_2_PAD_VALUE -128 +#define PAD_INT8_2_OUTPUT_SIZE 50 diff --git a/Tests/UnitTest/TestCases/TestData/pad_int8_2/input_tensor.h b/Tests/UnitTest/TestCases/TestData/pad_int8_2/input_tensor.h new file mode 100644 index 00000000..021612bc --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/pad_int8_2/input_tensor.h @@ -0,0 +1,6 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include + +const int8_t pad_int8_2_input_tensor[8] = {77, 94, -41, 47, 21, 61, -98, -2}; diff --git a/Tests/UnitTest/TestCases/TestData/pad_int8_2/output.h b/Tests/UnitTest/TestCases/TestData/pad_int8_2/output.h new file mode 100644 index 00000000..17f6635e --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/pad_int8_2/output.h @@ -0,0 +1,9 @@ +// Generated by generate_test_data.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include + +const int8_t pad_int8_2_output[50] = {-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 77, 94, + -41, 47, -128, -128, -128, -128, -128, -128, 21, 61, -98, -2, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128}; diff --git a/Tests/UnitTest/TestCases/TestData/pad_int8_2/test_data.h b/Tests/UnitTest/TestCases/TestData/pad_int8_2/test_data.h new file mode 100644 index 00000000..0e46bdee --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/pad_int8_2/test_data.h @@ -0,0 +1,3 @@ +#include "config_data.h" +#include "input_tensor.h" +#include "output.h" diff --git a/Tests/UnitTest/TestCases/test_arm_pad_s8/CMakeLists.txt b/Tests/UnitTest/TestCases/test_arm_pad_s8/CMakeLists.txt new file mode 100644 index 00000000..b9c04b9c --- /dev/null +++ b/Tests/UnitTest/TestCases/test_arm_pad_s8/CMakeLists.txt @@ -0,0 +1,23 @@ +# +# SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +add_cmsis_nn_unit_test_executable(test_arm_pad_s8) + +target_sources(test_arm_pad_s8 PRIVATE + Unity/unity_test_arm_pad_s8.c + Unity/TestRunner/unity_test_arm_pad_s8_runner.c) diff --git a/Tests/UnitTest/TestCases/test_arm_pad_s8/Unity/unity_test_arm_pad_s8.c b/Tests/UnitTest/TestCases/test_arm_pad_s8/Unity/unity_test_arm_pad_s8.c new file mode 100644 index 00000000..8dcea1ea --- /dev/null +++ b/Tests/UnitTest/TestCases/test_arm_pad_s8/Unity/unity_test_arm_pad_s8.c @@ -0,0 +1,49 @@ +/* + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include "../test_arm_pad_s8.c" +#include "unity.h" + +#ifdef USING_FVP_CORSTONE_300 +extern void uart_init(void); +#endif + +/* This function is called from the autogenerated file. + * The name must be exactly like this + */ +void setUp(void) +{ /* This is run before EACH TEST */ +#ifdef USING_FVP_CORSTONE_300 + uart_init(); +#endif +} + +/* This function is called from the autogenerated file. 
+ * The name must be exactly like this + */ +void tearDown(void) {} + +void test_pad_int8_1_arm_pad_s8(void) { pad_int8_1_arm_pad_s8(); } + +void test_pad_int8_2_arm_pad_s8(void) { pad_int8_2_arm_pad_s8(); } diff --git a/Tests/UnitTest/TestCases/test_arm_pad_s8/test_arm_pad_s8.c b/Tests/UnitTest/TestCases/test_arm_pad_s8/test_arm_pad_s8.c new file mode 100644 index 00000000..9114d806 --- /dev/null +++ b/Tests/UnitTest/TestCases/test_arm_pad_s8/test_arm_pad_s8.c @@ -0,0 +1,58 @@ +/* + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../TestData/pad_int8_1/test_data.h" +#include "../TestData/pad_int8_2/test_data.h" +#include "../Utils/validate.h" +#include "arm_nn_types.h" +#include "arm_nnfunctions.h" +#include "unity.h" + +void pad_int8_1_arm_pad_s8(void) +{ + const int8_t *input_ptr = pad_int8_1_input_tensor; + int8_t output_ptr[PAD_INT8_1_OUTPUT_SIZE] = {0}; + + const cmsis_nn_dims input_size = {PAD_INT8_1_INPUT_N, PAD_INT8_1_INPUT_H, PAD_INT8_1_INPUT_W, PAD_INT8_1_INPUT_C}; + const cmsis_nn_dims pre_pad = { + PAD_INT8_1_PRE_PAD_N, PAD_INT8_1_PRE_PAD_H, PAD_INT8_1_PRE_PAD_W, PAD_INT8_1_PRE_PAD_C}; + const cmsis_nn_dims post_pad = { + PAD_INT8_1_POST_PAD_N, PAD_INT8_1_POST_PAD_H, PAD_INT8_1_POST_PAD_W, PAD_INT8_1_POST_PAD_C}; + + const arm_cmsis_nn_status result = + arm_pad_s8(input_ptr, output_ptr, PAD_INT8_1_PAD_VALUE, &input_size, &pre_pad, &post_pad); + TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result); + TEST_ASSERT_TRUE(validate(output_ptr, pad_int8_1_output, PAD_INT8_1_OUTPUT_SIZE)); +} + +void pad_int8_2_arm_pad_s8(void) +{ + const int8_t *input_ptr = pad_int8_2_input_tensor; + int8_t output_ptr[PAD_INT8_2_OUTPUT_SIZE] = {0}; + + const cmsis_nn_dims input_size = {PAD_INT8_2_INPUT_N, PAD_INT8_2_INPUT_H, PAD_INT8_2_INPUT_W, PAD_INT8_2_INPUT_C}; + const cmsis_nn_dims pre_pad = { + PAD_INT8_2_PRE_PAD_N, PAD_INT8_2_PRE_PAD_H, PAD_INT8_2_PRE_PAD_W, PAD_INT8_2_PRE_PAD_C}; + const cmsis_nn_dims post_pad = { + PAD_INT8_2_POST_PAD_N, PAD_INT8_2_POST_PAD_H, PAD_INT8_2_POST_PAD_W, PAD_INT8_2_POST_PAD_C}; + + const arm_cmsis_nn_status result = + arm_pad_s8(input_ptr, output_ptr, PAD_INT8_2_PAD_VALUE, &input_size, &pre_pad, &post_pad); + TEST_ASSERT_EQUAL(ARM_CMSIS_NN_SUCCESS, result); + TEST_ASSERT_TRUE(validate(output_ptr, pad_int8_2_output, PAD_INT8_2_OUTPUT_SIZE)); +}
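
---

For reference, below is a minimal usage sketch of the new operator outside the unit-test harness. It is illustrative only and not part of the patch: the wrapper function name pad_example is hypothetical, while the arm_pad_s8() signature, the input values, the pad value of -128, and the dimensions are taken from the pad_int8_1 test configuration above. The output size follows from adding the pre/post padding to each input dimension: n = 1, h = 0+2+0 = 2, w = 1+2+2 = 5, c = 1+2+2 = 5, i.e. 1*2*5*5 = 50 elements.

#include "arm_nn_types.h"
#include "arm_nnfunctions.h"

/* Illustrative sketch (not part of the patch): pad a 1x2x2x2 int8 NHWC
 * tensor with one leading and two trailing elements in both W and C,
 * filling with -128. The padded output is 1 x 2 x 5 x 5 = 50 bytes. */
static void pad_example(void)
{
    /* Same data as pad_int8_1_input_tensor in the generated test data. */
    static const int8_t input[1 * 2 * 2 * 2] = {-102, 10, 30, 95, 76, 9, 79, -121};
    static int8_t output[1 * 2 * 5 * 5];

    const cmsis_nn_dims input_size = {.n = 1, .h = 2, .w = 2, .c = 2};
    const cmsis_nn_dims pre_pad = {.n = 0, .h = 0, .w = 1, .c = 1};
    const cmsis_nn_dims post_pad = {.n = 0, .h = 0, .w = 2, .c = 2};

    const arm_cmsis_nn_status status =
        arm_pad_s8(input, output, -128, &input_size, &pre_pad, &post_pad);
    (void)status; /* ARM_CMSIS_NN_SUCCESS is the only documented return value. */
}

Note that the caller is responsible for sizing the output buffer as the product of the padded dimensions; the function itself performs no bounds checking, as is consistent with the rest of the CMSIS-NN API.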