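Fix ARM vector-intrinsics patch for TensorFlow

Use the updated patch from #17101, which takes the upstream fix from https://github.com/tensorflow/tensorflow/pull/53782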
easybuilders · Jan 13, 2023 · eadb4b6 · eadb4b6
1 parent 5acbdd2
commit eadb4b6
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 27 deletions.
diff --git a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch
@@ -1,37 +1,42 @@
-The comment is not true, the function actually takes the arguments as it should
-Hence just redefine the function
+Fix compile error on ARM:
+> ./tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h:132:58: error: cannot convert 'int32x2_t' to 'int8x8_t'
 
-Author: Alexander Grund (TU Dresden)
+From https://github.com/tensorflow/tensorflow/pull/53782
+
+From 4463f25d1622d162f870ff685da20f2c6df5bc6a Mon Sep 17 00:00:00 2001
+From: Stephan Hartmann <stha09@googlemail.com>
+Date: Sat, 15 Jan 2022 21:06:27 +0100
+Subject: [PATCH] Fix casting in vdotq_four_lane_s32() in TFLite
+
+When building with GCC and dotprod ARM extension enabled,
+vreinterpret_s32_s8() casts int8x8_t to int32x2_t. However, third
+argument of vdotq_lane_s32() expects parameter of type int8x8_t.
+---
+ .../optimized/depthwiseconv_3x3_filter_common.h        | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
 
 diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h
-index 916edd561ff..9c8025dac49 100644
+index 916edd561ff32..c519a81bc864d 100644
 --- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h
 +++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h
-@@ -122,26 +122,7 @@ inline int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b) {
- #endif  // !__aarch64__
-
- #ifdef __ARM_FEATURE_DOTPROD
--// The vdotq_lane_s32 takes int8x8t for the rhs parameter, whereas the actual
--// instruction selects from between 4 32-bit (4x8-bit packed) sub-registers, an
--// unusual interpretation of "lane".
--inline int32x4_t vdotq_four_lane_s32(int32x4_t acc, int8x16_t lhs,
--                                     int8x16_t rhs, const int lane) {
--  switch (lane) {
--    case 0:
+@@ -129,16 +129,14 @@ inline int32x4_t vdotq_four_lane_s32(int32x4_t acc, int8x16_t lhs,
+                                      int8x16_t rhs, const int lane) {
+   switch (lane) {
+     case 0:
 -      return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 0);
--    case 1:
++      return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 0);
+     case 1:
 -      return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 1);
--    case 2:
++      return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 1);
+     case 2:
 -      return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)),
 -                            0);
--    case 3:
--    default:
++      return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 0);
+     case 3:
+     default:
 -      return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)),
 -                            1);
--  }
--}
--
-+#define vdotq_four_lane_s32 vdotq_lane_s32
- #else
++      return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 1);
+   }
+ }
 
- inline int32x4_t vdotq_s32(int32x4_t acc, int8x16_t lhs, int8x16_t rhs) {
diff --git a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b-CUDA-11.4.1.eb b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b-CUDA-11.4.1.eb
@@ -178,7 +178,7 @@ exts_list = [
             {'TensorFlow-2.5.0-fix-alias-violation-in-absl.patch':
              '12454fda3330fb45cd380377e283f04488b40e0b8ae7378e786ddf731a581f75'},
             {'TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch':
-             '6abfadc0f67ff3b510d70430843201cb46d7bd65db045ec9b482af70e0c8c0c8'},
+             '5edea55ce87d5adb14f6ed6996f308879e268b8cec760cf11288e3a56179a029'},
             {'TensorFlow-2.5.0_fix-crash-on-shutdown.patch':
              '578c7493221ebd3dc25ca43d63a72cbb28fdf4112b1e2baa7390f25781bd78fd'},
             {'TensorFlow-2.7.1_fix_cpu_count.patch':

diff --git a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b.eb b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b.eb
@@ -174,7 +174,7 @@ exts_list = [
             {'TensorFlow-2.5.0-fix-alias-violation-in-absl.patch':
              '12454fda3330fb45cd380377e283f04488b40e0b8ae7378e786ddf731a581f75'},
             {'TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch':
-             '6abfadc0f67ff3b510d70430843201cb46d7bd65db045ec9b482af70e0c8c0c8'},
+             '5edea55ce87d5adb14f6ed6996f308879e268b8cec760cf11288e3a56179a029'},
             {'TensorFlow-2.5.0_fix-crash-on-shutdown.patch':
              '578c7493221ebd3dc25ca43d63a72cbb28fdf4112b1e2baa7390f25781bd78fd'},
             {'TensorFlow-2.7.1_fix_cpu_count.patch':