diff --git a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch index 5d80e47df8f..eaed0decf23 100644 --- a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch +++ b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch @@ -1,37 +1,42 @@ -The comment is not true, the function actually takes the arguments as it should -Hence just redefine the function +Fix compile error on ARM: +> ./tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h:132:58: error: cannot convert 'int32x2_t' to 'int8x8_t' -Author: Alexander Grund (TU Dresden) +From https://github.com/tensorflow/tensorflow/pull/53782 + +From 4463f25d1622d162f870ff685da20f2c6df5bc6a Mon Sep 17 00:00:00 2001 +From: Stephan Hartmann +Date: Sat, 15 Jan 2022 21:06:27 +0100 +Subject: [PATCH] Fix casting in vdotq_four_lane_s32() in TFLite + +When building with GCC and dotprod ARM extension enabled, +vreinterpret_s32_s8() casts int8x8_t to int32x2_t. However, third +argument of vdotq_lane_s32() expects parameter of type int8x8_t. +--- + .../optimized/depthwiseconv_3x3_filter_common.h | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h -index 916edd561ff..9c8025dac49 100644 +index 916edd561ff32..c519a81bc864d 100644 --- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h +++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h -@@ -122,26 +122,7 @@ inline int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b) { - #endif // !__aarch64__ - - #ifdef __ARM_FEATURE_DOTPROD --// The vdotq_lane_s32 takes int8x8t for the rhs parameter, whereas the actual --// instruction selects from between 4 32-bit (4x8-bit packed) sub-registers, an --// unusual interpretation of "lane". --inline int32x4_t vdotq_four_lane_s32(int32x4_t acc, int8x16_t lhs, -- int8x16_t rhs, const int lane) { -- switch (lane) { -- case 0: +@@ -129,16 +129,14 @@ inline int32x4_t vdotq_four_lane_s32(int32x4_t acc, int8x16_t lhs, + int8x16_t rhs, const int lane) { + switch (lane) { + case 0: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 0); -- case 1: ++ return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 0); + case 1: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 1); -- case 2: ++ return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 1); + case 2: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)), - 0); -- case 3: -- default: ++ return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 0); + case 3: + default: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)), - 1); -- } --} -- -+#define vdotq_four_lane_s32 vdotq_lane_s32 - #else ++ return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 1); + } + } - inline int32x4_t vdotq_s32(int32x4_t acc, int8x16_t lhs, int8x16_t rhs) { diff --git a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b-CUDA-11.4.1.eb b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b-CUDA-11.4.1.eb index 2b038a9c0b5..7af33a9479b 100644 --- a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b-CUDA-11.4.1.eb +++ b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b-CUDA-11.4.1.eb @@ -178,7 +178,7 @@ exts_list = [ {'TensorFlow-2.5.0-fix-alias-violation-in-absl.patch': '12454fda3330fb45cd380377e283f04488b40e0b8ae7378e786ddf731a581f75'}, {'TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch': - '6abfadc0f67ff3b510d70430843201cb46d7bd65db045ec9b482af70e0c8c0c8'}, + '5edea55ce87d5adb14f6ed6996f308879e268b8cec760cf11288e3a56179a029'}, {'TensorFlow-2.5.0_fix-crash-on-shutdown.patch': '578c7493221ebd3dc25ca43d63a72cbb28fdf4112b1e2baa7390f25781bd78fd'}, {'TensorFlow-2.7.1_fix_cpu_count.patch': diff --git a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b.eb b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b.eb index 040743f82e7..323be5f47ac 100644 --- a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b.eb +++ b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.8.4-foss-2021b.eb @@ -174,7 +174,7 @@ exts_list = [ {'TensorFlow-2.5.0-fix-alias-violation-in-absl.patch': '12454fda3330fb45cd380377e283f04488b40e0b8ae7378e786ddf731a581f75'}, {'TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch': - '6abfadc0f67ff3b510d70430843201cb46d7bd65db045ec9b482af70e0c8c0c8'}, + '5edea55ce87d5adb14f6ed6996f308879e268b8cec760cf11288e3a56179a029'}, {'TensorFlow-2.5.0_fix-crash-on-shutdown.patch': '578c7493221ebd3dc25ca43d63a72cbb28fdf4112b1e2baa7390f25781bd78fd'}, {'TensorFlow-2.7.1_fix_cpu_count.patch':