Skip to content

Commit

Permalink
Fix ARM patch
Browse files Browse the repository at this point in the history
Use updated patch from #17101
  • Loading branch information
Flamefire committed Jan 13, 2023
1 parent 5acbdd2 commit eadb4b6
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 27 deletions.
Original file line number Diff line number Diff line change
@@ -1,37 +1,42 @@
The comment is not true: the function actually takes the arguments as it should.
Hence, just redefine the function.
Fix compile error on ARM:
> ./tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h:132:58: error: cannot convert 'int32x2_t' to 'int8x8_t'

Author: Alexander Grund (TU Dresden)
From https://github.com/tensorflow/tensorflow/pull/53782

From 4463f25d1622d162f870ff685da20f2c6df5bc6a Mon Sep 17 00:00:00 2001
From: Stephan Hartmann <stha09@googlemail.com>
Date: Sat, 15 Jan 2022 21:06:27 +0100
Subject: [PATCH] Fix casting in vdotq_four_lane_s32() in TFLite

When building with GCC and the dotprod ARM extension enabled,
vreinterpret_s32_s8() casts int8x8_t to int32x2_t. However, the third
argument of vdotq_lane_s32() expects a parameter of type int8x8_t.
---
.../optimized/depthwiseconv_3x3_filter_common.h | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h
index 916edd561ff..9c8025dac49 100644
index 916edd561ff32..c519a81bc864d 100644
--- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h
+++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h
@@ -122,26 +122,7 @@ inline int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b) {
#endif // !__aarch64__

#ifdef __ARM_FEATURE_DOTPROD
-// The vdotq_lane_s32 takes int8x8t for the rhs parameter, whereas the actual
-// instruction selects from between 4 32-bit (4x8-bit packed) sub-registers, an
-// unusual interpretation of "lane".
-inline int32x4_t vdotq_four_lane_s32(int32x4_t acc, int8x16_t lhs,
- int8x16_t rhs, const int lane) {
- switch (lane) {
- case 0:
@@ -129,16 +129,14 @@ inline int32x4_t vdotq_four_lane_s32(int32x4_t acc, int8x16_t lhs,
int8x16_t rhs, const int lane) {
switch (lane) {
case 0:
- return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 0);
- case 1:
+ return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 0);
case 1:
- return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 1);
- case 2:
+ return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 1);
case 2:
- return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)),
- 0);
- case 3:
- default:
+ return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 0);
case 3:
default:
- return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)),
- 1);
- }
-}
-
+#define vdotq_four_lane_s32 vdotq_lane_s32
#else
+ return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 1);
}
}

inline int32x4_t vdotq_s32(int32x4_t acc, int8x16_t lhs, int8x16_t rhs) {
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ exts_list = [
{'TensorFlow-2.5.0-fix-alias-violation-in-absl.patch':
'12454fda3330fb45cd380377e283f04488b40e0b8ae7378e786ddf731a581f75'},
{'TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch':
'6abfadc0f67ff3b510d70430843201cb46d7bd65db045ec9b482af70e0c8c0c8'},
'5edea55ce87d5adb14f6ed6996f308879e268b8cec760cf11288e3a56179a029'},
{'TensorFlow-2.5.0_fix-crash-on-shutdown.patch':
'578c7493221ebd3dc25ca43d63a72cbb28fdf4112b1e2baa7390f25781bd78fd'},
{'TensorFlow-2.7.1_fix_cpu_count.patch':
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ exts_list = [
{'TensorFlow-2.5.0-fix-alias-violation-in-absl.patch':
'12454fda3330fb45cd380377e283f04488b40e0b8ae7378e786ddf731a581f75'},
{'TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch':
'6abfadc0f67ff3b510d70430843201cb46d7bd65db045ec9b482af70e0c8c0c8'},
'5edea55ce87d5adb14f6ed6996f308879e268b8cec760cf11288e3a56179a029'},
{'TensorFlow-2.5.0_fix-crash-on-shutdown.patch':
'578c7493221ebd3dc25ca43d63a72cbb28fdf4112b1e2baa7390f25781bd78fd'},
{'TensorFlow-2.7.1_fix_cpu_count.patch':
Expand Down

0 comments on commit eadb4b6

Please sign in to comment.