Skip to content

Commit

Permalink
Fix build when NEON is disabled
Browse files: browse the repository at this point in the history
  • Loading branch information
nihui committed Sep 3, 2024
1 parent 464da66 commit f269b20
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/layer/arm/rmsnorm_arm.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -90,13 +90,15 @@ static void rmsnorm(float* ptr, const float* gamma_ptr, float eps, int elemcount
float a;
if (elempack == 1)
{
#if __ARM_NEON
#if __aarch64__
sqsum += vaddvq_f32(_sqsum);
#else
float32x2_t _s2 = vadd_f32(vget_low_f32(_sqsum), vget_high_f32(_sqsum));
_s2 = vpadd_f32(_s2, _s2);
sqsum += vget_lane_f32(_s2, 0);
#endif
#endif // __ARM_NEON

a = 1.f / sqrtf(sqsum / elemcount + eps);
#if __ARM_NEON
Expand Down Expand Up @@ -286,13 +288,15 @@ static void rmsnorm_bf16s(unsigned short* ptr, const float* gamma_ptr, float eps
float a;
if (elempack == 1)
{
#if __ARM_NEON
#if __aarch64__
sqsum += vaddvq_f32(_sqsum);
#else
float32x2_t _s2 = vadd_f32(vget_low_f32(_sqsum), vget_high_f32(_sqsum));
_s2 = vpadd_f32(_s2, _s2);
sqsum += vget_lane_f32(_s2, 0);
#endif
#endif // __ARM_NEON

a = 1.f / sqrtf(sqsum / elemcount + eps);
#if __ARM_NEON
Expand Down

0 comments on commit f269b20

Please sign in to comment.