Skip to content

Commit

Permalink
Help xcode produce fmla instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
jart committed Apr 21, 2024
1 parent 9d4d14c commit d3d40bf
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions sgemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,20 @@ inline U madd(T a, T b, U c) {
return add(mul(a, b), c);
}

// xcode needs a little help to produce fmla instructions
#if defined(__ARM_FEATURE_FMA)
// Explicit specialization of madd() for vectors of four 32-bit floats.
// Returns c + b * a per lane through the NEON fused multiply-add intrinsic
// instead of going through the generic add(mul(a, b), c) path.
template <>
inline float32x4_t madd(float32x4_t a, float32x4_t b, float32x4_t c) {
    // vfmaq_f32 takes the accumulator first, followed by the two multiplicands.
    const float32x4_t fused = vfmaq_f32(c, b, a);
    return fused;
}
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(_MSC_VER)
// Explicit specialization of madd() for vectors of eight 16-bit floats.
// Returns c + b * a per lane through the NEON half-precision fused
// multiply-add intrinsic instead of the generic add(mul(a, b), c) path.
template <>
inline float16x8_t madd(float16x8_t a, float16x8_t b, float16x8_t c) {
    // vfmaq_f16 takes the accumulator first, followed by the two multiplicands.
    const float16x8_t fused = vfmaq_f16(c, b, a);
    return fused;
}
#endif
#endif

////////////////////////////////////////////////////////////////////////////////////////////////////
// FLOATING POINT MATRIX MULTIPLICATION

Expand Down

0 comments on commit d3d40bf

Please sign in to comment.