diff --git a/sgemm.cpp b/sgemm.cpp
index 799723b2ffe789..862dfa0d47ed88 100644
--- a/sgemm.cpp
+++ b/sgemm.cpp
@@ -213,6 +213,29 @@ inline U madd(T a, T b, U c) {
     return add(mul(a, b), c);
 }
 
+// Explicit NEON specializations of madd(). Xcode needs a little help to turn
+// the generic add(mul(a, b), c) into fmla instructions, so the fused
+// multiply-add intrinsics are called directly instead of relying on the
+// compiler to fuse the two operations. vfmaq_*(c, b, a) yields c + b * a,
+// i.e. the fused form of the a * b + c computed by the generic template.
+//
+// __ARM_NEON is required in addition to __ARM_FEATURE_FMA: the latter
+// describes the floating-point hardware and can be defined on targets built
+// without NEON, where the vector types and intrinsics below are unavailable.
+#if defined(__ARM_NEON) && defined(__ARM_FEATURE_FMA)
+template <>
+inline float32x4_t madd(float32x4_t a, float32x4_t b, float32x4_t c) {
+    return vfmaq_f32(c, b, a);
+}
+// Half-precision variant: only with fp16 vector arithmetic, never under MSVC.
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(_MSC_VER)
+template <>
+inline float16x8_t madd(float16x8_t a, float16x8_t b, float16x8_t c) {
+    return vfmaq_f16(c, b, a);
+}
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC && !_MSC_VER
+#endif // __ARM_NEON && __ARM_FEATURE_FMA
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // FLOATING POINT MATRIX MULTIPLICATION
 