From d3d40bfd1e20d7c77029081c56188c3381a1a1b4 Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Sun, 21 Apr 2024 12:22:39 -0700
Subject: [PATCH] Help xcode produce fmla instructions

---
 sgemm.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/sgemm.cpp b/sgemm.cpp
index 799723b2ffe789..862dfa0d47ed88 100644
--- a/sgemm.cpp
+++ b/sgemm.cpp
@@ -213,6 +213,27 @@ inline U madd(T a, T b, U c) {
     return add(mul(a, b), c);
 }
 
+// NEON specializations of madd(a, b, c) = a * b + c.
+// Xcode needs a little help to produce fmla instructions from the generic
+// add(mul(a, b), c) above, so the fused intrinsic is called directly.
+// vfmaq_*(acc, x, y) returns acc + x * y, hence the (c, b, a) argument order.
+// __ARM_NEON is checked too: __ARM_FEATURE_FMA alone only describes the FP
+// unit and does not guarantee that the NEON vector types are declared.
+#if defined(__ARM_NEON) && defined(__ARM_FEATURE_FMA)
+template <>
+inline float32x4_t madd(float32x4_t a, float32x4_t b, float32x4_t c) {
+    return vfmaq_f32(c, b, a);
+}
+// Half-precision variant: only built when the target advertises FP16 vector
+// arithmetic, and never under MSVC.
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(_MSC_VER)
+template <>
+inline float16x8_t madd(float16x8_t a, float16x8_t b, float16x8_t c) {
+    return vfmaq_f16(c, b, a);
+}
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC && !_MSC_VER
+#endif // __ARM_NEON && __ARM_FEATURE_FMA
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // FLOATING POINT MATRIX MULTIPLICATION