From 6aa28af6a91ff49f0d0b66c7bb1c31377c3da759 Mon Sep 17 00:00:00 2001
From: nihuini
Date: Thu, 11 Jul 2024 16:58:50 +0800
Subject: [PATCH 1/2] fix potential fp16s bf16s conflicts on armv7 vfpv4

---
 src/net.cpp        | 4 ++--
 tests/testutil.cpp | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/net.cpp b/src/net.cpp
index 996337ba36a..c84107de23c 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -622,7 +622,7 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     // clang-format off
     // *INDENT-OFF*
 #if NCNN_VFPV4
-    if (opt.use_fp16_storage && cpu_support_arm_vfpv4() && layer->support_fp16_storage)
+    if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && layer->support_fp16_storage)
     {
         Mat bottom_blob_fp16;
         cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
@@ -741,7 +741,7 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     // clang-format off
     // *INDENT-OFF*
 #if NCNN_VFPV4
-    if (opt.use_fp16_storage && cpu_support_arm_vfpv4() && !layer->support_fp16_storage)
+    if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && !layer->support_fp16_storage)
     {
         Mat bottom_blob_fp32;
         cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
diff --git a/tests/testutil.cpp b/tests/testutil.cpp
index 07d95547d44..893b85418e2 100644
--- a/tests/testutil.cpp
+++ b/tests/testutil.cpp
@@ -329,7 +329,7 @@ static int convert_to_optimal_layout(const ncnn::Mat& a, ncnn::Mat& a4, const nc
     // clang-format off
     // *INDENT-OFF*
 #if NCNN_VFPV4
-    if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+    if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
         ncnn::cast_float32_to_float16(a, a4, opt);
     }
@@ -450,7 +450,7 @@ static int convert_to_vanilla_layout(const ncnn::Mat& c4, ncnn::Mat& c, const nc
     // clang-format off
     // *INDENT-OFF*
 #if NCNN_VFPV4
-    if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+    if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
     {
         ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
     }

From 6bc83d623b83eba13e54ee1b48a20527c7aeab6f Mon Sep 17 00:00:00 2001
From: nihuini
Date: Thu, 11 Jul 2024 17:29:47 +0800
Subject: [PATCH 2/2] but prefer fp16 on armv8.2

---
 src/net.cpp        | 32 +++++++++++++++++++++++++++++++-
 tests/testutil.cpp | 14 ++++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/src/net.cpp b/src/net.cpp
index c84107de23c..3574944e726 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -621,6 +621,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio

     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && layer->support_fp16_storage)
+    {
+        Mat bottom_blob_fp16;
+        cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
+        bottom_blob = bottom_blob_fp16;
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && layer->support_fp16_storage)
     {
@@ -740,6 +749,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio

     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && !layer->support_fp16_storage)
+    {
+        Mat bottom_blob_fp32;
+        cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
+        bottom_blob = bottom_blob_fp32;
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && !layer->support_fp16_storage)
     {
@@ -2719,8 +2737,20 @@ int Extractor::extract(int blob_index, Mat& feat, int type)

     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (d->opt.use_fp16_storage && cpu_support_arm_asimdhp() && (type == 0))
+    {
+        if (feat.elembits() == 16)
+        {
+            Mat feat_fp32;
+            cast_float16_to_float32(feat, feat_fp32, d->opt);
+            feat = feat_fp32;
+        }
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
-    if (d->opt.use_fp16_storage && cpu_support_arm_vfpv4() && (type == 0))
+    if (d->opt.use_fp16_storage && !d->opt.use_bf16_storage && cpu_support_arm_vfpv4() && (type == 0))
     {
         if (feat.elembits() == 16)
         {
diff --git a/tests/testutil.cpp b/tests/testutil.cpp
index 893b85418e2..837043cb754 100644
--- a/tests/testutil.cpp
+++ b/tests/testutil.cpp
@@ -328,6 +328,13 @@ static int convert_to_optimal_layout(const ncnn::Mat& a, ncnn::Mat& a4, const nc
 {
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+    {
+        ncnn::cast_float32_to_float16(a, a4, opt);
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
@@ -449,6 +456,13 @@ static int convert_to_vanilla_layout(const ncnn::Mat& c4, ncnn::Mat& c, const nc

     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+    {
+        ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
     {
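
Below is a minimal usage sketch (not part of the diffs above) of the option combination that patch 1 guards against. It assumes the public ncnn Net/Option/Extractor API; the model files, blob names, and input shape are placeholders. With both storage flags enabled on an armv7 vfpv4-only device, the old condition could cast blobs to fp16 even though bf16 storage was also requested; patch 1 skips the fp16 cast in that case, and patch 2 keeps fp16 preferred on armv8.2 (asimdhp) CPUs.

#include "net.h"

int main()
{
    ncnn::Net net;
    net.opt.use_fp16_storage = true; // request fp16 storage
    net.opt.use_bf16_storage = true; // request bf16 storage too -> the potential conflict on armv7 vfpv4

    // placeholder model files for illustration only
    if (net.load_param("model.param") || net.load_model("model.bin"))
        return -1;

    ncnn::Mat in(224, 224, 3); // placeholder input
    in.fill(0.5f);

    ncnn::Extractor ex = net.create_extractor();
    ex.input("data", in);

    ncnn::Mat out;
    // with patch 1, fp16 input casting is skipped on vfpv4-only CPUs when
    // use_bf16_storage is set; armv8.2 devices still take the fp16 path (patch 2)
    ex.extract("output", out);
    return 0;
}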