From 207e166d187d10c501455730ede8be622e0723ef Mon Sep 17 00:00:00 2001 From: nihuini Date: Sun, 29 Sep 2024 19:10:51 +0800 Subject: [PATCH] vfpv4 fp16 --- src/layer/arm/gemm_arm.cpp | 602 +- src/layer/arm/gemm_arm.h | 2 + src/layer/arm/gemm_arm_asimddp.cpp | 31 + src/layer/arm/gemm_arm_i8mm.cpp | 21 + src/layer/arm/gemm_arm_vfpv4.cpp | 51 + src/layer/arm/gemm_int8.h | 36 + src/layer/arm/gemm_int8_bf16s.h | 83 +- src/layer/arm/gemm_int8_fp16s.h | 12834 +++++++++++++++++++++++++++ 8 files changed, 13360 insertions(+), 300 deletions(-) create mode 100644 src/layer/arm/gemm_int8_fp16s.h diff --git a/src/layer/arm/gemm_arm.cpp b/src/layer/arm/gemm_arm.cpp index 27dbc93423a..cb25ee6a0a4 100644 --- a/src/layer/arm/gemm_arm.cpp +++ b/src/layer/arm/gemm_arm.cpp @@ -4250,7 +4250,7 @@ int Gemm_arm::create_pipeline(const Option& opt) if (int8_scale_term) { // support_packing = false; - support_fp16_storage = false; + // support_fp16_storage = false; // support_bf16_storage = false; return create_pipeline_int8(opt); // return 0; @@ -5301,6 +5301,208 @@ int Gemm_arm::forward_bf16s(const std::vector& bottom_blobs, std::vector& bottom_blobs, std::vector 4 ? 4 : out_elempack; + if (output_elempack) out_elempack = output_elempack; size_t out_elemsize = 4u * out_elempack; + // FIXME use output_elemtype instead of input_elemtype + int output_elemtype = input_elemtype; + + // TODO use output_elemtype if (opt.use_bf16_storage) { out_elemsize = 2u * out_elempack; } +#if NCNN_VFPV4 + else if (support_fp16_storage && opt.use_fp16_storage) + { + out_elemsize = 2u * out_elempack; + } +#endif Mat& top_blob = top_blobs[0]; if (output_transpose) @@ -6241,23 +6259,23 @@ int Gemm_arm::forward_int8(const std::vector& bottom_blobs, std::vector& bottom_blobs, std::vector