diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md
index 11d41295f91..c0255375b38 100644
--- a/docs/developer-guide/operators.md
+++ b/docs/developer-guide/operators.md
@@ -529,6 +529,7 @@ y = activation(x3, act_type, act_params)
 | 19 | output_pad_bottom | int | output_pad_right | |
 | 20 | output_w | int | 0 | |
 | 21 | output_h | int | output_w | |
+| 28 | dynamic_weight | int | 0 | |
 
 | weight | type | shape |
 | ------------- | ----- | --------------------- |
@@ -558,6 +559,7 @@ y = activation(x3, act_type, act_params)
 | 15 | pad_right | int | pad_left | |
 | 18 | output_pad_right | int | 0 | |
 | 20 | output_w | int | 0 | |
+| 28 | dynamic_weight | int | 0 | |
 
 | weight | type | shape |
 | ------------- | ----- | --------------------- |
@@ -638,6 +640,7 @@ y = activation(x3, act_type, act_params)
 | 19 | output_pad_bottom | int | output_pad_right | |
 | 20 | output_w | int | 0 | |
 | 21 | output_h | int | output_w | |
+| 28 | dynamic_weight | int | 0 | |
 
 | weight | type | shape |
 | ------------- | ----- | --------------------- |
@@ -668,6 +671,7 @@ y = activation(x3, act_type, act_params)
 | 15 | pad_right | int | pad_left | |
 | 18 | output_pad_right | int | 0 | |
 | 20 | output_w | int | 0 | |
+| 28 | dynamic_weight | int | 0 | |
 
 | weight | type | shape |
 | ------------- | ----- | --------------------- |
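Note: with 28=1 the layer stops being one-blob-only, so the kernel (and the bias when 5=1) arrive as extra bottom blobs at inference time instead of from the model binary. A hypothetical .param entry might look like the sketch below; the layer/blob names and the specific values are illustrative only, not taken from this patch:

```
Deconvolution    deconv_dyn    3 1 x weight bias out 0=16 1=3 3=2 5=1 28=1
```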
diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp
index 849a8daea6b..4e424c86c91 100644
--- a/src/layer/arm/convolution_arm.cpp
+++ b/src/layer/arm/convolution_arm.cpp
@@ -806,9 +806,9 @@ int Convolution_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat
     pd.set(1, _kernel_w);
     pd.set(11, _kernel_h);
     pd.set(2, dilation_w);
-    pd.set(21, dilation_h);
+    pd.set(12, dilation_h);
     pd.set(3, stride_w);
-    pd.set(31, stride_h);
+    pd.set(13, stride_h);
     pd.set(4, pad_left);
     pd.set(15, pad_right);
     pd.set(14, pad_top);
diff --git a/src/layer/arm/deconvolution_arm.cpp b/src/layer/arm/deconvolution_arm.cpp
--- a/src/layer/arm/deconvolution_arm.cpp
+++ b/src/layer/arm/deconvolution_arm.cpp
@@ ... @@ int Deconvolution_arm::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
@@ ... @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+int Deconvolution_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_float16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_ARM82
+#if NCNN_BF16
+    if (opt.use_bf16_storage && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_bfloat16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_BF16
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+#if NCNN_ARM82
+        if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_float16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_ARM82
+#if NCNN_BF16
+        if (opt.use_bf16_storage && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_bfloat16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_BF16
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 #if NCNN_BF16
 int Deconvolution_arm::create_pipeline_bf16s(const Option& opt)
 {
@@ -1167,28 +1310,7 @@ int Deconvolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, cons
                     kptr += maxk;
                 }
 
-                if (activation_type == 1)
-                {
-                    sum = std::max(sum, 0.f);
-                }
-                else if (activation_type == 2)
-                {
-                    float slope = activation_params[0];
-                    sum = sum > 0.f ? sum : sum * slope;
-                }
-                else if (activation_type == 3)
-                {
-                    float min = activation_params[0];
-                    float max = activation_params[1];
-                    if (sum < min)
-                        sum = min;
-                    if (sum > max)
-                        sum = max;
-                }
-                else if (activation_type == 4)
-                {
-                    sum = 1.f / (1.f + expf(-sum));
-                }
+                sum = activation_ss(sum, activation_type, activation_params);
 
                 outptr[j] = float32_to_bfloat16(sum);
             }
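The reorder loop above is the heart of the dynamic path: the incoming weight blob is laid out inch-major (a Convolution-style kernel), while Deconvolution expects outch-major. A minimal standalone sketch of the same index math with toy sizes (not ncnn code, just the loop lifted out for clarity):

```cpp
#include <cstdio>
#include <vector>

int main()
{
    const int inch = 2, outch = 3, maxk = 4; // toy sizes, single group

    std::vector<float> src(inch * outch * maxk), dst(src.size());
    for (size_t i = 0; i < src.size(); i++)
        src[i] = (float)i;

    // same index math as the patch: (inch, outch, k) -> (outch, inch, k)
    for (int i = 0; i < outch; i++)
        for (int j = 0; j < inch; j++)
            for (int k = 0; k < maxk; k++)
                dst[(i * inch + j) * maxk + k] = src[(j * outch + i) * maxk + k];

    // spot check: element (outch 1, inch 0, k 2) came from (inch 0, outch 1, k 2)
    printf("%.0f == %.0f\n", dst[(1 * inch + 0) * maxk + 2], src[(0 * outch + 1) * maxk + 2]);
    return 0;
}
```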
diff --git a/src/layer/arm/deconvolution_arm.h b/src/layer/arm/deconvolution_arm.h
index bbc1e6faa53..3c7979687cb 100644
--- a/src/layer/arm/deconvolution_arm.h
+++ b/src/layer/arm/deconvolution_arm.h
@@ -29,6 +29,8 @@ class Deconvolution_arm : virtual public Deconvolution
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
 #if NCNN_ARM82
     int create_pipeline_fp16s(const Option& opt);
diff --git a/src/layer/arm/deconvolutiondepthwise_arm.cpp b/src/layer/arm/deconvolutiondepthwise_arm.cpp
index dd3b4f8bd0e..478bd1740dc 100644
--- a/src/layer/arm/deconvolutiondepthwise_arm.cpp
+++ b/src/layer/arm/deconvolutiondepthwise_arm.cpp
@@ -43,6 +43,9 @@ DeconvolutionDepthWise_arm::DeconvolutionDepthWise_arm()
 
 int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
 #if NCNN_ARM82
     if (support_fp16_storage && opt.use_fp16_storage)
     {
@@ -386,28 +389,7 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c
                     }
                 }
 
-                if (activation_type == 1)
-                {
-                    sum = std::max(sum, 0.f);
-                }
-                else if (activation_type == 2)
-                {
-                    float slope = activation_params[0];
-                    sum = sum > 0.f ? sum : sum * slope;
-                }
-                else if (activation_type == 3)
-                {
-                    float min = activation_params[0];
-                    float max = activation_params[1];
-                    if (sum < min)
-                        sum = min;
-                    if (sum > max)
-                        sum = max;
-                }
-                else if (activation_type == 4)
-                {
-                    sum = 1.f / (1.f + expf(-sum));
-                }
+                sum = activation_ss(sum, activation_type, activation_params);
 
                 outptr[j] = sum;
             }
@@ -482,6 +464,147 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c
     return 0;
 }
 
+int DeconvolutionDepthWise_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_float16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_ARM82
+#if NCNN_BF16
+    if (opt.use_bf16_storage && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_bfloat16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_BF16
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+#if NCNN_ARM82
+        if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_float16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_ARM82
+#if NCNN_BF16
+        if (opt.use_bf16_storage && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_bfloat16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_BF16
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::DeconvolutionDepthWise);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(7, group);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 #if NCNN_BF16
 int DeconvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
@@ -639,28 +762,7 @@ int DeconvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_b
                     }
                 }
 
-                if (activation_type == 1)
-                {
-                    sum = std::max(sum, 0.f);
-                }
-                else if (activation_type == 2)
-                {
-                    float slope = activation_params[0];
-                    sum = sum > 0.f ? sum : sum * slope;
-                }
-                else if (activation_type == 3)
-                {
-                    float min = activation_params[0];
-                    float max = activation_params[1];
-                    if (sum < min)
-                        sum = min;
-                    if (sum > max)
-                        sum = max;
-                }
-                else if (activation_type == 4)
-                {
-                    sum = 1.f / (1.f + expf(-sum));
-                }
+                sum = activation_ss(sum, activation_type, activation_params);
 
                 outptr[j] = float32_to_bfloat16(sum);
             }
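activation_ss replaces the open-coded branches in each of these inner loops. For the four types the removed blocks handled, its behavior matches the scalar helper sketched below (a paraphrase for clarity, not the actual ncnn helper, which may cover further activation types):

```cpp
#include <algorithm>
#include <cmath>

// Equivalent scalar behavior for activation types 1..4 (sketch only).
static float activation_scalar(float v, int type, const float* params)
{
    if (type == 1) return std::max(v, 0.f);                            // ReLU
    if (type == 2) return v > 0.f ? v : v * params[0];                 // LeakyReLU, params[0] = slope
    if (type == 3) return std::min(std::max(v, params[0]), params[1]); // clip to [min, max]
    if (type == 4) return 1.f / (1.f + expf(-v));                      // sigmoid
    return v;                                                          // 0 = identity
}
```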
diff --git a/src/layer/arm/deconvolutiondepthwise_arm.h b/src/layer/arm/deconvolutiondepthwise_arm.h
index 2022d2a54cb..6eff45ede3a 100644
--- a/src/layer/arm/deconvolutiondepthwise_arm.h
+++ b/src/layer/arm/deconvolutiondepthwise_arm.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_arm : virtual public DeconvolutionDepthWise
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
 #if NCNN_ARM82
     int create_pipeline_fp16s(const Option& opt);
diff --git a/src/layer/deconvolution.cpp b/src/layer/deconvolution.cpp
index 411395f961e..438e2bf7282 100644
--- a/src/layer/deconvolution.cpp
+++ b/src/layer/deconvolution.cpp
@@ -46,11 +46,21 @@ int Deconvolution::load_param(const ParamDict& pd)
     activation_type = pd.get(9, 0);
     activation_params = pd.get(10, Mat());
 
+    dynamic_weight = pd.get(28, 0);
+
+    if (dynamic_weight)
+    {
+        one_blob_only = false;
+    }
+
     return 0;
 }
 
 int Deconvolution::load_model(const ModelBin& mb)
 {
+    if (dynamic_weight)
+        return 0;
+
     weight_data = mb.load(weight_data_size, 0);
     if (weight_data.empty())
         return -100;
@@ -180,6 +190,93 @@ int Deconvolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
     return 0;
 }
 
+int Deconvolution::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+    }
+
+    const int w = bottom_blob.w;
+    const int h = bottom_blob.h;
+
+    const int kernel_extent_w = dilation_w * (_kernel_w - 1) + 1;
+    const int kernel_extent_h = dilation_h * (_kernel_h - 1) + 1;
+
+    int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
+    int outh = (h - 1) * stride_h + kernel_extent_h + output_pad_bottom;
+
+    Mat top_blob_bordered;
+    if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || (output_w > 0 && output_h > 0))
+    {
+        top_blob_bordered.create(outw, outh, _num_output, 4u, opt.workspace_allocator);
+    }
+    else
+    {
+        top_blob_bordered = top_blob;
+        top_blob_bordered.create(outw, outh, _num_output, 4u, opt.blob_allocator);
+    }
+    if (top_blob_bordered.empty())
+        return -100;
+
+    int ret = deconvolution(bottom_blob, top_blob_bordered, weight_data_transposed, bias_data_flattened, _kernel_w, _kernel_h, stride_w, stride_h, dilation_w, dilation_h, activation_type, activation_params, opt);
+    if (ret != 0)
+        return ret;
+
+    cut_padding(top_blob_bordered, top_blob, opt);
+    if (top_blob.empty())
+        return -100;
+
+    return 0;
+}
+
 void Deconvolution::cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const
 {
     if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
diff --git a/src/layer/deconvolution.h b/src/layer/deconvolution.h
index b0025a63ef4..ece5e1e5010 100644
--- a/src/layer/deconvolution.h
+++ b/src/layer/deconvolution.h
@@ -30,6 +30,8 @@ class Deconvolution : public Layer
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     void cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const;
 
@@ -58,6 +60,8 @@ class Deconvolution : public Layer
     int activation_type;
     Mat activation_params;
 
+    int dynamic_weight;
+
     // model
     Mat weight_data;
     Mat bias_data;
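A quick worked example of the output-extent formula used in the reference forward() above (sample values chosen for illustration):

```cpp
#include <cstdio>

int main()
{
    // assumed sample values: 7-wide input, stride 2, 3-wide kernel, no dilation/pad
    const int w = 7, stride_w = 2, kernel_w = 3, dilation_w = 1, output_pad_right = 0;

    const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; // 3
    const int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;

    printf("outw = %d\n", outw); // 15 -- a stride-2 deconvolution roughly doubles the extent
    return 0;
}
```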
diff --git a/src/layer/deconvolution1d.cpp b/src/layer/deconvolution1d.cpp
index 50cb5600950..0dd7ae47028 100644
--- a/src/layer/deconvolution1d.cpp
+++ b/src/layer/deconvolution1d.cpp
@@ -39,11 +39,21 @@ int Deconvolution1D::load_param(const ParamDict& pd)
     activation_type = pd.get(9, 0);
     activation_params = pd.get(10, Mat());
 
+    dynamic_weight = pd.get(28, 0);
+
+    if (dynamic_weight)
+    {
+        one_blob_only = false;
+    }
+
     return 0;
 }
 
 int Deconvolution1D::load_model(const ModelBin& mb)
 {
+    if (dynamic_weight)
+        return 0;
+
     weight_data = mb.load(weight_data_size, 0);
     if (weight_data.empty())
         return -100;
@@ -143,6 +153,89 @@ int Deconvolution1D::forward(const Mat& bottom_blob, Mat& top_blob, const Option
     return 0;
 }
 
+int Deconvolution1D::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.h;
+    const int _kernel_w = _weight_data.w;
+    const int _num_output = _weight_data.h * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // transpose group-inch/group-outch/group-kw to group-outch/group-inch/group-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+    }
+
+    const int w = bottom_blob.w;
+
+    const int kernel_extent_w = dilation_w * (_kernel_w - 1) + 1;
+
+    int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
+
+    Mat top_blob_bordered;
+    if (pad_left > 0 || pad_right > 0 || output_w > 0)
+    {
+        top_blob_bordered.create(outw, _num_output, 4u, opt.workspace_allocator);
+    }
+    else
+    {
+        top_blob_bordered = top_blob;
+        top_blob_bordered.create(outw, _num_output, 4u, opt.blob_allocator);
+    }
+    if (top_blob_bordered.empty())
+        return -100;
+
+    int ret = deconvolution1d(bottom_blob, top_blob_bordered, weight_data_transposed, bias_data_flattened, _kernel_w, stride_w, dilation_w, activation_type, activation_params, opt);
+    if (ret != 0)
+        return ret;
+
+    cut_padding(top_blob_bordered, top_blob, opt);
+    if (top_blob.empty())
+        return -100;
+
+    return 0;
+}
+
 void Deconvolution1D::cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const
 {
     if (pad_left > 0 || pad_right > 0)
diff --git a/src/layer/deconvolution1d.h b/src/layer/deconvolution1d.h
index 4f681beee90..c3b64dd561f 100644
--- a/src/layer/deconvolution1d.h
+++ b/src/layer/deconvolution1d.h
@@ -30,6 +30,8 @@ class Deconvolution1D : public Layer
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     void cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const;
 
@@ -50,6 +52,8 @@ class Deconvolution1D : public Layer
     int activation_type;
     Mat activation_params;
 
+    int dynamic_weight;
+
     // model
     Mat weight_data;
     Mat bias_data;
diff --git a/src/layer/deconvolutiondepthwise.cpp b/src/layer/deconvolutiondepthwise.cpp
index cd0ef36ea19..462ade2a21f 100644
--- a/src/layer/deconvolutiondepthwise.cpp
+++ b/src/layer/deconvolutiondepthwise.cpp
@@ -47,11 +47,21 @@ int DeconvolutionDepthWise::load_param(const ParamDict& pd)
     activation_type = pd.get(9, 0);
     activation_params = pd.get(10, Mat());
 
+    dynamic_weight = pd.get(28, 0);
+
+    if (dynamic_weight)
+    {
+        one_blob_only = false;
+    }
+
     return 0;
 }
 
 int DeconvolutionDepthWise::load_model(const ModelBin& mb)
 {
+    if (dynamic_weight)
+        return 0;
+
     weight_data = mb.load(weight_data_size, 0);
     if (weight_data.empty())
         return -100;
@@ -243,6 +253,93 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const
     return 0;
 }
 
+int DeconvolutionDepthWise::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+    }
+
+    const int w = bottom_blob.w;
+    const int h = bottom_blob.h;
+
+    const int kernel_extent_w = dilation_w * (_kernel_w - 1) + 1;
+    const int kernel_extent_h = dilation_h * (_kernel_h - 1) + 1;
+
+    int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
+    int outh = (h - 1) * stride_h + kernel_extent_h + output_pad_bottom;
+
+    Mat top_blob_bordered;
+    if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || (output_w > 0 && output_h > 0))
+    {
+        top_blob_bordered.create(outw, outh, _num_output, 4u, opt.workspace_allocator);
+    }
+    else
+    {
+        top_blob_bordered = top_blob;
+        top_blob_bordered.create(outw, outh, _num_output, 4u, opt.blob_allocator);
+    }
+    if (top_blob_bordered.empty())
+        return -100;
+
+    int ret = deconvolutiondepthwise(bottom_blob, top_blob_bordered, weight_data_transposed, bias_data_flattened, _kernel_w, _kernel_h, stride_w, stride_h, dilation_w, dilation_h, group, activation_type, activation_params, opt);
+    if (ret != 0)
+        return ret;
+
+    cut_padding(top_blob_bordered, top_blob, opt);
+    if (top_blob.empty())
+        return -100;
+
+    return 0;
+}
+
 void DeconvolutionDepthWise::cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const
 {
     if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
diff --git a/src/layer/deconvolutiondepthwise.h b/src/layer/deconvolutiondepthwise.h
index 7d99f0064a5..8e6507d6434 100644
--- a/src/layer/deconvolutiondepthwise.h
+++ b/src/layer/deconvolutiondepthwise.h
@@ -30,6 +30,8 @@ class DeconvolutionDepthWise : public Layer
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     void cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const;
 
@@ -59,6 +61,8 @@ class DeconvolutionDepthWise : public Layer
     int activation_type;
     Mat activation_params;
 
+    int dynamic_weight;
+
     // model
     Mat weight_data;
     Mat bias_data;
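These forward() overloads read the dynamic weight blob as a 4-D Mat whose dims appear to map to w = kernel_w, h = kernel_h, d = num_output / group, c = num_input, which is how `_num_output = _weight_data.d * group` recovers the full output channel count. A sketch of constructing such a blob under that assumed layout:

```cpp
#include "mat.h" // ncnn; the installed header is typically <ncnn/mat.h>

int main()
{
    const int group = 2, inch = 4, outch = 6, kw = 3, kh = 3;

    // assumed dims: w = kw, h = kh, d = outch per group, c = inch,
    // so forward() can recover num_output as weight.d * group (= 6 here)
    ncnn::Mat weight(kw, kh, outch / group, inch);

    return weight.empty() ? 1 : 0;
}
```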
diff --git a/src/layer/deconvolutiondepthwise1d.cpp b/src/layer/deconvolutiondepthwise1d.cpp
index 15b6c88539b..955795eb788 100644
--- a/src/layer/deconvolutiondepthwise1d.cpp
+++ b/src/layer/deconvolutiondepthwise1d.cpp
@@ -40,11 +40,21 @@ int DeconvolutionDepthWise1D::load_param(const ParamDict& pd)
     activation_type = pd.get(9, 0);
     activation_params = pd.get(10, Mat());
 
+    dynamic_weight = pd.get(28, 0);
+
+    if (dynamic_weight)
+    {
+        one_blob_only = false;
+    }
+
     return 0;
 }
 
 int DeconvolutionDepthWise1D::load_model(const ModelBin& mb)
 {
+    if (dynamic_weight)
+        return 0;
+
     weight_data = mb.load(weight_data_size, 0);
     if (weight_data.empty())
         return -100;
@@ -195,6 +205,89 @@ int DeconvolutionDepthWise1D::forward(const Mat& bottom_blob, Mat& top_blob, con
     return 0;
 }
 
+int DeconvolutionDepthWise1D::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.h;
+    const int _kernel_w = _weight_data.w;
+    const int _num_output = _weight_data.h * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // transpose group-inch/group-outch/group-kw to group-outch/group-inch/group-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+    }
+
+    const int w = bottom_blob.w;
+
+    const int kernel_extent_w = dilation_w * (_kernel_w - 1) + 1;
+
+    int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
+
+    Mat top_blob_bordered;
+    if (pad_left > 0 || pad_right > 0 || output_w > 0)
+    {
+        top_blob_bordered.create(outw, _num_output, 4u, opt.workspace_allocator);
+    }
+    else
+    {
+        top_blob_bordered = top_blob;
+        top_blob_bordered.create(outw, _num_output, 4u, opt.blob_allocator);
+    }
+    if (top_blob_bordered.empty())
+        return -100;
+
+    int ret = deconvolutiondepthwise1d(bottom_blob, top_blob_bordered, weight_data_transposed, bias_data_flattened, _kernel_w, stride_w, dilation_w, group, activation_type, activation_params, opt);
+    if (ret != 0)
+        return ret;
+
+    cut_padding(top_blob_bordered, top_blob, opt);
+    if (top_blob.empty())
+        return -100;
+
+    return 0;
+}
+
 void DeconvolutionDepthWise1D::cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const
 {
     if (pad_left > 0 || pad_right > 0)
diff --git a/src/layer/deconvolutiondepthwise1d.h b/src/layer/deconvolutiondepthwise1d.h
index 50332f11fc6..1ed1513d256 100644
--- a/src/layer/deconvolutiondepthwise1d.h
+++ b/src/layer/deconvolutiondepthwise1d.h
@@ -30,6 +30,8 @@ class DeconvolutionDepthWise1D : public Layer
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     void cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const;
 
@@ -51,6 +53,8 @@ class DeconvolutionDepthWise1D : public Layer
     int activation_type;
     Mat activation_params;
 
+    int dynamic_weight;
+
     // model
     Mat weight_data;
     Mat bias_data;
diff --git a/src/layer/loongarch/convolution_loongarch.cpp b/src/layer/loongarch/convolution_loongarch.cpp
index 31719b3de92..7816d1c66d2 100644
--- a/src/layer/loongarch/convolution_loongarch.cpp
+++ b/src/layer/loongarch/convolution_loongarch.cpp
@@ -600,9 +600,9 @@ int Convolution_loongarch::forward(const std::vector<Mat>& bottom_blobs, std::ve
     pd.set(1, _kernel_w);
     pd.set(11, _kernel_h);
     pd.set(2, dilation_w);
-    pd.set(21, dilation_h);
+    pd.set(12, dilation_h);
     pd.set(3, stride_w);
-    pd.set(31, stride_h);
+    pd.set(13, stride_h);
     pd.set(4, pad_left);
     pd.set(15, pad_right);
     pd.set(14, pad_top);
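The pd.set fixes here (and repeated per architecture) correct mis-numbered param IDs in the dynamic Convolution path: in the param scheme, the h-variant of each spatial parameter is its w-variant's ID plus 10 (1/11 kernel, 2/12 dilation, 3/13 stride). Keys 21 and 31 are not read by Convolution::load_param, so dilation_h and stride_h silently fell back to their *_w defaults, breaking asymmetric configurations. A sketch of the corrected scheme (hypothetical helper, not part of the patch):

```cpp
#include "paramdict.h" // ncnn

// Correct ID scheme for the inner op's spatial params (w ID + 10 = h ID).
void fill_spatial_params(ncnn::ParamDict& pd, int dilation_w, int dilation_h, int stride_w, int stride_h)
{
    pd.set(2, dilation_w);
    pd.set(12, dilation_h); // was 21, a key Convolution::load_param never reads
    pd.set(3, stride_w);
    pd.set(13, stride_h);   // was 31, likewise unread
}
```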
diff --git a/src/layer/loongarch/deconvolution_loongarch.cpp b/src/layer/loongarch/deconvolution_loongarch.cpp
index bb913909b55..2d934bccb06 100644
--- a/src/layer/loongarch/deconvolution_loongarch.cpp
+++ b/src/layer/loongarch/deconvolution_loongarch.cpp
@@ -40,6 +40,9 @@ Deconvolution_loongarch::Deconvolution_loongarch()
 
 int Deconvolution_loongarch::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
     const int maxk = kernel_w * kernel_h;
     int num_input = weight_data_size / maxk / num_output;
 
@@ -281,4 +284,110 @@ int Deconvolution_loongarch::forward(const Mat& bottom_blob, Mat& top_blob, cons
     return 0;
 }
 
+int Deconvolution_loongarch::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/loongarch/deconvolution_loongarch.h b/src/layer/loongarch/deconvolution_loongarch.h
index bb7653b563f..f67b5d7e4e1 100644
--- a/src/layer/loongarch/deconvolution_loongarch.h
+++ b/src/layer/loongarch/deconvolution_loongarch.h
@@ -29,6 +29,8 @@ class Deconvolution_loongarch : virtual public Deconvolution
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 public:
     Mat weight_data_tm;
 };
diff --git a/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp b/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp
index a141dd70360..f4f4d37bf7f 100644
--- a/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp
+++ b/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp
@@ -34,6 +34,9 @@ DeconvolutionDepthWise_loongarch::DeconvolutionDepthWise_loongarch()
 
 int DeconvolutionDepthWise_loongarch::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
     const int maxk = kernel_w * kernel_h;
     int channels = (weight_data_size / group) / maxk / (num_output / group) * group;
 
@@ -409,4 +412,111 @@ int DeconvolutionDepthWise_loongarch::forward(const Mat& bottom_blob, Mat& top_b
     return 0;
 }
 
+int DeconvolutionDepthWise_loongarch::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::DeconvolutionDepthWise);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(7, group);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/loongarch/deconvolutiondepthwise_loongarch.h b/src/layer/loongarch/deconvolutiondepthwise_loongarch.h
index e41e7cac9e1..b710f07ecf3 100644
--- a/src/layer/loongarch/deconvolutiondepthwise_loongarch.h
+++ b/src/layer/loongarch/deconvolutiondepthwise_loongarch.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_loongarch : virtual public DeconvolutionDepthWise
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     int create_group_ops(const Option& opt);
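Every dynamic forward() in this patch follows the same dispatch pattern: build a throwaway statically-weighted layer, feed it the repacked runtime weights, run it once, tear it down. A condensed sketch of that lifecycle (the helper name is illustrative; the includes are assumed to be ncnn's layer.h/layer_type.h):

```cpp
#include "layer.h" // ncnn; pulls in Mat, Option, ParamDict, ModelBin
#include "layer_type.h"

// hypothetical helper condensing the pattern; not part of the patch
int run_deconv_once(const ncnn::Mat& x, ncnn::Mat& y,
                    const ncnn::Mat& weight_transposed, const ncnn::Mat& bias,
                    const ncnn::ParamDict& pd, const ncnn::Option& opt)
{
    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);

    op->load_param(pd); // geometry: kernel, stride, dilation, pads...

    ncnn::Mat weights[2];
    weights[0] = weight_transposed;
    weights[1] = bias;
    op->load_model(ncnn::ModelBinFromMatArray(weights));

    op->create_pipeline(opt); // rebuilt on every call: generality over speed
    int ret = op->forward(x, y, opt);
    op->destroy_pipeline(opt);

    delete op;
    return ret;
}
```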
diff --git a/src/layer/mips/convolution_mips.cpp b/src/layer/mips/convolution_mips.cpp
index 5dd648fa3df..bc547e4a667 100644
--- a/src/layer/mips/convolution_mips.cpp
+++ b/src/layer/mips/convolution_mips.cpp
@@ -600,9 +600,9 @@ int Convolution_mips::forward(const std::vector<Mat>& bottom_blobs, std::vector<
     pd.set(1, _kernel_w);
     pd.set(11, _kernel_h);
     pd.set(2, dilation_w);
-    pd.set(21, dilation_h);
+    pd.set(12, dilation_h);
     pd.set(3, stride_w);
-    pd.set(31, stride_h);
+    pd.set(13, stride_h);
     pd.set(4, pad_left);
     pd.set(15, pad_right);
     pd.set(14, pad_top);
diff --git a/src/layer/mips/deconvolution_mips.cpp b/src/layer/mips/deconvolution_mips.cpp
index c3838f3ea69..506d3072096 100644
--- a/src/layer/mips/deconvolution_mips.cpp
+++ b/src/layer/mips/deconvolution_mips.cpp
@@ -40,6 +40,9 @@ Deconvolution_mips::Deconvolution_mips()
 
 int Deconvolution_mips::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
     const int maxk = kernel_w * kernel_h;
     int num_input = weight_data_size / maxk / num_output;
 
@@ -281,4 +284,110 @@ int Deconvolution_mips::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
     return 0;
 }
 
+int Deconvolution_mips::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/mips/deconvolution_mips.h b/src/layer/mips/deconvolution_mips.h
index 4d5e1ad985a..218bd812672 100644
--- a/src/layer/mips/deconvolution_mips.h
+++ b/src/layer/mips/deconvolution_mips.h
@@ -29,6 +29,8 @@ class Deconvolution_mips : virtual public Deconvolution
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 public:
     Mat weight_data_tm;
 };
diff --git a/src/layer/mips/deconvolutiondepthwise_mips.cpp b/src/layer/mips/deconvolutiondepthwise_mips.cpp
index da76b5801cd..533bf522ad9 100644
--- a/src/layer/mips/deconvolutiondepthwise_mips.cpp
+++ b/src/layer/mips/deconvolutiondepthwise_mips.cpp
@@ -34,6 +34,9 @@ DeconvolutionDepthWise_mips::DeconvolutionDepthWise_mips()
 
 int DeconvolutionDepthWise_mips::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
     const int maxk = kernel_w * kernel_h;
     int channels = (weight_data_size / group) / maxk / (num_output / group) * group;
 
@@ -409,4 +412,111 @@ int DeconvolutionDepthWise_mips::forward(const Mat& bottom_blob, Mat& top_blob,
     return 0;
 }
 
+int DeconvolutionDepthWise_mips::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::DeconvolutionDepthWise);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(7, group);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/mips/deconvolutiondepthwise_mips.h b/src/layer/mips/deconvolutiondepthwise_mips.h
index 90cb7c3acbb..a033d7c11c3 100644
--- a/src/layer/mips/deconvolutiondepthwise_mips.h
+++ b/src/layer/mips/deconvolutiondepthwise_mips.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_mips : virtual public DeconvolutionDepthWise
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     int create_group_ops(const Option& opt);
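The recurring "as pack1" step in these optimized paths does not move any data: it only rewrites the Mat descriptor so SIMD-packed storage reads as a flat fp32 array. A Mat with elempack 4 stores 4 logical elements per slot, so widening w while shrinking elemsize keeps the total byte count identical. The same view change as an illustrative helper (not an ncnn API):

```cpp
#include "mat.h" // ncnn

// Reinterpret a packed Mat as elempack 1 in place; no copy happens.
static void as_pack1(ncnn::Mat& m)
{
    m.w *= m.elempack;
    m.elemsize /= m.elempack;
    m.elempack = 1;
}
```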
diff --git a/src/layer/riscv/convolution_riscv.cpp b/src/layer/riscv/convolution_riscv.cpp
index c66279e2c58..4c4d57c6a57 100644
--- a/src/layer/riscv/convolution_riscv.cpp
+++ b/src/layer/riscv/convolution_riscv.cpp
@@ -684,9 +684,9 @@ int Convolution_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector
     pd.set(1, _kernel_w);
     pd.set(11, _kernel_h);
     pd.set(2, dilation_w);
-    pd.set(21, dilation_h);
+    pd.set(12, dilation_h);
     pd.set(3, stride_w);
-    pd.set(31, stride_h);
+    pd.set(13, stride_h);
     pd.set(4, pad_left);
     pd.set(15, pad_right);
     pd.set(14, pad_top);
diff --git a/src/layer/riscv/deconvolution_riscv.cpp b/src/layer/riscv/deconvolution_riscv.cpp
index 936b563f646..9202d367f93 100644
--- a/src/layer/riscv/deconvolution_riscv.cpp
+++ b/src/layer/riscv/deconvolution_riscv.cpp
@@ -14,6 +14,7 @@
 
 #include "deconvolution_riscv.h"
 
+#include "cpu.h"
 #include "layer_type.h"
 
 #if __riscv_vector
@@ -50,6 +51,9 @@ Deconvolution_riscv::Deconvolution_riscv()
 
 int Deconvolution_riscv::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
 #if __riscv_vector && __riscv_zfh
     if (opt.use_fp16_storage)
     {
@@ -318,6 +322,130 @@ int Deconvolution_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Op
     return 0;
 }
 
+int Deconvolution_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+#if NCNN_RVV
+    if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_float16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_RVV
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+#if NCNN_RVV
+        if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_float16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_RVV
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 #if __riscv_vector && __riscv_zfh
 int Deconvolution_riscv::create_pipeline_fp16s(const Option& opt)
 {
diff --git a/src/layer/riscv/deconvolution_riscv.h b/src/layer/riscv/deconvolution_riscv.h
index 3574c09d10e..903a420427a 100644
--- a/src/layer/riscv/deconvolution_riscv.h
+++ b/src/layer/riscv/deconvolution_riscv.h
@@ -29,6 +29,8 @@ class Deconvolution_riscv : virtual public Deconvolution
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
 #if __riscv_vector && __riscv_zfh
     int create_pipeline_fp16s(const Option& opt);
diff --git a/src/layer/riscv/deconvolutiondepthwise_riscv.cpp b/src/layer/riscv/deconvolutiondepthwise_riscv.cpp
index b53e8962fd2..eee765c4ea6 100644
--- a/src/layer/riscv/deconvolutiondepthwise_riscv.cpp
+++ b/src/layer/riscv/deconvolutiondepthwise_riscv.cpp
@@ -14,6 +14,7 @@
 
 #include "deconvolutiondepthwise_riscv.h"
 
+#include "cpu.h"
 #include "layer_type.h"
 
 #if __riscv_vector
@@ -37,6 +38,9 @@ DeconvolutionDepthWise_riscv::DeconvolutionDepthWise_riscv()
 
 int DeconvolutionDepthWise_riscv::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
 #if __riscv_vector && __riscv_zfh
     if (opt.use_fp16_storage)
     {
@@ -445,6 +449,131 @@ int DeconvolutionDepthWise_riscv::forward(const Mat& bottom_blob, Mat& top_blob,
     return 0;
 }
 
+int DeconvolutionDepthWise_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+#if NCNN_RVV
+    if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_float16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_RVV
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+#if NCNN_RVV
+        if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_float16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_RVV
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::DeconvolutionDepthWise);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(7, group);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 #if __riscv_vector && __riscv_zfh
 int DeconvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt)
 {
diff --git a/src/layer/riscv/deconvolutiondepthwise_riscv.h b/src/layer/riscv/deconvolutiondepthwise_riscv.h
index ccda5f248e3..5cdbd0d0676 100644
--- a/src/layer/riscv/deconvolutiondepthwise_riscv.h
+++ b/src/layer/riscv/deconvolutiondepthwise_riscv.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_riscv : virtual public DeconvolutionDepthWise
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     int create_group_ops(const Option& opt);
 #if __riscv_vector && __riscv_zfh
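On builds with 16-bit storage enabled, the runtime weight and bias blobs may arrive in fp16 (or bf16 on arm) storage, so they are cast up to fp32 before repacking: the inner op is always fed fp32 weights on these paths. The shape of that check, minus the per-arch CPU-capability tests, as an illustrative helper (not an ncnn API):

```cpp
#include "mat.h" // ncnn

// Normalize a possibly fp16-storage blob to fp32. The patch additionally
// gates this on arch support, e.g. cpu_support_riscv_v()/cpu_support_riscv_zfh().
static void to_fp32_if_needed(ncnn::Mat& blob, const ncnn::Option& opt)
{
    if (blob.elembits() == 16)
    {
        ncnn::Mat fp32;
        ncnn::cast_float16_to_float32(blob, fp32, opt);
        blob = fp32;
    }
}
```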
diff --git a/src/layer/riscv/deconvolutiondepthwise_riscv.h b/src/layer/riscv/deconvolutiondepthwise_riscv.h
index ccda5f248e3..5cdbd0d0676 100644
--- a/src/layer/riscv/deconvolutiondepthwise_riscv.h
+++ b/src/layer/riscv/deconvolutiondepthwise_riscv.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_riscv : virtual public DeconvolutionDepthWise
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     int create_group_ops(const Option& opt);
 #if __riscv_vector && __riscv_zfh
diff --git a/src/layer/vulkan/deconvolution_vulkan.cpp b/src/layer/vulkan/deconvolution_vulkan.cpp
index 30283d211c1..c53aedefc84 100644
--- a/src/layer/vulkan/deconvolution_vulkan.cpp
+++ b/src/layer/vulkan/deconvolution_vulkan.cpp
@@ -35,6 +35,13 @@ Deconvolution_vulkan::Deconvolution_vulkan()
 
 int Deconvolution_vulkan::create_pipeline(const Option& _opt)
 {
+    if (dynamic_weight)
+    {
+        support_vulkan = false;
+        support_image_storage = false;
+        return 0;
+    }
+
     Option opt = _opt;
     const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
     const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
diff --git a/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp b/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp
index ee9d949d303..b24418fa428 100644
--- a/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp
+++ b/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp
@@ -44,6 +44,13 @@ DeconvolutionDepthWise_vulkan::DeconvolutionDepthWise_vulkan()
 
 int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& _opt)
 {
+    if (dynamic_weight)
+    {
+        support_vulkan = false;
+        support_image_storage = false;
+        return 0;
+    }
+
     Option opt = _opt;
     const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
     const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
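One note on the two Vulkan hunks above: a dynamic weight only arrives as a bottom blob at inference time, so there is nothing to upload or specialize when the pipeline is built. Clearing the support flags makes the net schedule the layer on the CPU implementations added elsewhere in this patch. A condensed sketch of that contract (my paraphrase, not additional ncnn API):

// Paraphrase of the opt-out pattern in both Vulkan create_pipeline() hunks.
// With dynamic_weight set there are no constant weights to bake into a GPU
// pipeline, so the layer declares itself unsupported and returns success;
// the CPU forward(bottom_blobs, top_blobs, opt) overloads handle it instead.
int create_pipeline_guard(bool dynamic_weight, bool& support_vulkan, bool& support_image_storage)
{
    if (dynamic_weight)
    {
        support_vulkan = false;
        support_image_storage = false;
        return 0; // not an error: this is a deliberate CPU fallback
    }
    // ... normal shader pipeline specialization for static weights ...
    return 0;
}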
diff --git a/src/layer/x86/convolution_x86.cpp b/src/layer/x86/convolution_x86.cpp
index 09008985f12..6e828ff0d21 100644
--- a/src/layer/x86/convolution_x86.cpp
+++ b/src/layer/x86/convolution_x86.cpp
@@ -1189,9 +1189,9 @@ int Convolution_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+int Deconvolution_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/x86/deconvolution_x86.h b/src/layer/x86/deconvolution_x86.h
index 2d620e279ea..4951870bcd0 100644
--- a/src/layer/x86/deconvolution_x86.h
+++ b/src/layer/x86/deconvolution_x86.h
@@ -29,6 +29,8 @@ class Deconvolution_x86 : virtual public Deconvolution
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 public:
     Layer* activation;
     Layer* gemm;
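The x86 forward above (like the ARM and RISC-V ones) does not hand-write a dynamic-weight kernel. It rebuilds an ordinary Deconvolution layer on every call and feeds it the freshly transposed weights. Reduced to its skeleton, with the param/weight preparation elided (this is a condensation of the code above, not additional API):

// Skeleton of the dynamic-weight fallback. load_param, load_model and
// create_pipeline all run on every forward call; that per-call setup cost
// is the price of letting the weights change between inferences.
ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
op->load_param(pd);                                  // ids derived from the weight blob shape
op->load_model(ncnn::ModelBinFromMatArray(weights)); // transposed weight + flattened bias
op->create_pipeline(opt);
op->forward(bottom_blob, top_blob, opt);
op->destroy_pipeline(opt);
delete op;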
diff --git a/src/layer/x86/deconvolutiondepthwise_x86.cpp b/src/layer/x86/deconvolutiondepthwise_x86.cpp
index 03a24999190..43a573a64ef 100644
--- a/src/layer/x86/deconvolutiondepthwise_x86.cpp
+++ b/src/layer/x86/deconvolutiondepthwise_x86.cpp
@@ -37,6 +37,9 @@ DeconvolutionDepthWise_x86::DeconvolutionDepthWise_x86()
 
 int DeconvolutionDepthWise_x86::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
     const int maxk = kernel_w * kernel_h;
     int channels = (weight_data_size / group) / maxk / (num_output / group) * group;
 
@@ -574,4 +577,111 @@ int DeconvolutionDepthWise_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
     return 0;
 }
 
+int DeconvolutionDepthWise_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::DeconvolutionDepthWise);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(7, group);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/x86/deconvolutiondepthwise_x86.h b/src/layer/x86/deconvolutiondepthwise_x86.h
index 33139cfb563..07fb5e54f9b 100644
--- a/src/layer/x86/deconvolutiondepthwise_x86.h
+++ b/src/layer/x86/deconvolutiondepthwise_x86.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_x86 : virtual public DeconvolutionDepthWise
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     int create_group_ops(const Option& opt);
diff --git a/tests/test_deconvolution.cpp b/tests/test_deconvolution.cpp
index b1028019508..4a0027b68c4 100644
--- a/tests/test_deconvolution.cpp
+++ b/tests/test_deconvolution.cpp
@@ -151,9 +151,95 @@ static int test_deconvolution_0()
            || test_deconvolution(7, 5, 32, 26, 4, 2, 2, 2, 1, 0, 0, 0, 0);
 }
 
+static int test_deconvolution_dynamic(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, int output_pad_right, int output_pad_bottom, int output_w, int output_h)
+{
+    ncnn::Mat a = RandomMat(w, h, c);
+
+    if (output_w > 0 && output_h > 0 && pad != -233 && pad != -234)
+    {
+        pad = -233;
+    }
+
+    ncnn::ParamDict pd;
+    pd.set(0, 0);
+    pd.set(1, 0);
+    pd.set(2, dilation);
+    pd.set(3, stride);
+    pd.set(4, pad);
+    pd.set(5, bias);
+    pd.set(6, 0);
+    pd.set(28, 1); // dynamic weight
+
+    int activation_type = RAND() % 7; // 0 1 2 3 4 5 6
+    ncnn::Mat activation_params(2);
+    activation_params[0] = (activation_type == 6) ? RandomFloat(0, 1) : RandomFloat(-1, 0); // alpha
+    activation_params[1] = RandomFloat(0, 1); // beta
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+
+    std::vector<ncnn::Mat> as(bias ? 3 : 2);
+    as[0] = a;
+    as[1] = RandomMat(kernel, kernel, outch, c);
+    if (bias)
+        as[2] = RandomMat(outch);
+
+    std::vector<ncnn::Mat> weights(0);
+
+    int ret = test_layer("Deconvolution", pd, weights, as);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_deconvolution_dynamic failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d act=%d actparams=[%f,%f] output_pad_right=%d output_pad_bottom=%d output_w=%d output_h=%d\n", w, h, c, outch, kernel, dilation, stride, pad, bias, activation_type, activation_params[0], activation_params[1], output_pad_right, output_pad_bottom, output_w, output_h);
+    }
+
+    return ret;
+}
+
+static int test_deconvolution_1()
+{
+    static const int kdsp[7][4] = {
+        {1, 1, 1, 0},
+        {1, 1, 2, 0},
+        {2, 1, 1, 1},
+        {2, 1, 2, -233},
+        {3, 1, 1, 1},
+        {3, 1, 2, 1},
+        {3, 2, 1, -234},
+    };
+
+    for (int i = 0; i < 7; i++)
+    {
+        const int k = kdsp[i][0];
+        const int d = kdsp[i][1];
+        const int s = kdsp[i][2];
+        const int p = kdsp[i][3];
+
+        int ret = 0
+                  || test_deconvolution_dynamic(9, 7, 1, 1, k, d, s, p, 1, 0, 0, 0, 0)
+                  || test_deconvolution_dynamic(9, 7, 4, 13, k, d, s, p, 0, 1, 1, 7, 5)
+                  || test_deconvolution_dynamic(9, 7, 13, 4, k, d, s, p, 1, 1, 0, 0, 0)
+                  || test_deconvolution_dynamic(9, 7, 4, 8, k, d, s, p, 0, 0, 1, 0, 0)
+                  || test_deconvolution_dynamic(9, 7, 8, 4, k, d, s, p, 1, 0, 0, 7, 5)
+                  || test_deconvolution_dynamic(7, 7, 12, 12, k, d, s, p, 1, 0, 1, 0, 0)
+                  || test_deconvolution_dynamic(4, 5, 12, 11, k, d, s, p, 0, 0, 1, 1, 0)
+                  || test_deconvolution_dynamic(9, 7, 8, 13, k, d, s, p, 0, 2, 2, 0, 0)
+                  || test_deconvolution_dynamic(9, 7, 13, 8, k, d, s, p, 1, 2, 0, 0, 0)
+                  || test_deconvolution_dynamic(9, 7, 16, 16, k, d, s, p, 0, 0, 2, 7, 5);
+
+        if (ret != 0)
+            return -1;
+    }
+
+    return 0;
+}
+
 int main()
 {
     SRAND(7767517);
 
-    return test_deconvolution_0();
+    return test_deconvolution_0() || test_deconvolution_1();
 }
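For readers checking the expected shapes in these tests: ncnn's deconvolution output size follows the usual transposed-convolution rule. A hedged helper follows; the function name and the symmetric-pad handling are mine, and the magic pad values -233/-234 used by the tests (which select automatic padding) are not modeled here:

// Expected deconvolution output size, following the transposed-convolution
// shape rule ncnn applies (kernel_extent = dilation * (kernel - 1) + 1).
int deconv_output_size(int in, int kernel, int dilation, int stride, int pad, int output_pad)
{
    const int kernel_extent = dilation * (kernel - 1) + 1;
    int out = (in - 1) * stride + kernel_extent + output_pad;
    if (pad > 0)
        out -= 2 * pad; // explicit symmetric padding crops both borders
    return out;
}

// e.g. in=9, kernel=3, dilation=1, stride=2, pad=1, output_pad=0 -> 17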
diff --git a/tests/test_deconvolution1d.cpp b/tests/test_deconvolution1d.cpp
index 5b8cd6aec14..b1b24ee6af9 100644
--- a/tests/test_deconvolution1d.cpp
+++ b/tests/test_deconvolution1d.cpp
@@ -101,9 +101,100 @@ static int test_deconvolution1d_0()
     return 0;
 }
 
+static int test_deconvolution1d_dynamic(int w, int h, int outh, int kernel, int dilation, int stride, int pad, int bias, int output_pad_right, int output_w)
+{
+    ncnn::Mat a = RandomMat(w, h);
+
+    if (output_w > 0 && pad != -233 && pad != -234)
+    {
+        pad = -233;
+    }
+
+    ncnn::ParamDict pd;
+    pd.set(0, 0);
+    pd.set(1, 0);
+    pd.set(2, dilation);
+    pd.set(3, stride);
+    pd.set(4, pad);
+    pd.set(5, bias);
+    pd.set(6, 0);
+    pd.set(28, 1); // dynamic weight
+
+    int activation_type = RAND() % 5; // 0 1 2 3 4
+    ncnn::Mat activation_params(2);
+    activation_params[0] = RandomFloat(-1, 0); // alpha
+    activation_params[1] = RandomFloat(0, 1); // beta
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    pd.set(18, output_pad_right);
+    pd.set(20, output_w);
+
+    std::vector<ncnn::Mat> as(bias ? 3 : 2);
+    as[0] = a;
+    as[1] = RandomMat(kernel, outh, h);
+    if (bias)
+        as[2] = RandomMat(outh);
+
+    std::vector<ncnn::Mat> weights(0);
+
+    int ret = test_layer("Deconvolution1D", pd, weights, as);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_deconvolution1d_dynamic failed w=%d h=%d outh=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d act=%d actparams=[%f,%f] output_pad_right=%d output_w=%d\n", w, h, outh, kernel, dilation, stride, pad, bias, activation_type, activation_params[0], activation_params[1], output_pad_right, output_w);
+    }
+
+    return ret;
+}
+
+static int test_deconvolution1d_1()
+{
+    static const int kdsp[16][4] = {
+        {1, 1, 1, 0},
+        {1, 1, 2, 0},
+        {2, 1, 1, 1},
+        {2, 1, 2, -233},
+        {3, 1, 1, 1},
+        {3, 1, 2, 1},
+        {3, 2, 1, 1},
+        {4, 1, 1, -233},
+        {4, 1, 2, -234},
+        {4, 2, 1, -234},
+        {5, 1, 1, 2},
+        {5, 1, 2, 2},
+        {5, 2, 2, 2},
+        {7, 1, 1, 3},
+        {7, 1, 2, 3},
+        {7, 2, 1, -233},
+    };
+
+    for (int i = 0; i < 16; i++)
+    {
+        const int k = kdsp[i][0];
+        const int d = kdsp[i][1];
+        const int s = kdsp[i][2];
+        const int p = kdsp[i][3];
+
+        int ret = 0
+                  || test_deconvolution1d_dynamic(9, 1, 1, k, d, s, p, 1, 0, 0)
+                  || test_deconvolution1d_dynamic(9, 4, 13, k, d, s, p, 0, 1, 7)
+                  || test_deconvolution1d_dynamic(9, 13, 4, k, d, s, p, 1, 1, 0)
+                  || test_deconvolution1d_dynamic(9, 4, 8, k, d, s, p, 0, 0, 0)
+                  || test_deconvolution1d_dynamic(9, 8, 4, k, d, s, p, 1, 0, 7)
+                  || test_deconvolution1d_dynamic(9, 8, 13, k, d, s, p, 0, 2, 0)
+                  || test_deconvolution1d_dynamic(9, 13, 8, k, d, s, p, 1, 2, 0)
+                  || test_deconvolution1d_dynamic(9, 16, 16, k, d, s, p, 0, 0, 7);
+
+        if (ret != 0)
+            return -1;
+    }
+
+    return 0;
+}
+
 int main()
 {
     SRAND(7767517);
 
-    return test_deconvolution1d_0();
+    return test_deconvolution1d_0() || test_deconvolution1d_1();
 }
diff --git a/tests/test_deconvolutiondepthwise1d.cpp b/tests/test_deconvolutiondepthwise1d.cpp
index 21e53d7638c..cadd149ff59 100644
--- a/tests/test_deconvolutiondepthwise1d.cpp
+++ b/tests/test_deconvolutiondepthwise1d.cpp
@@ -107,9 +107,106 @@ static int test_deconvolutiondepthwise1d_0()
     return 0;
 }
 
+static int test_deconvolutiondepthwise1d_dynamic(int w, int h, int outh, int kernel, int dilation, int stride, int pad, int bias, int group, int output_pad_right, int output_w)
+{
+    ncnn::Mat a = RandomMat(w, h);
+
+    if (output_w > 0 && pad != -233 && pad != -234)
+    {
+        pad = -233;
+    }
+
+    ncnn::ParamDict pd;
+    pd.set(0, 0);
+    pd.set(1, 0);
+    pd.set(2, dilation);
+    pd.set(3, stride);
+    pd.set(4, pad);
+    pd.set(5, bias);
+    pd.set(6, 0);
+    pd.set(7, group);
+    pd.set(28, 1); // dynamic weight
+
+    int activation_type = RAND() % 5; // 0 1 2 3 4
+    ncnn::Mat activation_params(2);
+    activation_params[0] = RandomFloat(-1, 0); // alpha
+    activation_params[1] = RandomFloat(0, 1); // beta
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    pd.set(18, output_pad_right);
+    pd.set(20, output_w);
+
+    std::vector<ncnn::Mat> as(bias ? 3 : 2);
+    as[0] = a;
+    as[1] = RandomMat(kernel, outh / group, h);
+    if (bias)
+        as[2] = RandomMat(outh);
+
+    std::vector<ncnn::Mat> weights(0);
+
+    int ret = test_layer("DeconvolutionDepthWise1D", pd, weights, as);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_deconvolutiondepthwise1d_dynamic failed w=%d h=%d outh=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d group=%d act=%d actparams=[%f,%f] output_pad_right=%d output_w=%d\n", w, h, outh, kernel, dilation, stride, pad, bias, group, activation_type, activation_params[0], activation_params[1], output_pad_right, output_w);
+    }
+
+    return ret;
+}
+
+static int test_deconvolutiondepthwise1d_1()
+{
+    static const int kdsp[16][4] = {
+        {1, 1, 1, 0},
+        {1, 1, 2, 0},
+        {2, 1, 1, 1},
+        {2, 1, 2, -233},
+        {3, 1, 1, 1},
+        {3, 1, 2, 1},
+        {3, 2, 1, 1},
+        {4, 1, 1, -233},
+        {4, 1, 2, -234},
+        {4, 2, 1, -234},
+        {5, 1, 1, 2},
+        {5, 1, 2, 2},
+        {5, 2, 2, 2},
+        {7, 1, 1, 3},
+        {7, 1, 2, 3},
+        {7, 2, 1, -233},
+    };
+
+    for (int i = 0; i < 16; i++)
+    {
+        const int k = kdsp[i][0];
+        const int d = kdsp[i][1];
+        const int s = kdsp[i][2];
+        const int p = kdsp[i][3];
+
+        int ret = 0
+                  || test_deconvolutiondepthwise1d_dynamic(15, 1, 1, k, d, s, p, 1, 1, 0, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 2, 2, k, d, s, p, 0, 1, 1, 7)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 2, 2, k, d, s, p, 1, 2, 1, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 3, 3, k, d, s, p, 0, 3, 0, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 4, 2, k, d, s, p, 1, 2, 0, 7)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 4, 4, k, d, s, p, 0, 4, 2, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 7, 7, k, d, s, p, 1, 7, 2, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 8, 8, k, d, s, p, 0, 2, 0, 7)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 8, 8, k, d, s, p, 1, 8, 0, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 12, 12, k, d, s, p, 0, 4, 3, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 15, 15, k, d, s, p, 1, 15, 3, 7)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 16, 8, k, d, s, p, 0, 2, 0, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 16, 16, k, d, s, p, 1, 16, 0, 0);
+
+        if (ret != 0)
+            return -1;
+    }
+
+    return 0;
+}
+
 int main()
 {
     SRAND(7767517);
 
-    return test_deconvolutiondepthwise1d_0();
+    return test_deconvolutiondepthwise1d_0() || test_deconvolutiondepthwise1d_1();
 }
diff --git a/tests/test_deconvolutiondepthwise_1.cpp b/tests/test_deconvolutiondepthwise_1.cpp
new file mode 100644
index 00000000000..02fc8f97351
--- /dev/null
+++ b/tests/test_deconvolutiondepthwise_1.cpp
@@ -0,0 +1,122 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+ +#include "layer/deconvolutiondepthwise.h" +#include "testutil.h" + +static int test_deconvolutiondepthwise_dynamic(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, int group, int output_pad_right, int output_pad_bottom, int output_w, int output_h) +{ + ncnn::Mat a = RandomMat(w, h, c); + + if (output_w > 0 && output_h > 0 && pad != -233 && pad != -234) + { + pad = -233; + } + + ncnn::ParamDict pd; + pd.set(0, 0); + pd.set(1, 0); + pd.set(2, dilation); + pd.set(3, stride); + pd.set(4, pad); + pd.set(5, bias); + pd.set(6, 0); + pd.set(7, group); + pd.set(28, 1); // dynamic weight + + int activation_type = RAND() % 5; // 0 1 2 3 4 + ncnn::Mat activation_params(2); + activation_params[0] = RandomFloat(-1, 0); // alpha + activation_params[1] = RandomFloat(0, 1); // beta + pd.set(9, activation_type); + pd.set(10, activation_params); + + pd.set(18, output_pad_right); + pd.set(19, output_pad_bottom); + pd.set(20, output_w); + pd.set(21, output_h); + + std::vector as(bias ? 3 : 2); + as[0] = a; + as[1] = RandomMat(kernel, kernel, outch / group, c); + if (bias) + as[2] = RandomMat(outch); + + std::vector weights(0); + + int ret = test_layer("DeconvolutionDepthWise", pd, weights, as); + if (ret != 0) + { + fprintf(stderr, "test_deconvolutiondepthwise_dynamic failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d group=%d act=%d actparams=[%f,%f] output_pad_right=%d output_pad_bottom=%d output_w=%d output_h=%d\n", w, h, c, outch, kernel, dilation, stride, pad, bias, group, activation_type, activation_params[0], activation_params[1], output_pad_right, output_pad_bottom, output_w, output_h); + } + + return ret; +} + +static int test_deconvolutiondepthwise_0() +{ + static const int kdsp[16][4] = { + {1, 1, 1, 0}, + {1, 1, 2, 0}, + {2, 1, 1, 1}, + {2, 1, 2, -233}, + {3, 1, 1, 1}, + {3, 1, 2, 1}, + {3, 2, 1, 1}, + {4, 1, 1, -233}, + {4, 1, 2, -234}, + {4, 2, 1, -234}, + {5, 1, 1, 2}, + {5, 1, 2, 2}, + {5, 2, 2, 2}, + {7, 1, 1, 3}, + {7, 1, 2, 3}, + {7, 2, 1, -233}, + }; + + for (int i = 0; i < 16; i++) + { + const int k = kdsp[i][0]; + const int d = kdsp[i][1]; + const int s = kdsp[i][2]; + const int p = kdsp[i][3]; + + int ret = 0 + || test_deconvolutiondepthwise_dynamic(15, 7, 1, 1, k, d, s, p, 1, 1, 0, 0, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 2, 2, k, d, s, p, 0, 1, 1, 1, 7, 5) + || test_deconvolutiondepthwise_dynamic(15, 7, 2, 2, k, d, s, p, 1, 2, 1, 0, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 3, 3, k, d, s, p, 0, 3, 0, 1, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 4, 2, k, d, s, p, 1, 2, 0, 0, 7, 5) + || test_deconvolutiondepthwise_dynamic(15, 7, 4, 4, k, d, s, p, 0, 4, 2, 2, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 7, 7, k, d, s, p, 1, 7, 2, 0, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 8, 8, k, d, s, p, 0, 2, 0, 2, 7, 5) + || test_deconvolutiondepthwise_dynamic(15, 7, 8, 8, k, d, s, p, 1, 8, 0, 0, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 12, 12, k, d, s, p, 0, 4, 3, 3, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 15, 15, k, d, s, p, 1, 15, 3, 0, 7, 5) + || test_deconvolutiondepthwise_dynamic(15, 7, 16, 8, k, d, s, p, 0, 2, 0, 3, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 16, 16, k, d, s, p, 1, 16, 0, 0, 0, 0); + + if (ret != 0) + return -1; + } + + return 0; +} + +int main() +{ + SRAND(7767517); + + return test_deconvolutiondepthwise_0(); +} diff --git a/tools/pnnx/src/pass_ncnn/F_conv_transpose1d.cpp 
diff --git a/tools/pnnx/src/pass_ncnn/F_conv_transpose1d.cpp b/tools/pnnx/src/pass_ncnn/F_conv_transpose1d.cpp
index 5901522afca..7131557cef7 100644
--- a/tools/pnnx/src/pass_ncnn/F_conv_transpose1d.cpp
+++ b/tools/pnnx/src/pass_ncnn/F_conv_transpose1d.cpp
@@ -18,6 +18,194 @@ namespace pnnx {
 
 namespace ncnn {
 
+class F_conv_transpose1d_4 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+4 3
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+F.conv_transpose1d op_0 2 1 input weight out bias=None stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=1
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "Deconvolution1D";
+    }
+
+    const char* name_str() const
+    {
+        return "deconv1d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1];
+        op->params["1"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 0;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2]);
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose1d_4, 22)
+
+class F_conv_transpose1d_5 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+5 4
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+pnnx.Input bias 0 1 bias
+F.conv_transpose1d op_0 3 1 input weight bias out stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=1
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "Deconvolution1D";
+    }
+
+    const char* name_str() const
+    {
+        return "deconv1d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1];
+        op->params["1"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 1;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2]);
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose1d_5, 22)
+
+class F_conv_transpose1d_6 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+4 3
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+F.conv_transpose1d op_0 2 1 input weight out bias=None stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=%groups
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "DeconvolutionDepthWise1D";
+    }
+
+    const char* name_str() const
+    {
+        return "deconvdw1d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1] * captured_params.at("groups").i;
+        op->params["1"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 0;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2]);
+        op->params["7"] = captured_params.at("groups");
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose1d_6, 23)
+
+class F_conv_transpose1d_7 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+5 4
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+pnnx.Input bias 0 1 bias
+F.conv_transpose1d op_0 3 1 input weight bias out stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=%groups
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "DeconvolutionDepthWise1D";
+    }
+
+    const char* name_str() const
+    {
+        return "deconvdw1d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1] * captured_params.at("groups").i;
+        op->params["1"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 1;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2]);
+        op->params["7"] = captured_params.at("groups");
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose1d_7, 23)
+
 } // namespace ncnn
 
 } // namespace pnnx
diff --git a/tools/pnnx/src/pass_ncnn/F_conv_transpose2d.cpp b/tools/pnnx/src/pass_ncnn/F_conv_transpose2d.cpp
index 890f36cc92a..f0e578e7b3c 100644
--- a/tools/pnnx/src/pass_ncnn/F_conv_transpose2d.cpp
+++ b/tools/pnnx/src/pass_ncnn/F_conv_transpose2d.cpp
@@ -18,6 +18,214 @@ namespace pnnx {
 
 namespace ncnn {
 
+class F_conv_transpose2d_4 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+4 3
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+F.conv_transpose2d op_0 2 1 input weight out bias=None stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=1
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "Deconvolution";
+    }
+
+    const char* name_str() const
+    {
+        return "deconv2d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1];
+        op->params["1"] = weight_shape[3];
+        op->params["11"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[1];
+        op->params["12"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[1];
+        op->params["13"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[1];
+        op->params["14"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[1];
+        op->params["19"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 0;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2] * weight_shape[3]);
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose2d_4, 22)
+
+class F_conv_transpose2d_5 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+5 4
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+pnnx.Input bias 0 1 bias
+F.conv_transpose2d op_0 3 1 input weight bias out stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=1
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "Deconvolution";
+    }
+
+    const char* name_str() const
+    {
+        return "deconv2d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1];
+        op->params["1"] = weight_shape[3];
+        op->params["11"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[1];
+        op->params["12"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[1];
+        op->params["13"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[1];
+        op->params["14"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[1];
+        op->params["19"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 1;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2] * weight_shape[3]);
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose2d_5, 22)
+
captured_params.at("padding").ai[1]; + op->params["14"] = captured_params.at("padding").ai[0]; + op->params["18"] = captured_params.at("output_padding").ai[1]; + op->params["19"] = captured_params.at("output_padding").ai[0]; + op->params["5"] = 0; + op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2] * weight_shape[3]); + op->params["28"] = 1; // dynamic weight + } +}; + +REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose2d_4, 22) + +class F_conv_transpose2d_5 : public GraphRewriterPass +{ +public: + const char* match_pattern_graph() const + { + return R"PNNXIR(7767517 +5 4 +pnnx.Input input 0 1 input +pnnx.Input weight 0 1 weight +pnnx.Input bias 0 1 bias +F.conv_transpose2d op_0 3 1 input weight bias out stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=1 +pnnx.Output output 1 0 out +)PNNXIR"; + } + + const char* type_str() const + { + return "Deconvolution"; + } + + const char* name_str() const + { + return "deconv2d"; + } + + void write(Operator* op, const std::map& captured_params, const std::map& /*captured_attrs*/) const + { + std::vector weight_shape = op->inputs[1]->shape; + if (weight_shape.empty()) + { + weight_shape = {0, 0, 0, 0}; + } + + op->params["0"] = weight_shape[1]; + op->params["1"] = weight_shape[3]; + op->params["11"] = weight_shape[2]; + op->params["2"] = captured_params.at("dilation").ai[1]; + op->params["12"] = captured_params.at("dilation").ai[0]; + op->params["3"] = captured_params.at("stride").ai[1]; + op->params["13"] = captured_params.at("stride").ai[0]; + op->params["4"] = captured_params.at("padding").ai[1]; + op->params["14"] = captured_params.at("padding").ai[0]; + op->params["18"] = captured_params.at("output_padding").ai[1]; + op->params["19"] = captured_params.at("output_padding").ai[0]; + op->params["5"] = 1; + op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2] * weight_shape[3]); + op->params["28"] = 1; // dynamic weight + } +}; + +REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose2d_5, 22) + +class F_conv_transpose2d_6 : public GraphRewriterPass +{ +public: + const char* match_pattern_graph() const + { + return R"PNNXIR(7767517 +4 3 +pnnx.Input input 0 1 input +pnnx.Input weight 0 1 weight +F.conv_transpose2d op_0 2 1 input weight out bias=None stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=%groups +pnnx.Output output 1 0 out +)PNNXIR"; + } + + const char* type_str() const + { + return "DeconvolutionDepthWise"; + } + + const char* name_str() const + { + return "deconvdw2d"; + } + + void write(Operator* op, const std::map& captured_params, const std::map& /*captured_attrs*/) const + { + std::vector weight_shape = op->inputs[1]->shape; + if (weight_shape.empty()) + { + weight_shape = {0, 0, 0, 0}; + } + + op->params["0"] = weight_shape[1] * captured_params.at("groups").i; + op->params["1"] = weight_shape[3]; + op->params["11"] = weight_shape[2]; + op->params["2"] = captured_params.at("dilation").ai[1]; + op->params["12"] = captured_params.at("dilation").ai[0]; + op->params["3"] = captured_params.at("stride").ai[1]; + op->params["13"] = captured_params.at("stride").ai[0]; + op->params["4"] = captured_params.at("padding").ai[1]; + op->params["14"] = captured_params.at("padding").ai[0]; + op->params["18"] = captured_params.at("output_padding").ai[1]; + op->params["19"] = captured_params.at("output_padding").ai[0]; + op->params["5"] = 0; + op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * 
diff --git a/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py b/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py
index 479a2d6daef..ff23f77ab27 100644
--- a/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py
@@ -24,33 +24,40 @@ def __init__(self):
         self.b2 = nn.Parameter(torch.rand(12))
         self.w3 = nn.Parameter(torch.rand(12, 2, 3))
 
-    def forward(self, y):
+    def forward(self, x, w0, w1, b1, y):
+        x = F.conv_transpose1d(x, w0, None, stride=2, padding=1, output_padding=1)
+        x = F.conv_transpose1d(x, w1, b1, stride=1, padding=2, dilation=2, groups=2)
+
         y = F.conv_transpose1d(y, self.w2, self.b2, stride=2, padding=1, output_padding=1)
         y = F.conv_transpose1d(y, self.w3, None, stride=1, padding=2, dilation=2, groups=3)
-        return y
+        return x, y
 
 def test():
     net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
+    x = torch.rand(1, 12, 22)
+    w0 = torch.rand(12, 16, 3)
+    w1 = torch.rand(16, 8, 5)
+    b1 = torch.rand(16)
     y = torch.rand(1, 6, 5)
 
-    a = net(y)
+    a0, a1 = net(x, w0, w1, b1, y)
 
     # export torchscript
-    mod = torch.jit.trace(net, y)
+    mod = torch.jit.trace(net, (x, w0, w1, b1, y))
     mod.save("test_F_conv_transpose1d.pt")
 
     # torchscript to pnnx
     import os
-    os.system("../../src/pnnx test_F_conv_transpose1d.pt inputshape=[1,6,5]")
+    os.system("../../src/pnnx test_F_conv_transpose1d.pt inputshape=[1,12,22],[12,16,3],[16,8,5],[16],[1,6,5]")
 
     # ncnn inference
     import test_F_conv_transpose1d_ncnn
-    b = test_F_conv_transpose1d_ncnn.test_inference()
+    b0, b1 = test_F_conv_transpose1d_ncnn.test_inference()
 
-    return torch.allclose(a, b, 1e-4, 1e-4)
+    return torch.allclose(a0, b0, 1e-4, 1e-4) and torch.allclose(a1, b1, 1e-4, 1e-4)
 
 if __name__ == "__main__":
     if test():
diff --git a/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py b/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py
index 7dc2e1e2690..5913f07ab02 100644
--- a/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py
@@ -24,33 +24,40 @@ def __init__(self):
         self.b2 = nn.Parameter(torch.rand(12))
         self.w3 = nn.Parameter(torch.rand(12, 2, 3, 3))
 
-    def forward(self, y):
+    def forward(self, x, w0, w1, b1, y):
+        x = F.conv_transpose2d(x, w0, None, stride=(2,2), padding=(1,1), output_padding=(1,1))
+        x = F.conv_transpose2d(x, w1, b1, stride=(1,2), padding=(2,1), dilation=(2,1), groups=2)
+
         y = F.conv_transpose2d(y, self.w2, self.b2, stride=(2,2), padding=(1,1), output_padding=(1,1))
         y = F.conv_transpose2d(y, self.w3, None, stride=(1,2), padding=(2,1), dilation=(2,1), groups=3)
-        return y
+        return x, y
 
 def test():
     net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
+    x = torch.rand(1, 12, 22, 32)
+    w0 = torch.rand(12, 16, 3, 3)
+    w1 = torch.rand(16, 8, 5, 5)
+    b1 = torch.rand(16)
     y = torch.rand(1, 6, 5, 6)
 
-    a = net(y)
+    a0, a1 = net(x, w0, w1, b1, y)
 
     # export torchscript
-    mod = torch.jit.trace(net, y)
+    mod = torch.jit.trace(net, (x, w0, w1, b1, y))
     mod.save("test_F_conv_transpose2d.pt")
 
     # torchscript to pnnx
     import os
-    os.system("../../src/pnnx test_F_conv_transpose2d.pt inputshape=[1,6,5,6]")
+    os.system("../../src/pnnx test_F_conv_transpose2d.pt inputshape=[1,12,22,32],[12,16,3,3],[16,8,5,5],[16],[1,6,5,6]")
 
     # ncnn inference
     import test_F_conv_transpose2d_ncnn
-    b = test_F_conv_transpose2d_ncnn.test_inference()
+    b0, b1 = test_F_conv_transpose2d_ncnn.test_inference()
 
-    return torch.allclose(a, b, 1e-4, 1e-4)
+    return torch.allclose(a0, b0, 1e-4, 1e-4) and torch.allclose(a1, b1, 1e-4, 1e-4)
 
 if __name__ == "__main__":
     if test():