diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md
index 11d41295f91..c0255375b38 100644
--- a/docs/developer-guide/operators.md
+++ b/docs/developer-guide/operators.md
@@ -529,6 +529,7 @@ y = activation(x3, act_type, act_params)
 | 19 | output_pad_bottom | int | output_pad_right | |
 | 20 | output_w | int | 0 | |
 | 21 | output_h | int | output_w | |
+| 28 | dynamic_weight | int | 0 | |
 
 | weight | type | shape |
 | ------------- | ----- | --------------------- |
@@ -558,6 +559,7 @@ y = activation(x3, act_type, act_params)
 | 15 | pad_right | int | pad_left | |
 | 18 | output_pad_right | int | 0 | |
 | 20 | output_w | int | 0 | |
+| 28 | dynamic_weight | int | 0 | |
 
 | weight | type | shape |
 | ------------- | ----- | --------------------- |
@@ -638,6 +640,7 @@ y = activation(x3, act_type, act_params)
 | 19 | output_pad_bottom | int | output_pad_right | |
 | 20 | output_w | int | 0 | |
 | 21 | output_h | int | output_w | |
+| 28 | dynamic_weight | int | 0 | |
 
 | weight | type | shape |
 | ------------- | ----- | --------------------- |
@@ -668,6 +671,7 @@ y = activation(x3, act_type, act_params)
 | 15 | pad_right | int | pad_left | |
 | 18 | output_pad_right | int | 0 | |
 | 20 | output_w | int | 0 | |
+| 28 | dynamic_weight | int | 0 | |
 
 | weight | type | shape |
 | ------------- | ----- | --------------------- |
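Note: with 28=1 the layer stops being one-blob-only, so the kernel (and the bias when 5=1) arrive as extra bottom blobs at inference time instead of from the model binary. A hypothetical .param entry might look like the sketch below; the layer/blob names and the specific values are illustrative only, not taken from this patch:

```
Deconvolution    deconv_dyn    3 1 x weight bias out 0=16 1=3 3=2 5=1 28=1
```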
diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp
index 849a8daea6b..4e424c86c91 100644
--- a/src/layer/arm/convolution_arm.cpp
+++ b/src/layer/arm/convolution_arm.cpp
@@ -806,9 +806,9 @@ int Convolution_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat
     pd.set(1, _kernel_w);
     pd.set(11, _kernel_h);
     pd.set(2, dilation_w);
-    pd.set(21, dilation_h);
+    pd.set(12, dilation_h);
     pd.set(3, stride_w);
-    pd.set(31, stride_h);
+    pd.set(13, stride_h);
     pd.set(4, pad_left);
     pd.set(15, pad_right);
     pd.set(14, pad_top);
diff --git a/src/layer/arm/deconvolution_arm.cpp b/src/layer/arm/deconvolution_arm.cpp
--- a/src/layer/arm/deconvolution_arm.cpp
+++ b/src/layer/arm/deconvolution_arm.cpp
@@ ... @@ int Deconvolution_arm::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
@@ ... @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+int Deconvolution_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_float16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_ARM82
+#if NCNN_BF16
+    if (opt.use_bf16_storage && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_bfloat16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_BF16
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+#if NCNN_ARM82
+        if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_float16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_ARM82
+#if NCNN_BF16
+        if (opt.use_bf16_storage && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_bfloat16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_BF16
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 #if NCNN_BF16
 int Deconvolution_arm::create_pipeline_bf16s(const Option& opt)
 {
@@ -1167,28 +1310,7 @@ int Deconvolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, cons
                     kptr += maxk;
                 }
 
-                if (activation_type == 1)
-                {
-                    sum = std::max(sum, 0.f);
-                }
-                else if (activation_type == 2)
-                {
-                    float slope = activation_params[0];
-                    sum = sum > 0.f ? sum : sum * slope;
-                }
-                else if (activation_type == 3)
-                {
-                    float min = activation_params[0];
-                    float max = activation_params[1];
-                    if (sum < min)
-                        sum = min;
-                    if (sum > max)
-                        sum = max;
-                }
-                else if (activation_type == 4)
-                {
-                    sum = 1.f / (1.f + expf(-sum));
-                }
+                sum = activation_ss(sum, activation_type, activation_params);
 
                 outptr[j] = float32_to_bfloat16(sum);
             }
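The reorder loop above is the heart of the dynamic path: the incoming weight blob is laid out inch-major (a Convolution-style kernel), while Deconvolution expects outch-major. A minimal standalone sketch of the same index math with toy sizes (not ncnn code, just the loop lifted out for clarity):

```cpp
#include <cstdio>
#include <vector>

int main()
{
    const int inch = 2, outch = 3, maxk = 4; // toy sizes, single group

    std::vector<float> src(inch * outch * maxk), dst(src.size());
    for (size_t i = 0; i < src.size(); i++)
        src[i] = (float)i;

    // same index math as the patch: (inch, outch, k) -> (outch, inch, k)
    for (int i = 0; i < outch; i++)
        for (int j = 0; j < inch; j++)
            for (int k = 0; k < maxk; k++)
                dst[(i * inch + j) * maxk + k] = src[(j * outch + i) * maxk + k];

    // spot check: element (outch 1, inch 0, k 2) came from (inch 0, outch 1, k 2)
    printf("%.0f == %.0f\n", dst[(1 * inch + 0) * maxk + 2], src[(0 * outch + 1) * maxk + 2]);
    return 0;
}
```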
diff --git a/src/layer/arm/deconvolution_arm.h b/src/layer/arm/deconvolution_arm.h
index bbc1e6faa53..3c7979687cb 100644
--- a/src/layer/arm/deconvolution_arm.h
+++ b/src/layer/arm/deconvolution_arm.h
@@ -29,6 +29,8 @@ class Deconvolution_arm : virtual public Deconvolution
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
 #if NCNN_ARM82
     int create_pipeline_fp16s(const Option& opt);
diff --git a/src/layer/arm/deconvolutiondepthwise_arm.cpp b/src/layer/arm/deconvolutiondepthwise_arm.cpp
index dd3b4f8bd0e..478bd1740dc 100644
--- a/src/layer/arm/deconvolutiondepthwise_arm.cpp
+++ b/src/layer/arm/deconvolutiondepthwise_arm.cpp
@@ -43,6 +43,9 @@ DeconvolutionDepthWise_arm::DeconvolutionDepthWise_arm()
 
 int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
 #if NCNN_ARM82
     if (support_fp16_storage && opt.use_fp16_storage)
     {
@@ -386,28 +389,7 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c
                     }
                 }
 
-                if (activation_type == 1)
-                {
-                    sum = std::max(sum, 0.f);
-                }
-                else if (activation_type == 2)
-                {
-                    float slope = activation_params[0];
-                    sum = sum > 0.f ? sum : sum * slope;
-                }
-                else if (activation_type == 3)
-                {
-                    float min = activation_params[0];
-                    float max = activation_params[1];
-                    if (sum < min)
-                        sum = min;
-                    if (sum > max)
-                        sum = max;
-                }
-                else if (activation_type == 4)
-                {
-                    sum = 1.f / (1.f + expf(-sum));
-                }
+                sum = activation_ss(sum, activation_type, activation_params);
 
                 outptr[j] = sum;
             }
@@ -482,6 +464,147 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c
     return 0;
 }
 
+int DeconvolutionDepthWise_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_float16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_ARM82
+#if NCNN_BF16
+    if (opt.use_bf16_storage && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_bfloat16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_BF16
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+#if NCNN_ARM82
+        if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_float16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_ARM82
+#if NCNN_BF16
+        if (opt.use_bf16_storage && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_bfloat16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_BF16
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::DeconvolutionDepthWise);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(7, group);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 #if NCNN_BF16
 int DeconvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
@@ -639,28 +762,7 @@ int DeconvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_b
                     }
                 }
 
-                if (activation_type == 1)
-                {
-                    sum = std::max(sum, 0.f);
-                }
-                else if (activation_type == 2)
-                {
-                    float slope = activation_params[0];
-                    sum = sum > 0.f ? sum : sum * slope;
-                }
-                else if (activation_type == 3)
-                {
-                    float min = activation_params[0];
-                    float max = activation_params[1];
-                    if (sum < min)
-                        sum = min;
-                    if (sum > max)
-                        sum = max;
-                }
-                else if (activation_type == 4)
-                {
-                    sum = 1.f / (1.f + expf(-sum));
-                }
+                sum = activation_ss(sum, activation_type, activation_params);
 
                 outptr[j] = float32_to_bfloat16(sum);
             }
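activation_ss replaces the open-coded branches in each of these inner loops. For the four types the removed blocks handled, its behavior matches the scalar helper sketched below (a paraphrase for clarity, not the actual ncnn helper, which may cover further activation types):

```cpp
#include <algorithm>
#include <cmath>

// Equivalent scalar behavior for activation types 1..4 (sketch only).
static float activation_scalar(float v, int type, const float* params)
{
    if (type == 1) return std::max(v, 0.f);                            // ReLU
    if (type == 2) return v > 0.f ? v : v * params[0];                 // LeakyReLU, params[0] = slope
    if (type == 3) return std::min(std::max(v, params[0]), params[1]); // clip to [min, max]
    if (type == 4) return 1.f / (1.f + expf(-v));                      // sigmoid
    return v;                                                          // 0 = identity
}
```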
diff --git a/src/layer/arm/deconvolutiondepthwise_arm.h b/src/layer/arm/deconvolutiondepthwise_arm.h
index 2022d2a54cb..6eff45ede3a 100644
--- a/src/layer/arm/deconvolutiondepthwise_arm.h
+++ b/src/layer/arm/deconvolutiondepthwise_arm.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_arm : virtual public DeconvolutionDepthWise
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
 #if NCNN_ARM82
     int create_pipeline_fp16s(const Option& opt);
diff --git a/src/layer/deconvolution.cpp b/src/layer/deconvolution.cpp
index 411395f961e..438e2bf7282 100644
--- a/src/layer/deconvolution.cpp
+++ b/src/layer/deconvolution.cpp
@@ -46,11 +46,21 @@ int Deconvolution::load_param(const ParamDict& pd)
     activation_type = pd.get(9, 0);
     activation_params = pd.get(10, Mat());
 
+    dynamic_weight = pd.get(28, 0);
+
+    if (dynamic_weight)
+    {
+        one_blob_only = false;
+    }
+
     return 0;
 }
 
 int Deconvolution::load_model(const ModelBin& mb)
 {
+    if (dynamic_weight)
+        return 0;
+
     weight_data = mb.load(weight_data_size, 0);
     if (weight_data.empty())
         return -100;
@@ -180,6 +190,93 @@ int Deconvolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
     return 0;
 }
 
+int Deconvolution::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+    }
+
+    const int w = bottom_blob.w;
+    const int h = bottom_blob.h;
+
+    const int kernel_extent_w = dilation_w * (_kernel_w - 1) + 1;
+    const int kernel_extent_h = dilation_h * (_kernel_h - 1) + 1;
+
+    int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
+    int outh = (h - 1) * stride_h + kernel_extent_h + output_pad_bottom;
+
+    Mat top_blob_bordered;
+    if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || (output_w > 0 && output_h > 0))
+    {
+        top_blob_bordered.create(outw, outh, _num_output, 4u, opt.workspace_allocator);
+    }
+    else
+    {
+        top_blob_bordered = top_blob;
+        top_blob_bordered.create(outw, outh, _num_output, 4u, opt.blob_allocator);
+    }
+    if (top_blob_bordered.empty())
+        return -100;
+
+    int ret = deconvolution(bottom_blob, top_blob_bordered, weight_data_transposed, bias_data_flattened, _kernel_w, _kernel_h, stride_w, stride_h, dilation_w, dilation_h, activation_type, activation_params, opt);
+    if (ret != 0)
+        return ret;
+
+    cut_padding(top_blob_bordered, top_blob, opt);
+    if (top_blob.empty())
+        return -100;
+
+    return 0;
+}
+
 void Deconvolution::cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const
 {
     if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
diff --git a/src/layer/deconvolution.h b/src/layer/deconvolution.h
index b0025a63ef4..ece5e1e5010 100644
--- a/src/layer/deconvolution.h
+++ b/src/layer/deconvolution.h
@@ -30,6 +30,8 @@ class Deconvolution : public Layer
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     void cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const;
 
@@ -58,6 +60,8 @@ class Deconvolution : public Layer
     int activation_type;
     Mat activation_params;
 
+    int dynamic_weight;
+
     // model
     Mat weight_data;
     Mat bias_data;
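A quick worked example of the output-extent formula used in the reference forward() above (sample values chosen for illustration):

```cpp
#include <cstdio>

int main()
{
    // assumed sample values: 7-wide input, stride 2, 3-wide kernel, no dilation/pad
    const int w = 7, stride_w = 2, kernel_w = 3, dilation_w = 1, output_pad_right = 0;

    const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; // 3
    const int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;

    printf("outw = %d\n", outw); // 15 -- a stride-2 deconvolution roughly doubles the extent
    return 0;
}
```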
diff --git a/src/layer/deconvolution1d.cpp b/src/layer/deconvolution1d.cpp
index 50cb5600950..0dd7ae47028 100644
--- a/src/layer/deconvolution1d.cpp
+++ b/src/layer/deconvolution1d.cpp
@@ -39,11 +39,21 @@ int Deconvolution1D::load_param(const ParamDict& pd)
     activation_type = pd.get(9, 0);
     activation_params = pd.get(10, Mat());
 
+    dynamic_weight = pd.get(28, 0);
+
+    if (dynamic_weight)
+    {
+        one_blob_only = false;
+    }
+
     return 0;
 }
 
 int Deconvolution1D::load_model(const ModelBin& mb)
 {
+    if (dynamic_weight)
+        return 0;
+
     weight_data = mb.load(weight_data_size, 0);
     if (weight_data.empty())
         return -100;
@@ -143,6 +153,89 @@ int Deconvolution1D::forward(const Mat& bottom_blob, Mat& top_blob, const Option
     return 0;
 }
 
+int Deconvolution1D::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.h;
+    const int _kernel_w = _weight_data.w;
+    const int _num_output = _weight_data.h * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // transpose group-inch/group-outch/group-kw to group-outch/group-inch/group-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+    }
+
+    const int w = bottom_blob.w;
+
+    const int kernel_extent_w = dilation_w * (_kernel_w - 1) + 1;
+
+    int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
+
+    Mat top_blob_bordered;
+    if (pad_left > 0 || pad_right > 0 || output_w > 0)
+    {
+        top_blob_bordered.create(outw, _num_output, 4u, opt.workspace_allocator);
+    }
+    else
+    {
+        top_blob_bordered = top_blob;
+        top_blob_bordered.create(outw, _num_output, 4u, opt.blob_allocator);
+    }
+    if (top_blob_bordered.empty())
+        return -100;
+
+    int ret = deconvolution1d(bottom_blob, top_blob_bordered, weight_data_transposed, bias_data_flattened, _kernel_w, stride_w, dilation_w, activation_type, activation_params, opt);
+    if (ret != 0)
+        return ret;
+
+    cut_padding(top_blob_bordered, top_blob, opt);
+    if (top_blob.empty())
+        return -100;
+
+    return 0;
+}
+
 void Deconvolution1D::cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const
 {
     if (pad_left > 0 || pad_right > 0)
diff --git a/src/layer/deconvolution1d.h b/src/layer/deconvolution1d.h
index 4f681beee90..c3b64dd561f 100644
--- a/src/layer/deconvolution1d.h
+++ b/src/layer/deconvolution1d.h
@@ -30,6 +30,8 @@ class Deconvolution1D : public Layer
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     void cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const;
 
@@ -50,6 +52,8 @@ class Deconvolution1D : public Layer
     int activation_type;
     Mat activation_params;
 
+    int dynamic_weight;
+
     // model
     Mat weight_data;
     Mat bias_data;
diff --git a/src/layer/deconvolutiondepthwise.cpp b/src/layer/deconvolutiondepthwise.cpp
index cd0ef36ea19..462ade2a21f 100644
--- a/src/layer/deconvolutiondepthwise.cpp
+++ b/src/layer/deconvolutiondepthwise.cpp
@@ -47,11 +47,21 @@ int DeconvolutionDepthWise::load_param(const ParamDict& pd)
     activation_type = pd.get(9, 0);
     activation_params = pd.get(10, Mat());
 
+    dynamic_weight = pd.get(28, 0);
+
+    if (dynamic_weight)
+    {
+        one_blob_only = false;
+    }
+
     return 0;
 }
 
 int DeconvolutionDepthWise::load_model(const ModelBin& mb)
 {
+    if (dynamic_weight)
+        return 0;
+
     weight_data = mb.load(weight_data_size, 0);
     if (weight_data.empty())
         return -100;
@@ -243,6 +253,93 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const
     return 0;
 }
 
+int DeconvolutionDepthWise::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+    }
+
+    const int w = bottom_blob.w;
+    const int h = bottom_blob.h;
+
+    const int kernel_extent_w = dilation_w * (_kernel_w - 1) + 1;
+    const int kernel_extent_h = dilation_h * (_kernel_h - 1) + 1;
+
+    int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
+    int outh = (h - 1) * stride_h + kernel_extent_h + output_pad_bottom;
+
+    Mat top_blob_bordered;
+    if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || (output_w > 0 && output_h > 0))
+    {
+        top_blob_bordered.create(outw, outh, _num_output, 4u, opt.workspace_allocator);
+    }
+    else
+    {
+        top_blob_bordered = top_blob;
+        top_blob_bordered.create(outw, outh, _num_output, 4u, opt.blob_allocator);
+    }
+    if (top_blob_bordered.empty())
+        return -100;
+
+    int ret = deconvolutiondepthwise(bottom_blob, top_blob_bordered, weight_data_transposed, bias_data_flattened, _kernel_w, _kernel_h, stride_w, stride_h, dilation_w, dilation_h, group, activation_type, activation_params, opt);
+    if (ret != 0)
+        return ret;
+
+    cut_padding(top_blob_bordered, top_blob, opt);
+    if (top_blob.empty())
+        return -100;
+
+    return 0;
+}
+
 void DeconvolutionDepthWise::cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const
 {
     if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
diff --git a/src/layer/deconvolutiondepthwise.h b/src/layer/deconvolutiondepthwise.h
index 7d99f0064a5..8e6507d6434 100644
--- a/src/layer/deconvolutiondepthwise.h
+++ b/src/layer/deconvolutiondepthwise.h
@@ -30,6 +30,8 @@ class DeconvolutionDepthWise : public Layer
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     void cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const;
 
@@ -59,6 +61,8 @@ class DeconvolutionDepthWise : public Layer
     int activation_type;
     Mat activation_params;
 
+    int dynamic_weight;
+
     // model
     Mat weight_data;
     Mat bias_data;
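These forward() overloads read the dynamic weight blob as a 4-D Mat whose dims appear to map to w = kernel_w, h = kernel_h, d = num_output / group, c = num_input, which is how `_num_output = _weight_data.d * group` recovers the full output channel count. A sketch of constructing such a blob under that assumed layout:

```cpp
#include "mat.h" // ncnn; the installed header is typically <ncnn/mat.h>

int main()
{
    const int group = 2, inch = 4, outch = 6, kw = 3, kh = 3;

    // assumed dims: w = kw, h = kh, d = outch per group, c = inch,
    // so forward() can recover num_output as weight.d * group (= 6 here)
    ncnn::Mat weight(kw, kh, outch / group, inch);

    return weight.empty() ? 1 : 0;
}
```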
diff --git a/src/layer/deconvolutiondepthwise1d.cpp b/src/layer/deconvolutiondepthwise1d.cpp
index 15b6c88539b..955795eb788 100644
--- a/src/layer/deconvolutiondepthwise1d.cpp
+++ b/src/layer/deconvolutiondepthwise1d.cpp
@@ -40,11 +40,21 @@ int DeconvolutionDepthWise1D::load_param(const ParamDict& pd)
     activation_type = pd.get(9, 0);
     activation_params = pd.get(10, Mat());
 
+    dynamic_weight = pd.get(28, 0);
+
+    if (dynamic_weight)
+    {
+        one_blob_only = false;
+    }
+
     return 0;
 }
 
 int DeconvolutionDepthWise1D::load_model(const ModelBin& mb)
 {
+    if (dynamic_weight)
+        return 0;
+
     weight_data = mb.load(weight_data_size, 0);
     if (weight_data.empty())
         return -100;
@@ -195,6 +205,89 @@ int DeconvolutionDepthWise1D::forward(const Mat& bottom_blob, Mat& top_blob, con
     return 0;
 }
 
+int DeconvolutionDepthWise1D::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.h;
+    const int _kernel_w = _weight_data.w;
+    const int _num_output = _weight_data.h * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // transpose group-inch/group-outch/group-kw to group-outch/group-inch/group-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+    }
+
+    const int w = bottom_blob.w;
+
+    const int kernel_extent_w = dilation_w * (_kernel_w - 1) + 1;
+
+    int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
+
+    Mat top_blob_bordered;
+    if (pad_left > 0 || pad_right > 0 || output_w > 0)
+    {
+        top_blob_bordered.create(outw, _num_output, 4u, opt.workspace_allocator);
+    }
+    else
+    {
+        top_blob_bordered = top_blob;
+        top_blob_bordered.create(outw, _num_output, 4u, opt.blob_allocator);
+    }
+    if (top_blob_bordered.empty())
+        return -100;
+
+    int ret = deconvolutiondepthwise1d(bottom_blob, top_blob_bordered, weight_data_transposed, bias_data_flattened, _kernel_w, stride_w, dilation_w, group, activation_type, activation_params, opt);
+    if (ret != 0)
+        return ret;
+
+    cut_padding(top_blob_bordered, top_blob, opt);
+    if (top_blob.empty())
+        return -100;
+
+    return 0;
+}
+
 void DeconvolutionDepthWise1D::cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const
 {
     if (pad_left > 0 || pad_right > 0)
diff --git a/src/layer/deconvolutiondepthwise1d.h b/src/layer/deconvolutiondepthwise1d.h
index 50332f11fc6..1ed1513d256 100644
--- a/src/layer/deconvolutiondepthwise1d.h
+++ b/src/layer/deconvolutiondepthwise1d.h
@@ -30,6 +30,8 @@ class DeconvolutionDepthWise1D : public Layer
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     void cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const;
 
@@ -51,6 +53,8 @@ class DeconvolutionDepthWise1D : public Layer
     int activation_type;
     Mat activation_params;
 
+    int dynamic_weight;
+
     // model
     Mat weight_data;
     Mat bias_data;
diff --git a/src/layer/loongarch/convolution_loongarch.cpp b/src/layer/loongarch/convolution_loongarch.cpp
index 31719b3de92..7816d1c66d2 100644
--- a/src/layer/loongarch/convolution_loongarch.cpp
+++ b/src/layer/loongarch/convolution_loongarch.cpp
@@ -600,9 +600,9 @@ int Convolution_loongarch::forward(const std::vector<Mat>& bottom_blobs, std::ve
     pd.set(1, _kernel_w);
     pd.set(11, _kernel_h);
     pd.set(2, dilation_w);
-    pd.set(21, dilation_h);
+    pd.set(12, dilation_h);
     pd.set(3, stride_w);
-    pd.set(31, stride_h);
+    pd.set(13, stride_h);
     pd.set(4, pad_left);
     pd.set(15, pad_right);
     pd.set(14, pad_top);
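The pd.set fixes here (and repeated per architecture) correct mis-numbered param IDs in the dynamic Convolution path: in the param scheme, the h-variant of each spatial parameter is its w-variant's ID plus 10 (1/11 kernel, 2/12 dilation, 3/13 stride). Keys 21 and 31 are not read by Convolution::load_param, so dilation_h and stride_h silently fell back to their *_w defaults, breaking asymmetric configurations. A sketch of the corrected scheme (hypothetical helper, not part of the patch):

```cpp
#include "paramdict.h" // ncnn

// Correct ID scheme for the inner op's spatial params (w ID + 10 = h ID).
void fill_spatial_params(ncnn::ParamDict& pd, int dilation_w, int dilation_h, int stride_w, int stride_h)
{
    pd.set(2, dilation_w);
    pd.set(12, dilation_h); // was 21, a key Convolution::load_param never reads
    pd.set(3, stride_w);
    pd.set(13, stride_h);   // was 31, likewise unread
}
```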
diff --git a/src/layer/loongarch/deconvolution_loongarch.cpp b/src/layer/loongarch/deconvolution_loongarch.cpp
index bb913909b55..2d934bccb06 100644
--- a/src/layer/loongarch/deconvolution_loongarch.cpp
+++ b/src/layer/loongarch/deconvolution_loongarch.cpp
@@ -40,6 +40,9 @@ Deconvolution_loongarch::Deconvolution_loongarch()
 
 int Deconvolution_loongarch::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
     const int maxk = kernel_w * kernel_h;
     int num_input = weight_data_size / maxk / num_output;
 
@@ -281,4 +284,110 @@ int Deconvolution_loongarch::forward(const Mat& bottom_blob, Mat& top_blob, cons
     return 0;
 }
 
+int Deconvolution_loongarch::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/loongarch/deconvolution_loongarch.h b/src/layer/loongarch/deconvolution_loongarch.h
index bb7653b563f..f67b5d7e4e1 100644
--- a/src/layer/loongarch/deconvolution_loongarch.h
+++ b/src/layer/loongarch/deconvolution_loongarch.h
@@ -29,6 +29,8 @@ class Deconvolution_loongarch : virtual public Deconvolution
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 public:
     Mat weight_data_tm;
 };
diff --git a/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp b/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp
index a141dd70360..f4f4d37bf7f 100644
--- a/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp
+++ b/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp
@@ -34,6 +34,9 @@ DeconvolutionDepthWise_loongarch::DeconvolutionDepthWise_loongarch()
 
 int DeconvolutionDepthWise_loongarch::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
     const int maxk = kernel_w * kernel_h;
     int channels = (weight_data_size / group) / maxk / (num_output / group) * group;
 
@@ -409,4 +412,111 @@ int DeconvolutionDepthWise_loongarch::forward(const Mat& bottom_blob, Mat& top_b
     return 0;
 }
 
+int DeconvolutionDepthWise_loongarch::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::DeconvolutionDepthWise);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(7, group);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/loongarch/deconvolutiondepthwise_loongarch.h b/src/layer/loongarch/deconvolutiondepthwise_loongarch.h
index e41e7cac9e1..b710f07ecf3 100644
--- a/src/layer/loongarch/deconvolutiondepthwise_loongarch.h
+++ b/src/layer/loongarch/deconvolutiondepthwise_loongarch.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_loongarch : virtual public DeconvolutionDepthWise
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     int create_group_ops(const Option& opt);
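Every dynamic forward() in this patch follows the same dispatch pattern: build a throwaway statically-weighted layer, feed it the repacked runtime weights, run it once, tear it down. A condensed sketch of that lifecycle (the helper name is illustrative; the includes are assumed to be ncnn's layer.h/layer_type.h):

```cpp
#include "layer.h" // ncnn; pulls in Mat, Option, ParamDict, ModelBin
#include "layer_type.h"

// hypothetical helper condensing the pattern; not part of the patch
int run_deconv_once(const ncnn::Mat& x, ncnn::Mat& y,
                    const ncnn::Mat& weight_transposed, const ncnn::Mat& bias,
                    const ncnn::ParamDict& pd, const ncnn::Option& opt)
{
    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);

    op->load_param(pd); // geometry: kernel, stride, dilation, pads...

    ncnn::Mat weights[2];
    weights[0] = weight_transposed;
    weights[1] = bias;
    op->load_model(ncnn::ModelBinFromMatArray(weights));

    op->create_pipeline(opt); // rebuilt on every call: generality over speed
    int ret = op->forward(x, y, opt);
    op->destroy_pipeline(opt);

    delete op;
    return ret;
}
```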
diff --git a/src/layer/mips/convolution_mips.cpp b/src/layer/mips/convolution_mips.cpp
index 5dd648fa3df..bc547e4a667 100644
--- a/src/layer/mips/convolution_mips.cpp
+++ b/src/layer/mips/convolution_mips.cpp
@@ -600,9 +600,9 @@ int Convolution_mips::forward(const std::vector<Mat>& bottom_blobs, std::vector<
     pd.set(1, _kernel_w);
     pd.set(11, _kernel_h);
     pd.set(2, dilation_w);
-    pd.set(21, dilation_h);
+    pd.set(12, dilation_h);
     pd.set(3, stride_w);
-    pd.set(31, stride_h);
+    pd.set(13, stride_h);
     pd.set(4, pad_left);
     pd.set(15, pad_right);
     pd.set(14, pad_top);
diff --git a/src/layer/mips/deconvolution_mips.cpp b/src/layer/mips/deconvolution_mips.cpp
index c3838f3ea69..506d3072096 100644
--- a/src/layer/mips/deconvolution_mips.cpp
+++ b/src/layer/mips/deconvolution_mips.cpp
@@ -40,6 +40,9 @@ Deconvolution_mips::Deconvolution_mips()
 
 int Deconvolution_mips::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
     const int maxk = kernel_w * kernel_h;
     int num_input = weight_data_size / maxk / num_output;
 
@@ -281,4 +284,110 @@ int Deconvolution_mips::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
     return 0;
 }
 
+int Deconvolution_mips::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/mips/deconvolution_mips.h b/src/layer/mips/deconvolution_mips.h
index 4d5e1ad985a..218bd812672 100644
--- a/src/layer/mips/deconvolution_mips.h
+++ b/src/layer/mips/deconvolution_mips.h
@@ -29,6 +29,8 @@ class Deconvolution_mips : virtual public Deconvolution
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 public:
     Mat weight_data_tm;
 };
diff --git a/src/layer/mips/deconvolutiondepthwise_mips.cpp b/src/layer/mips/deconvolutiondepthwise_mips.cpp
index da76b5801cd..533bf522ad9 100644
--- a/src/layer/mips/deconvolutiondepthwise_mips.cpp
+++ b/src/layer/mips/deconvolutiondepthwise_mips.cpp
@@ -34,6 +34,9 @@ DeconvolutionDepthWise_mips::DeconvolutionDepthWise_mips()
 
 int DeconvolutionDepthWise_mips::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
     const int maxk = kernel_w * kernel_h;
     int channels = (weight_data_size / group) / maxk / (num_output / group) * group;
 
@@ -409,4 +412,111 @@ int DeconvolutionDepthWise_mips::forward(const Mat& bottom_blob, Mat& top_blob,
     return 0;
 }
 
+int DeconvolutionDepthWise_mips::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::DeconvolutionDepthWise);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(7, group);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/mips/deconvolutiondepthwise_mips.h b/src/layer/mips/deconvolutiondepthwise_mips.h
index 90cb7c3acbb..a033d7c11c3 100644
--- a/src/layer/mips/deconvolutiondepthwise_mips.h
+++ b/src/layer/mips/deconvolutiondepthwise_mips.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_mips : virtual public DeconvolutionDepthWise
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     int create_group_ops(const Option& opt);
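The recurring "as pack1" step in these optimized paths does not move any data: it only rewrites the Mat descriptor so SIMD-packed storage reads as a flat fp32 array. A Mat with elempack 4 stores 4 logical elements per slot, so widening w while shrinking elemsize keeps the total byte count identical. The same view change as an illustrative helper (not an ncnn API):

```cpp
#include "mat.h" // ncnn

// Reinterpret a packed Mat as elempack 1 in place; no copy happens.
static void as_pack1(ncnn::Mat& m)
{
    m.w *= m.elempack;
    m.elemsize /= m.elempack;
    m.elempack = 1;
}
```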
diff --git a/src/layer/riscv/convolution_riscv.cpp b/src/layer/riscv/convolution_riscv.cpp
index c66279e2c58..4c4d57c6a57 100644
--- a/src/layer/riscv/convolution_riscv.cpp
+++ b/src/layer/riscv/convolution_riscv.cpp
@@ -684,9 +684,9 @@ int Convolution_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector
     pd.set(1, _kernel_w);
     pd.set(11, _kernel_h);
     pd.set(2, dilation_w);
-    pd.set(21, dilation_h);
+    pd.set(12, dilation_h);
     pd.set(3, stride_w);
-    pd.set(31, stride_h);
+    pd.set(13, stride_h);
     pd.set(4, pad_left);
     pd.set(15, pad_right);
     pd.set(14, pad_top);
diff --git a/src/layer/riscv/deconvolution_riscv.cpp b/src/layer/riscv/deconvolution_riscv.cpp
index 936b563f646..9202d367f93 100644
--- a/src/layer/riscv/deconvolution_riscv.cpp
+++ b/src/layer/riscv/deconvolution_riscv.cpp
@@ -14,6 +14,7 @@
 
 #include "deconvolution_riscv.h"
 
+#include "cpu.h"
 #include "layer_type.h"
 
 #if __riscv_vector
@@ -50,6 +51,9 @@ Deconvolution_riscv::Deconvolution_riscv()
 
 int Deconvolution_riscv::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
 #if __riscv_vector && __riscv_zfh
     if (opt.use_fp16_storage)
     {
@@ -318,6 +322,130 @@ int Deconvolution_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Op
     return 0;
 }
 
+int Deconvolution_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+#if NCNN_RVV
+    if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_float16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_RVV
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+#if NCNN_RVV
+        if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_float16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_RVV
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 #if __riscv_vector && __riscv_zfh
 int Deconvolution_riscv::create_pipeline_fp16s(const Option& opt)
 {
diff --git a/src/layer/riscv/deconvolution_riscv.h b/src/layer/riscv/deconvolution_riscv.h
index 3574c09d10e..903a420427a 100644
--- a/src/layer/riscv/deconvolution_riscv.h
+++ b/src/layer/riscv/deconvolution_riscv.h
@@ -29,6 +29,8 @@ class Deconvolution_riscv : virtual public Deconvolution
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
 #if __riscv_vector && __riscv_zfh
     int create_pipeline_fp16s(const Option& opt);
diff --git a/src/layer/riscv/deconvolutiondepthwise_riscv.cpp b/src/layer/riscv/deconvolutiondepthwise_riscv.cpp
index b53e8962fd2..eee765c4ea6 100644
--- a/src/layer/riscv/deconvolutiondepthwise_riscv.cpp
+++ b/src/layer/riscv/deconvolutiondepthwise_riscv.cpp
@@ -14,6 +14,7 @@
 
 #include "deconvolutiondepthwise_riscv.h"
 
+#include "cpu.h"
 #include "layer_type.h"
 
 #if __riscv_vector
@@ -37,6 +38,9 @@ DeconvolutionDepthWise_riscv::DeconvolutionDepthWise_riscv()
 
 int DeconvolutionDepthWise_riscv::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
 #if __riscv_vector && __riscv_zfh
     if (opt.use_fp16_storage)
     {
@@ -445,6 +449,131 @@ int DeconvolutionDepthWise_riscv::forward(const Mat& bottom_blob, Mat& top_blob,
     return 0;
 }
 
+int DeconvolutionDepthWise_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+#if NCNN_RVV
+    if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && weight_data_flattened.elembits() == 16)
+    {
+        Mat weight_data_flattened_fp32;
+        cast_float16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
+        weight_data_flattened = weight_data_flattened_fp32;
+    }
+#endif // NCNN_RVV
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+#if NCNN_RVV
+        if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && bias_data_flattened.elembits() == 16)
+        {
+            Mat bias_data_flattened_fp32;
+            cast_float16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
+            bias_data_flattened = bias_data_flattened_fp32;
+        }
+#endif // NCNN_RVV
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::DeconvolutionDepthWise);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(7, group);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 #if __riscv_vector && __riscv_zfh
 int DeconvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt)
 {
diff --git a/src/layer/riscv/deconvolutiondepthwise_riscv.h b/src/layer/riscv/deconvolutiondepthwise_riscv.h
index ccda5f248e3..5cdbd0d0676 100644
--- a/src/layer/riscv/deconvolutiondepthwise_riscv.h
+++ b/src/layer/riscv/deconvolutiondepthwise_riscv.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_riscv : virtual public DeconvolutionDepthWise
 
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     int create_group_ops(const Option& opt);
 #if __riscv_vector && __riscv_zfh
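On builds with 16-bit storage enabled, the runtime weight and bias blobs may arrive in fp16 (or bf16 on arm) storage, so they are cast up to fp32 before repacking: the inner op is always fed fp32 weights on these paths. The shape of that check, minus the per-arch CPU-capability tests, as an illustrative helper (not an ncnn API):

```cpp
#include "mat.h" // ncnn

// Normalize a possibly fp16-storage blob to fp32. The patch additionally
// gates this on arch support, e.g. cpu_support_riscv_v()/cpu_support_riscv_zfh().
static void to_fp32_if_needed(ncnn::Mat& blob, const ncnn::Option& opt)
{
    if (blob.elembits() == 16)
    {
        ncnn::Mat fp32;
        ncnn::cast_float16_to_float32(blob, fp32, opt);
        blob = fp32;
    }
}
```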
diff --git a/src/layer/riscv/deconvolutiondepthwise_riscv.h b/src/layer/riscv/deconvolutiondepthwise_riscv.h
index ccda5f248e3..5cdbd0d0676 100644
--- a/src/layer/riscv/deconvolutiondepthwise_riscv.h
+++ b/src/layer/riscv/deconvolutiondepthwise_riscv.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_riscv : virtual public DeconvolutionDepthWise
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     int create_group_ops(const Option& opt);
 #if __riscv_vector && __riscv_zfh
diff --git a/src/layer/vulkan/deconvolution_vulkan.cpp b/src/layer/vulkan/deconvolution_vulkan.cpp
index 30283d211c1..c53aedefc84 100644
--- a/src/layer/vulkan/deconvolution_vulkan.cpp
+++ b/src/layer/vulkan/deconvolution_vulkan.cpp
@@ -35,6 +35,13 @@ Deconvolution_vulkan::Deconvolution_vulkan()
 
 int Deconvolution_vulkan::create_pipeline(const Option& _opt)
 {
+    if (dynamic_weight)
+    {
+        support_vulkan = false;
+        support_image_storage = false;
+        return 0;
+    }
+
     Option opt = _opt;
     const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
     const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
diff --git a/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp b/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp
index ee9d949d303..b24418fa428 100644
--- a/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp
+++ b/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp
@@ -44,6 +44,13 @@ DeconvolutionDepthWise_vulkan::DeconvolutionDepthWise_vulkan()
 
 int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& _opt)
 {
+    if (dynamic_weight)
+    {
+        support_vulkan = false;
+        support_image_storage = false;
+        return 0;
+    }
+
     Option opt = _opt;
     const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
     const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
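One note on the two Vulkan hunks above: a dynamic weight only arrives as a bottom blob at inference time, so there is nothing to upload or specialize when the pipeline is built. Clearing the support flags makes the net schedule the layer on the CPU implementations added elsewhere in this patch. A condensed sketch of that contract (my paraphrase, not additional ncnn API):

// Paraphrase of the opt-out pattern in both Vulkan create_pipeline() hunks.
// With dynamic_weight set there are no constant weights to bake into a GPU
// pipeline, so the layer declares itself unsupported and returns success;
// the CPU forward(bottom_blobs, top_blobs, opt) overloads handle it instead.
int create_pipeline_guard(bool dynamic_weight, bool& support_vulkan, bool& support_image_storage)
{
    if (dynamic_weight)
    {
        support_vulkan = false;
        support_image_storage = false;
        return 0; // not an error: this is a deliberate CPU fallback
    }
    // ... normal shader pipeline specialization for static weights ...
    return 0;
}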
diff --git a/src/layer/x86/convolution_x86.cpp b/src/layer/x86/convolution_x86.cpp
index 09008985f12..6e828ff0d21 100644
--- a/src/layer/x86/convolution_x86.cpp
+++ b/src/layer/x86/convolution_x86.cpp
@@ -1189,9 +1189,9 @@ int Convolution_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+int Deconvolution_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * 1;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / 1;
+        const int inch_g = _num_input / 1;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < 1; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/x86/deconvolution_x86.h b/src/layer/x86/deconvolution_x86.h
index 2d620e279ea..4951870bcd0 100644
--- a/src/layer/x86/deconvolution_x86.h
+++ b/src/layer/x86/deconvolution_x86.h
@@ -29,6 +29,8 @@ class Deconvolution_x86 : virtual public Deconvolution
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 public:
     Layer* activation;
     Layer* gemm;
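The x86 forward above (like the ARM and RISC-V ones) does not hand-write a dynamic-weight kernel. It rebuilds an ordinary Deconvolution layer on every call and feeds it the freshly transposed weights. Reduced to its skeleton, with the param/weight preparation elided (this is a condensation of the code above, not additional API):

// Skeleton of the dynamic-weight fallback. load_param, load_model and
// create_pipeline all run on every forward call; that per-call setup cost
// is the price of letting the weights change between inferences.
ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);
op->load_param(pd);                                  // ids derived from the weight blob shape
op->load_model(ncnn::ModelBinFromMatArray(weights)); // transposed weight + flattened bias
op->create_pipeline(opt);
op->forward(bottom_blob, top_blob, opt);
op->destroy_pipeline(opt);
delete op;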
diff --git a/src/layer/x86/deconvolutiondepthwise_x86.cpp b/src/layer/x86/deconvolutiondepthwise_x86.cpp
index 03a24999190..43a573a64ef 100644
--- a/src/layer/x86/deconvolutiondepthwise_x86.cpp
+++ b/src/layer/x86/deconvolutiondepthwise_x86.cpp
@@ -37,6 +37,9 @@ DeconvolutionDepthWise_x86::DeconvolutionDepthWise_x86()
 
 int DeconvolutionDepthWise_x86::create_pipeline(const Option& opt)
 {
+    if (dynamic_weight)
+        return 0;
+
     const int maxk = kernel_w * kernel_h;
     int channels = (weight_data_size / group) / maxk / (num_output / group) * group;
 
@@ -574,4 +577,111 @@ int DeconvolutionDepthWise_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
     return 0;
 }
 
+int DeconvolutionDepthWise_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
+{
+    const Mat& bottom_blob = bottom_blobs[0];
+    const Mat& _weight_data = bottom_blobs[1];
+    Mat& top_blob = top_blobs[0];
+
+    const int _num_input = bottom_blob.c * bottom_blob.elempack;
+    const int _kernel_w = _weight_data.w;
+    const int _kernel_h = _weight_data.h;
+    const int _num_output = _weight_data.d * group;
+
+    Mat weight_data_flattened;
+    flatten(_weight_data, weight_data_flattened, opt);
+    if (weight_data_flattened.empty())
+        return -100;
+
+    // weight_data_flattened as pack1
+    weight_data_flattened.w *= weight_data_flattened.elempack;
+    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+    weight_data_flattened.elempack = 1;
+
+    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
+    Mat weight_data_transposed;
+    {
+        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
+        if (weight_data_transposed.empty())
+            return -100;
+
+        const int outch_g = _num_output / group;
+        const int inch_g = _num_input / group;
+        const int maxk = _kernel_h * _kernel_w;
+
+        for (int g = 0; g < group; g++)
+        {
+            // reorder weight from inch-outch to outch-inch
+            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
+            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
+            for (int i = 0; i < outch_g; i++)
+            {
+                for (int j = 0; j < inch_g; j++)
+                {
+                    for (int k = 0; k < maxk; k++)
+                    {
+                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
+                    }
+                }
+            }
+        }
+    }
+
+    Mat bias_data_flattened;
+    if (bias_term)
+    {
+        const Mat& _bias_data = bottom_blobs[2];
+        flatten(_bias_data, bias_data_flattened, opt);
+        if (bias_data_flattened.empty())
+            return -100;
+
+        // bias_data_flattened as pack1
+        bias_data_flattened.w *= bias_data_flattened.elempack;
+        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
+        bias_data_flattened.elempack = 1;
+    }
+
+    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::DeconvolutionDepthWise);
+
+    ncnn::ParamDict pd;
+    pd.set(0, _num_output);
+    pd.set(1, _kernel_w);
+    pd.set(11, _kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_left);
+    pd.set(15, pad_right);
+    pd.set(14, pad_top);
+    pd.set(16, pad_bottom);
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+    pd.set(5, bias_term);
+    pd.set(6, weight_data_transposed.w);
+    pd.set(7, group);
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    op->load_param(pd);
+
+    ncnn::Mat weights[2];
+    weights[0] = weight_data_transposed;
+    weights[1] = bias_data_flattened;
+
+    op->load_model(ncnn::ModelBinFromMatArray(weights));
+
+    op->create_pipeline(opt);
+
+    op->forward(bottom_blob, top_blob, opt);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
 } // namespace ncnn
diff --git a/src/layer/x86/deconvolutiondepthwise_x86.h b/src/layer/x86/deconvolutiondepthwise_x86.h
index 33139cfb563..07fb5e54f9b 100644
--- a/src/layer/x86/deconvolutiondepthwise_x86.h
+++ b/src/layer/x86/deconvolutiondepthwise_x86.h
@@ -29,6 +29,8 @@ class DeconvolutionDepthWise_x86 : virtual public DeconvolutionDepthWise
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+
 protected:
     int create_group_ops(const Option& opt);
diff --git a/tests/test_deconvolution.cpp b/tests/test_deconvolution.cpp
index b1028019508..4a0027b68c4 100644
--- a/tests/test_deconvolution.cpp
+++ b/tests/test_deconvolution.cpp
@@ -151,9 +151,95 @@ static int test_deconvolution_0()
            || test_deconvolution(7, 5, 32, 26, 4, 2, 2, 2, 1, 0, 0, 0, 0);
 }
 
+static int test_deconvolution_dynamic(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, int output_pad_right, int output_pad_bottom, int output_w, int output_h)
+{
+    ncnn::Mat a = RandomMat(w, h, c);
+
+    if (output_w > 0 && output_h > 0 && pad != -233 && pad != -234)
+    {
+        pad = -233;
+    }
+
+    ncnn::ParamDict pd;
+    pd.set(0, 0);
+    pd.set(1, 0);
+    pd.set(2, dilation);
+    pd.set(3, stride);
+    pd.set(4, pad);
+    pd.set(5, bias);
+    pd.set(6, 0);
+    pd.set(28, 1); // dynamic weight
+
+    int activation_type = RAND() % 7; // 0 1 2 3 4 5 6
+    ncnn::Mat activation_params(2);
+    activation_params[0] = (activation_type == 6) ? RandomFloat(0, 1) : RandomFloat(-1, 0); // alpha
+    activation_params[1] = RandomFloat(0, 1); // beta
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    pd.set(18, output_pad_right);
+    pd.set(19, output_pad_bottom);
+    pd.set(20, output_w);
+    pd.set(21, output_h);
+
+    std::vector<ncnn::Mat> as(bias ? 3 : 2);
+    as[0] = a;
+    as[1] = RandomMat(kernel, kernel, outch, c);
+    if (bias)
+        as[2] = RandomMat(outch);
+
+    std::vector<ncnn::Mat> weights(0);
+
+    int ret = test_layer("Deconvolution", pd, weights, as);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_deconvolution_dynamic failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d act=%d actparams=[%f,%f] output_pad_right=%d output_pad_bottom=%d output_w=%d output_h=%d\n", w, h, c, outch, kernel, dilation, stride, pad, bias, activation_type, activation_params[0], activation_params[1], output_pad_right, output_pad_bottom, output_w, output_h);
+    }
+
+    return ret;
+}
+
+static int test_deconvolution_1()
+{
+    static const int kdsp[7][4] = {
+        {1, 1, 1, 0},
+        {1, 1, 2, 0},
+        {2, 1, 1, 1},
+        {2, 1, 2, -233},
+        {3, 1, 1, 1},
+        {3, 1, 2, 1},
+        {3, 2, 1, -234},
+    };
+
+    for (int i = 0; i < 7; i++)
+    {
+        const int k = kdsp[i][0];
+        const int d = kdsp[i][1];
+        const int s = kdsp[i][2];
+        const int p = kdsp[i][3];
+
+        int ret = 0
+                  || test_deconvolution_dynamic(9, 7, 1, 1, k, d, s, p, 1, 0, 0, 0, 0)
+                  || test_deconvolution_dynamic(9, 7, 4, 13, k, d, s, p, 0, 1, 1, 7, 5)
+                  || test_deconvolution_dynamic(9, 7, 13, 4, k, d, s, p, 1, 1, 0, 0, 0)
+                  || test_deconvolution_dynamic(9, 7, 4, 8, k, d, s, p, 0, 0, 1, 0, 0)
+                  || test_deconvolution_dynamic(9, 7, 8, 4, k, d, s, p, 1, 0, 0, 7, 5)
+                  || test_deconvolution_dynamic(7, 7, 12, 12, k, d, s, p, 1, 0, 1, 0, 0)
+                  || test_deconvolution_dynamic(4, 5, 12, 11, k, d, s, p, 0, 0, 1, 1, 0)
+                  || test_deconvolution_dynamic(9, 7, 8, 13, k, d, s, p, 0, 2, 2, 0, 0)
+                  || test_deconvolution_dynamic(9, 7, 13, 8, k, d, s, p, 1, 2, 0, 0, 0)
+                  || test_deconvolution_dynamic(9, 7, 16, 16, k, d, s, p, 0, 0, 2, 7, 5);
+
+        if (ret != 0)
+            return -1;
+    }
+
+    return 0;
+}
+
 int main()
 {
     SRAND(7767517);
 
-    return test_deconvolution_0();
+    return test_deconvolution_0() || test_deconvolution_1();
 }
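For readers checking the expected shapes in these tests: ncnn's deconvolution output size follows the usual transposed-convolution rule. A hedged helper follows; the function name and the symmetric-pad handling are mine, and the magic pad values -233/-234 used by the tests (which select automatic padding) are not modeled here:

// Expected deconvolution output size, following the transposed-convolution
// shape rule ncnn applies (kernel_extent = dilation * (kernel - 1) + 1).
int deconv_output_size(int in, int kernel, int dilation, int stride, int pad, int output_pad)
{
    const int kernel_extent = dilation * (kernel - 1) + 1;
    int out = (in - 1) * stride + kernel_extent + output_pad;
    if (pad > 0)
        out -= 2 * pad; // explicit symmetric padding crops both borders
    return out;
}

// e.g. in=9, kernel=3, dilation=1, stride=2, pad=1, output_pad=0 -> 17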
diff --git a/tests/test_deconvolution1d.cpp b/tests/test_deconvolution1d.cpp
index 5b8cd6aec14..b1b24ee6af9 100644
--- a/tests/test_deconvolution1d.cpp
+++ b/tests/test_deconvolution1d.cpp
@@ -101,9 +101,100 @@ static int test_deconvolution1d_0()
     return 0;
 }
 
+static int test_deconvolution1d_dynamic(int w, int h, int outh, int kernel, int dilation, int stride, int pad, int bias, int output_pad_right, int output_w)
+{
+    ncnn::Mat a = RandomMat(w, h);
+
+    if (output_w > 0 && pad != -233 && pad != -234)
+    {
+        pad = -233;
+    }
+
+    ncnn::ParamDict pd;
+    pd.set(0, 0);
+    pd.set(1, 0);
+    pd.set(2, dilation);
+    pd.set(3, stride);
+    pd.set(4, pad);
+    pd.set(5, bias);
+    pd.set(6, 0);
+    pd.set(28, 1); // dynamic weight
+
+    int activation_type = RAND() % 5; // 0 1 2 3 4
+    ncnn::Mat activation_params(2);
+    activation_params[0] = RandomFloat(-1, 0); // alpha
+    activation_params[1] = RandomFloat(0, 1); // beta
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    pd.set(18, output_pad_right);
+    pd.set(20, output_w);
+
+    std::vector<ncnn::Mat> as(bias ? 3 : 2);
+    as[0] = a;
+    as[1] = RandomMat(kernel, outh, h);
+    if (bias)
+        as[2] = RandomMat(outh);
+
+    std::vector<ncnn::Mat> weights(0);
+
+    int ret = test_layer("Deconvolution1D", pd, weights, as);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_deconvolution1d_dynamic failed w=%d h=%d outh=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d act=%d actparams=[%f,%f] output_pad_right=%d output_w=%d\n", w, h, outh, kernel, dilation, stride, pad, bias, activation_type, activation_params[0], activation_params[1], output_pad_right, output_w);
+    }
+
+    return ret;
+}
+
+static int test_deconvolution1d_1()
+{
+    static const int kdsp[16][4] = {
+        {1, 1, 1, 0},
+        {1, 1, 2, 0},
+        {2, 1, 1, 1},
+        {2, 1, 2, -233},
+        {3, 1, 1, 1},
+        {3, 1, 2, 1},
+        {3, 2, 1, 1},
+        {4, 1, 1, -233},
+        {4, 1, 2, -234},
+        {4, 2, 1, -234},
+        {5, 1, 1, 2},
+        {5, 1, 2, 2},
+        {5, 2, 2, 2},
+        {7, 1, 1, 3},
+        {7, 1, 2, 3},
+        {7, 2, 1, -233},
+    };
+
+    for (int i = 0; i < 16; i++)
+    {
+        const int k = kdsp[i][0];
+        const int d = kdsp[i][1];
+        const int s = kdsp[i][2];
+        const int p = kdsp[i][3];
+
+        int ret = 0
+                  || test_deconvolution1d_dynamic(9, 1, 1, k, d, s, p, 1, 0, 0)
+                  || test_deconvolution1d_dynamic(9, 4, 13, k, d, s, p, 0, 1, 7)
+                  || test_deconvolution1d_dynamic(9, 13, 4, k, d, s, p, 1, 1, 0)
+                  || test_deconvolution1d_dynamic(9, 4, 8, k, d, s, p, 0, 0, 0)
+                  || test_deconvolution1d_dynamic(9, 8, 4, k, d, s, p, 1, 0, 7)
+                  || test_deconvolution1d_dynamic(9, 8, 13, k, d, s, p, 0, 2, 0)
+                  || test_deconvolution1d_dynamic(9, 13, 8, k, d, s, p, 1, 2, 0)
+                  || test_deconvolution1d_dynamic(9, 16, 16, k, d, s, p, 0, 0, 7);
+
+        if (ret != 0)
+            return -1;
+    }
+
+    return 0;
+}
+
 int main()
 {
     SRAND(7767517);
 
-    return test_deconvolution1d_0();
+    return test_deconvolution1d_0() || test_deconvolution1d_1();
 }
diff --git a/tests/test_deconvolutiondepthwise1d.cpp b/tests/test_deconvolutiondepthwise1d.cpp
index 21e53d7638c..cadd149ff59 100644
--- a/tests/test_deconvolutiondepthwise1d.cpp
+++ b/tests/test_deconvolutiondepthwise1d.cpp
@@ -107,9 +107,106 @@ static int test_deconvolutiondepthwise1d_0()
     return 0;
 }
 
+static int test_deconvolutiondepthwise1d_dynamic(int w, int h, int outh, int kernel, int dilation, int stride, int pad, int bias, int group, int output_pad_right, int output_w)
+{
+    ncnn::Mat a = RandomMat(w, h);
+
+    if (output_w > 0 && pad != -233 && pad != -234)
+    {
+        pad = -233;
+    }
+
+    ncnn::ParamDict pd;
+    pd.set(0, 0);
+    pd.set(1, 0);
+    pd.set(2, dilation);
+    pd.set(3, stride);
+    pd.set(4, pad);
+    pd.set(5, bias);
+    pd.set(6, 0);
+    pd.set(7, group);
+    pd.set(28, 1); // dynamic weight
+
+    int activation_type = RAND() % 5; // 0 1 2 3 4
+    ncnn::Mat activation_params(2);
+    activation_params[0] = RandomFloat(-1, 0); // alpha
+    activation_params[1] = RandomFloat(0, 1); // beta
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    pd.set(18, output_pad_right);
+    pd.set(20, output_w);
+
+    std::vector<ncnn::Mat> as(bias ? 3 : 2);
+    as[0] = a;
+    as[1] = RandomMat(kernel, outh / group, h);
+    if (bias)
+        as[2] = RandomMat(outh);
+
+    std::vector<ncnn::Mat> weights(0);
+
+    int ret = test_layer("DeconvolutionDepthWise1D", pd, weights, as);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_deconvolutiondepthwise1d_dynamic failed w=%d h=%d outh=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d group=%d act=%d actparams=[%f,%f] output_pad_right=%d output_w=%d\n", w, h, outh, kernel, dilation, stride, pad, bias, group, activation_type, activation_params[0], activation_params[1], output_pad_right, output_w);
+    }
+
+    return ret;
+}
+
+static int test_deconvolutiondepthwise1d_1()
+{
+    static const int kdsp[16][4] = {
+        {1, 1, 1, 0},
+        {1, 1, 2, 0},
+        {2, 1, 1, 1},
+        {2, 1, 2, -233},
+        {3, 1, 1, 1},
+        {3, 1, 2, 1},
+        {3, 2, 1, 1},
+        {4, 1, 1, -233},
+        {4, 1, 2, -234},
+        {4, 2, 1, -234},
+        {5, 1, 1, 2},
+        {5, 1, 2, 2},
+        {5, 2, 2, 2},
+        {7, 1, 1, 3},
+        {7, 1, 2, 3},
+        {7, 2, 1, -233},
+    };
+
+    for (int i = 0; i < 16; i++)
+    {
+        const int k = kdsp[i][0];
+        const int d = kdsp[i][1];
+        const int s = kdsp[i][2];
+        const int p = kdsp[i][3];
+
+        int ret = 0
+                  || test_deconvolutiondepthwise1d_dynamic(15, 1, 1, k, d, s, p, 1, 1, 0, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 2, 2, k, d, s, p, 0, 1, 1, 7)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 2, 2, k, d, s, p, 1, 2, 1, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 3, 3, k, d, s, p, 0, 3, 0, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 4, 2, k, d, s, p, 1, 2, 0, 7)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 4, 4, k, d, s, p, 0, 4, 2, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 7, 7, k, d, s, p, 1, 7, 2, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 8, 8, k, d, s, p, 0, 2, 0, 7)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 8, 8, k, d, s, p, 1, 8, 0, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 12, 12, k, d, s, p, 0, 4, 3, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 15, 15, k, d, s, p, 1, 15, 3, 7)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 16, 8, k, d, s, p, 0, 2, 0, 0)
+                  || test_deconvolutiondepthwise1d_dynamic(15, 16, 16, k, d, s, p, 1, 16, 0, 0);
+
+        if (ret != 0)
+            return -1;
+    }
+
+    return 0;
+}
+
 int main()
 {
     SRAND(7767517);
 
-    return test_deconvolutiondepthwise1d_0();
+    return test_deconvolutiondepthwise1d_0() || test_deconvolutiondepthwise1d_1();
 }
diff --git a/tests/test_deconvolutiondepthwise_1.cpp b/tests/test_deconvolutiondepthwise_1.cpp
new file mode 100644
index 00000000000..02fc8f97351
--- /dev/null
+++ b/tests/test_deconvolutiondepthwise_1.cpp
@@ -0,0 +1,122 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+ +#include "layer/deconvolutiondepthwise.h" +#include "testutil.h" + +static int test_deconvolutiondepthwise_dynamic(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, int group, int output_pad_right, int output_pad_bottom, int output_w, int output_h) +{ + ncnn::Mat a = RandomMat(w, h, c); + + if (output_w > 0 && output_h > 0 && pad != -233 && pad != -234) + { + pad = -233; + } + + ncnn::ParamDict pd; + pd.set(0, 0); + pd.set(1, 0); + pd.set(2, dilation); + pd.set(3, stride); + pd.set(4, pad); + pd.set(5, bias); + pd.set(6, 0); + pd.set(7, group); + pd.set(28, 1); // dynamic weight + + int activation_type = RAND() % 5; // 0 1 2 3 4 + ncnn::Mat activation_params(2); + activation_params[0] = RandomFloat(-1, 0); // alpha + activation_params[1] = RandomFloat(0, 1); // beta + pd.set(9, activation_type); + pd.set(10, activation_params); + + pd.set(18, output_pad_right); + pd.set(19, output_pad_bottom); + pd.set(20, output_w); + pd.set(21, output_h); + + std::vector as(bias ? 3 : 2); + as[0] = a; + as[1] = RandomMat(kernel, kernel, outch / group, c); + if (bias) + as[2] = RandomMat(outch); + + std::vector weights(0); + + int ret = test_layer("DeconvolutionDepthWise", pd, weights, as); + if (ret != 0) + { + fprintf(stderr, "test_deconvolutiondepthwise_dynamic failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d group=%d act=%d actparams=[%f,%f] output_pad_right=%d output_pad_bottom=%d output_w=%d output_h=%d\n", w, h, c, outch, kernel, dilation, stride, pad, bias, group, activation_type, activation_params[0], activation_params[1], output_pad_right, output_pad_bottom, output_w, output_h); + } + + return ret; +} + +static int test_deconvolutiondepthwise_0() +{ + static const int kdsp[16][4] = { + {1, 1, 1, 0}, + {1, 1, 2, 0}, + {2, 1, 1, 1}, + {2, 1, 2, -233}, + {3, 1, 1, 1}, + {3, 1, 2, 1}, + {3, 2, 1, 1}, + {4, 1, 1, -233}, + {4, 1, 2, -234}, + {4, 2, 1, -234}, + {5, 1, 1, 2}, + {5, 1, 2, 2}, + {5, 2, 2, 2}, + {7, 1, 1, 3}, + {7, 1, 2, 3}, + {7, 2, 1, -233}, + }; + + for (int i = 0; i < 16; i++) + { + const int k = kdsp[i][0]; + const int d = kdsp[i][1]; + const int s = kdsp[i][2]; + const int p = kdsp[i][3]; + + int ret = 0 + || test_deconvolutiondepthwise_dynamic(15, 7, 1, 1, k, d, s, p, 1, 1, 0, 0, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 2, 2, k, d, s, p, 0, 1, 1, 1, 7, 5) + || test_deconvolutiondepthwise_dynamic(15, 7, 2, 2, k, d, s, p, 1, 2, 1, 0, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 3, 3, k, d, s, p, 0, 3, 0, 1, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 4, 2, k, d, s, p, 1, 2, 0, 0, 7, 5) + || test_deconvolutiondepthwise_dynamic(15, 7, 4, 4, k, d, s, p, 0, 4, 2, 2, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 7, 7, k, d, s, p, 1, 7, 2, 0, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 8, 8, k, d, s, p, 0, 2, 0, 2, 7, 5) + || test_deconvolutiondepthwise_dynamic(15, 7, 8, 8, k, d, s, p, 1, 8, 0, 0, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 12, 12, k, d, s, p, 0, 4, 3, 3, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 15, 15, k, d, s, p, 1, 15, 3, 0, 7, 5) + || test_deconvolutiondepthwise_dynamic(15, 7, 16, 8, k, d, s, p, 0, 2, 0, 3, 0, 0) + || test_deconvolutiondepthwise_dynamic(15, 7, 16, 16, k, d, s, p, 1, 16, 0, 0, 0, 0); + + if (ret != 0) + return -1; + } + + return 0; +} + +int main() +{ + SRAND(7767517); + + return test_deconvolutiondepthwise_0(); +} diff --git a/tools/pnnx/src/pass_ncnn/F_conv_transpose1d.cpp 
diff --git a/tools/pnnx/src/pass_ncnn/F_conv_transpose1d.cpp b/tools/pnnx/src/pass_ncnn/F_conv_transpose1d.cpp
index 5901522afca..7131557cef7 100644
--- a/tools/pnnx/src/pass_ncnn/F_conv_transpose1d.cpp
+++ b/tools/pnnx/src/pass_ncnn/F_conv_transpose1d.cpp
@@ -18,6 +18,194 @@ namespace pnnx {
 
 namespace ncnn {
 
+class F_conv_transpose1d_4 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+4 3
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+F.conv_transpose1d op_0 2 1 input weight out bias=None stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=1
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "Deconvolution1D";
+    }
+
+    const char* name_str() const
+    {
+        return "deconv1d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1];
+        op->params["1"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 0;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2]);
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose1d_4, 22)
+
+class F_conv_transpose1d_5 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+5 4
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+pnnx.Input bias 0 1 bias
+F.conv_transpose1d op_0 3 1 input weight bias out stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=1
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "Deconvolution1D";
+    }
+
+    const char* name_str() const
+    {
+        return "deconv1d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1];
+        op->params["1"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 1;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2]);
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose1d_5, 22)
+
+class F_conv_transpose1d_6 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+4 3
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+F.conv_transpose1d op_0 2 1 input weight out bias=None stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=%groups
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "DeconvolutionDepthWise1D";
+    }
+
+    const char* name_str() const
+    {
+        return "deconvdw1d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1] * captured_params.at("groups").i;
+        op->params["1"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 0;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2]);
+        op->params["7"] = captured_params.at("groups");
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose1d_6, 23)
+
+class F_conv_transpose1d_7 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+5 4
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+pnnx.Input bias 0 1 bias
+F.conv_transpose1d op_0 3 1 input weight bias out stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=%groups
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "DeconvolutionDepthWise1D";
+    }
+
+    const char* name_str() const
+    {
+        return "deconvdw1d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1] * captured_params.at("groups").i;
+        op->params["1"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 1;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2]);
+        op->params["7"] = captured_params.at("groups");
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose1d_7, 23)
+
 } // namespace ncnn
 
 } // namespace pnnx
diff --git a/tools/pnnx/src/pass_ncnn/F_conv_transpose2d.cpp b/tools/pnnx/src/pass_ncnn/F_conv_transpose2d.cpp
index 890f36cc92a..f0e578e7b3c 100644
--- a/tools/pnnx/src/pass_ncnn/F_conv_transpose2d.cpp
+++ b/tools/pnnx/src/pass_ncnn/F_conv_transpose2d.cpp
@@ -18,6 +18,214 @@ namespace pnnx {
 
 namespace ncnn {
 
+class F_conv_transpose2d_4 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+4 3
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+F.conv_transpose2d op_0 2 1 input weight out bias=None stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=1
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "Deconvolution";
+    }
+
+    const char* name_str() const
+    {
+        return "deconv2d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1];
+        op->params["1"] = weight_shape[3];
+        op->params["11"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[1];
+        op->params["12"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[1];
+        op->params["13"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[1];
+        op->params["14"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[1];
+        op->params["19"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 0;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2] * weight_shape[3]);
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose2d_4, 22)
+
+class F_conv_transpose2d_5 : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+5 4
+pnnx.Input input 0 1 input
+pnnx.Input weight 0 1 weight
+pnnx.Input bias 0 1 bias
+F.conv_transpose2d op_0 3 1 input weight bias out stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=1
+pnnx.Output output 1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const
+    {
+        return "Deconvolution";
+    }
+
+    const char* name_str() const
+    {
+        return "deconv2d";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& /*captured_attrs*/) const
+    {
+        std::vector<int> weight_shape = op->inputs[1]->shape;
+        if (weight_shape.empty())
+        {
+            weight_shape = {0, 0, 0, 0};
+        }
+
+        op->params["0"] = weight_shape[1];
+        op->params["1"] = weight_shape[3];
+        op->params["11"] = weight_shape[2];
+        op->params["2"] = captured_params.at("dilation").ai[1];
+        op->params["12"] = captured_params.at("dilation").ai[0];
+        op->params["3"] = captured_params.at("stride").ai[1];
+        op->params["13"] = captured_params.at("stride").ai[0];
+        op->params["4"] = captured_params.at("padding").ai[1];
+        op->params["14"] = captured_params.at("padding").ai[0];
+        op->params["18"] = captured_params.at("output_padding").ai[1];
+        op->params["19"] = captured_params.at("output_padding").ai[0];
+        op->params["5"] = 1;
+        op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2] * weight_shape[3]);
+        op->params["28"] = 1; // dynamic weight
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose2d_5, 22)
+
captured_params.at("padding").ai[1]; + op->params["14"] = captured_params.at("padding").ai[0]; + op->params["18"] = captured_params.at("output_padding").ai[1]; + op->params["19"] = captured_params.at("output_padding").ai[0]; + op->params["5"] = 0; + op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2] * weight_shape[3]); + op->params["28"] = 1; // dynamic weight + } +}; + +REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose2d_4, 22) + +class F_conv_transpose2d_5 : public GraphRewriterPass +{ +public: + const char* match_pattern_graph() const + { + return R"PNNXIR(7767517 +5 4 +pnnx.Input input 0 1 input +pnnx.Input weight 0 1 weight +pnnx.Input bias 0 1 bias +F.conv_transpose2d op_0 3 1 input weight bias out stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=1 +pnnx.Output output 1 0 out +)PNNXIR"; + } + + const char* type_str() const + { + return "Deconvolution"; + } + + const char* name_str() const + { + return "deconv2d"; + } + + void write(Operator* op, const std::map& captured_params, const std::map& /*captured_attrs*/) const + { + std::vector weight_shape = op->inputs[1]->shape; + if (weight_shape.empty()) + { + weight_shape = {0, 0, 0, 0}; + } + + op->params["0"] = weight_shape[1]; + op->params["1"] = weight_shape[3]; + op->params["11"] = weight_shape[2]; + op->params["2"] = captured_params.at("dilation").ai[1]; + op->params["12"] = captured_params.at("dilation").ai[0]; + op->params["3"] = captured_params.at("stride").ai[1]; + op->params["13"] = captured_params.at("stride").ai[0]; + op->params["4"] = captured_params.at("padding").ai[1]; + op->params["14"] = captured_params.at("padding").ai[0]; + op->params["18"] = captured_params.at("output_padding").ai[1]; + op->params["19"] = captured_params.at("output_padding").ai[0]; + op->params["5"] = 1; + op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * weight_shape[2] * weight_shape[3]); + op->params["28"] = 1; // dynamic weight + } +}; + +REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_conv_transpose2d_5, 22) + +class F_conv_transpose2d_6 : public GraphRewriterPass +{ +public: + const char* match_pattern_graph() const + { + return R"PNNXIR(7767517 +4 3 +pnnx.Input input 0 1 input +pnnx.Input weight 0 1 weight +F.conv_transpose2d op_0 2 1 input weight out bias=None stride=%stride output_padding=%output_padding padding=%padding dilation=%dilation groups=%groups +pnnx.Output output 1 0 out +)PNNXIR"; + } + + const char* type_str() const + { + return "DeconvolutionDepthWise"; + } + + const char* name_str() const + { + return "deconvdw2d"; + } + + void write(Operator* op, const std::map& captured_params, const std::map& /*captured_attrs*/) const + { + std::vector weight_shape = op->inputs[1]->shape; + if (weight_shape.empty()) + { + weight_shape = {0, 0, 0, 0}; + } + + op->params["0"] = weight_shape[1] * captured_params.at("groups").i; + op->params["1"] = weight_shape[3]; + op->params["11"] = weight_shape[2]; + op->params["2"] = captured_params.at("dilation").ai[1]; + op->params["12"] = captured_params.at("dilation").ai[0]; + op->params["3"] = captured_params.at("stride").ai[1]; + op->params["13"] = captured_params.at("stride").ai[0]; + op->params["4"] = captured_params.at("padding").ai[1]; + op->params["14"] = captured_params.at("padding").ai[0]; + op->params["18"] = captured_params.at("output_padding").ai[1]; + op->params["19"] = captured_params.at("output_padding").ai[0]; + op->params["5"] = 0; + op->params["6"] = (int)(weight_shape[0] * weight_shape[1] * 
diff --git a/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py b/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py
index 479a2d6daef..ff23f77ab27 100644
--- a/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py
@@ -24,33 +24,40 @@ def __init__(self):
         self.b2 = nn.Parameter(torch.rand(12))
         self.w3 = nn.Parameter(torch.rand(12, 2, 3))
 
-    def forward(self, y):
+    def forward(self, x, w0, w1, b1, y):
+        x = F.conv_transpose1d(x, w0, None, stride=2, padding=1, output_padding=1)
+        x = F.conv_transpose1d(x, w1, b1, stride=1, padding=2, dilation=2, groups=2)
+
         y = F.conv_transpose1d(y, self.w2, self.b2, stride=2, padding=1, output_padding=1)
         y = F.conv_transpose1d(y, self.w3, None, stride=1, padding=2, dilation=2, groups=3)
-        return y
+        return x, y
 
 def test():
     net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
+    x = torch.rand(1, 12, 22)
+    w0 = torch.rand(12, 16, 3)
+    w1 = torch.rand(16, 8, 5)
+    b1 = torch.rand(16)
     y = torch.rand(1, 6, 5)
 
-    a = net(y)
+    a0, a1 = net(x, w0, w1, b1, y)
 
     # export torchscript
-    mod = torch.jit.trace(net, y)
+    mod = torch.jit.trace(net, (x, w0, w1, b1, y))
     mod.save("test_F_conv_transpose1d.pt")
 
     # torchscript to pnnx
     import os
-    os.system("../../src/pnnx test_F_conv_transpose1d.pt inputshape=[1,6,5]")
+    os.system("../../src/pnnx test_F_conv_transpose1d.pt inputshape=[1,12,22],[12,16,3],[16,8,5],[16],[1,6,5]")
 
     # ncnn inference
     import test_F_conv_transpose1d_ncnn
-    b = test_F_conv_transpose1d_ncnn.test_inference()
+    b0, b1 = test_F_conv_transpose1d_ncnn.test_inference()
 
-    return torch.allclose(a, b, 1e-4, 1e-4)
+    return torch.allclose(a0, b0, 1e-4, 1e-4) and torch.allclose(a1, b1, 1e-4, 1e-4)
 
 if __name__ == "__main__":
     if test():
diff --git a/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py b/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py
index 7dc2e1e2690..5913f07ab02 100644
--- a/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py
@@ -24,33 +24,40 @@ def __init__(self):
         self.b2 = nn.Parameter(torch.rand(12))
         self.w3 = nn.Parameter(torch.rand(12, 2, 3, 3))
 
-    def forward(self, y):
+    def forward(self, x, w0, w1, b1, y):
+        x = F.conv_transpose2d(x, w0, None, stride=(2,2), padding=(1,1), output_padding=(1,1))
+        x = F.conv_transpose2d(x, w1, b1, stride=(1,2), padding=(2,1), dilation=(2,1), groups=2)
+
         y = F.conv_transpose2d(y, self.w2, self.b2, stride=(2,2), padding=(1,1), output_padding=(1,1))
         y = F.conv_transpose2d(y, self.w3, None, stride=(1,2), padding=(2,1), dilation=(2,1), groups=3)
-        return y
+        return x, y
 
 def test():
     net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
+    x = torch.rand(1, 12, 22, 32)
+    w0 = torch.rand(12, 16, 3, 3)
+    w1 = torch.rand(16, 8, 5, 5)
+    b1 = torch.rand(16)
     y = torch.rand(1, 6, 5, 6)
 
-    a = net(y)
+    a0, a1 = net(x, w0, w1, b1, y)
 
     # export torchscript
-    mod = torch.jit.trace(net, y)
+    mod = torch.jit.trace(net, (x, w0, w1, b1, y))
     mod.save("test_F_conv_transpose2d.pt")
 
     # torchscript to pnnx
     import os
-    os.system("../../src/pnnx test_F_conv_transpose2d.pt inputshape=[1,6,5,6]")
+    os.system("../../src/pnnx test_F_conv_transpose2d.pt inputshape=[1,12,22,32],[12,16,3,3],[16,8,5,5],[16],[1,6,5,6]")
 
     # ncnn inference
     import test_F_conv_transpose2d_ncnn
-    b = test_F_conv_transpose2d_ncnn.test_inference()
+    b0, b1 = test_F_conv_transpose2d_ncnn.test_inference()
 
-    return torch.allclose(a, b, 1e-4, 1e-4)
+    return torch.allclose(a0, b0, 1e-4, 1e-4) and torch.allclose(a1, b1, 1e-4, 1e-4)
 
 if __name__ == "__main__":
     if test():