Commit 1623ec9

deconvolution dynamic weight

nihui committed Oct 30, 2023
1 parent 009f5ea commit 1623ec9
Showing 17 changed files with 658 additions and 11 deletions.
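This commit adds dynamic-weight support to the Deconvolution and DeconvolutionDepthWise layers: with param 28 (`dynamic_weight`) set, `load_model()` is skipped and the weight (plus, when `bias_term` is set, the bias) is taken from extra bottom blobs at inference time through a new multi-blob `forward()` overload. It also corrects the ParamDict IDs used for `dilation_h`/`stride_h` in the dynamic-weight Convolution forward of the arm, loongarch, mips, riscv, and x86 backends, and opts the Vulkan deconvolution pipelines out when dynamic weights are in use.

A minimal sketch of driving the new path directly, in the style of ncnn's layer tests (the shapes and values are illustrative assumptions, not taken from the commit, and it assumes the reference non-vulkan Deconvolution implementation is the one selected):

```cpp
#include "layer.h"
#include "mat.h"

#include <vector>

int run_dynamic_weight_deconvolution()
{
    ncnn::Layer* op = ncnn::create_layer("Deconvolution");

    ncnn::ParamDict pd;
    pd.set(0, 8);  // num_output
    pd.set(1, 4);  // kernel_w
    pd.set(11, 4); // kernel_h
    pd.set(3, 2);  // stride_w
    pd.set(13, 2); // stride_h
    pd.set(5, 0);  // bias_term
    pd.set(28, 1); // dynamic_weight: weight arrives as bottom_blobs[1]
    op->load_param(pd);

    ncnn::Option opt;
    op->create_pipeline(opt);

    // bottom_blobs[0] = input (w, h, c)
    // bottom_blobs[1] = weight (w = kernel_w, h = kernel_h, d = num_output, c = num_input)
    std::vector<ncnn::Mat> bottom_blobs(2);
    bottom_blobs[0] = ncnn::Mat(16, 16, 3);
    bottom_blobs[1] = ncnn::Mat(4, 4, 8, 3);
    bottom_blobs[0].fill(1.f);
    bottom_blobs[1].fill(0.01f);

    std::vector<ncnn::Mat> top_blobs(1);
    int ret = op->forward(bottom_blobs, top_blobs, opt);

    // on success top_blobs[0] is 34x34x8: (16 - 1) * 2 + 4 = 34
    op->destroy_pipeline(opt);
    delete op;
    return ret;
}
```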
4 changes: 2 additions & 2 deletions src/layer/arm/convolution_arm.cpp
@@ -806,9 +806,9 @@ int Convolution_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<M
pd.set(1, _kernel_w);
pd.set(11, _kernel_h);
pd.set(2, dilation_w);
-pd.set(21, dilation_h);
+pd.set(12, dilation_h);
pd.set(3, stride_w);
-pd.set(31, stride_h);
+pd.set(13, stride_h);
pd.set(4, pad_left);
pd.set(15, pad_right);
pd.set(14, pad_top);
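Note on the two-line fix above: in the Convolution param table, `dilation_h` is param ID 12 and `stride_h` is ID 13. The old code wrote them to the unused IDs 21 and 31, so the inner Convolution created by the dynamic-weight forward fell back to its defaults (the horizontal values) and non-square dilation/stride was silently dropped. The same fix recurs in the loongarch, mips, riscv, and x86 copies below.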
97 changes: 97 additions & 0 deletions src/layer/deconvolution.cpp
@@ -46,11 +46,21 @@ int Deconvolution::load_param(const ParamDict& pd)
activation_type = pd.get(9, 0);
activation_params = pd.get(10, Mat());

dynamic_weight = pd.get(28, 0);

if (dynamic_weight)
{
one_blob_only = false;
}

return 0;
}

int Deconvolution::load_model(const ModelBin& mb)
{
if (dynamic_weight)
return 0;

weight_data = mb.load(weight_data_size, 0);
if (weight_data.empty())
return -100;
@@ -180,6 +190,93 @@ int Deconvolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
return 0;
}

int Deconvolution::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
{
const Mat& bottom_blob = bottom_blobs[0];
const Mat& _weight_data = bottom_blobs[1];
Mat& top_blob = top_blobs[0];

const int _num_input = bottom_blob.c;
const int _kernel_w = _weight_data.w;
const int _kernel_h = _weight_data.h;
const int _num_output = _weight_data.d * 1;

Mat weight_data_flattened;
flatten(_weight_data, weight_data_flattened, opt);
if (weight_data_flattened.empty())
return -100;

// transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
Mat weight_data_transposed;
{
weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
if (weight_data_transposed.empty())
return -100;

const int outch_g = _num_output / 1;
const int inch_g = _num_input / 1;
const int maxk = _kernel_h * _kernel_w;

for (int g = 0; g < 1; g++)
{
// reorder weight from inch-outch to outch-inch
float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
for (int i = 0; i < outch_g; i++)
{
for (int j = 0; j < inch_g; j++)
{
for (int k = 0; k < maxk; k++)
{
wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
}
}
}
}
}

Mat bias_data_flattened;
if (bias_term)
{
const Mat& _bias_data = bottom_blobs[2];
flatten(_bias_data, bias_data_flattened, opt);
if (bias_data_flattened.empty())
return -100;
}

const int w = bottom_blob.w;
const int h = bottom_blob.h;

const int kernel_extent_w = dilation_w * (_kernel_w - 1) + 1;
const int kernel_extent_h = dilation_h * (_kernel_h - 1) + 1;

int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
int outh = (h - 1) * stride_h + kernel_extent_h + output_pad_bottom;

Mat top_blob_bordered;
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || (output_w > 0 && output_h > 0))
{
top_blob_bordered.create(outw, outh, _num_output, 4u, opt.workspace_allocator);
}
else
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, _num_output, 4u, opt.blob_allocator);
}
if (top_blob_bordered.empty())
return -100;

int ret = deconvolution(bottom_blob, top_blob_bordered, weight_data_transposed, bias_data_flattened, _kernel_w, _kernel_h, stride_w, stride_h, dilation_w, dilation_h, activation_type, activation_params, opt);
if (ret != 0)
return ret;

cut_padding(top_blob_bordered, top_blob, opt);
if (top_blob.empty())
return -100;

return 0;
}

void Deconvolution::cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const
{
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
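The bordered output extent computed above is the usual transposed-convolution formula, out = (in - 1) * stride + dilation * (kernel - 1) + 1 + output_pad, with pad_left/pad_right (or an explicit output_w/output_h) trimmed afterwards by cut_padding. A standalone sanity check of that formula (a sketch, not part of the commit):

```cpp
#include <cstdio>

// Mirrors the outw/outh computation in Deconvolution::forward above.
static int deconv_out_extent(int in, int stride, int kernel, int dilation, int output_pad)
{
    const int kernel_extent = dilation * (kernel - 1) + 1;
    return (in - 1) * stride + kernel_extent + output_pad;
}

int main()
{
    // 16-wide input, stride 2, 4-wide kernel, dilation 1, no output padding:
    // (16 - 1) * 2 + (1 * (4 - 1) + 1) + 0 = 34
    printf("%d\n", deconv_out_extent(16, 2, 4, 1, 0)); // prints 34
    return 0;
}
```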
4 changes: 4 additions & 0 deletions src/layer/deconvolution.h
@@ -30,6 +30,8 @@ class Deconvolution : public Layer

virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;

virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;

protected:
void cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const;

@@ -58,6 +60,8 @@ class Deconvolution : public Layer
int activation_type;
Mat activation_params;

int dynamic_weight;

// model
Mat weight_data;
Mat bias_data;
97 changes: 97 additions & 0 deletions src/layer/deconvolutiondepthwise.cpp
@@ -47,11 +47,21 @@ int DeconvolutionDepthWise::load_param(const ParamDict& pd)
activation_type = pd.get(9, 0);
activation_params = pd.get(10, Mat());

dynamic_weight = pd.get(28, 0);

if (dynamic_weight)
{
one_blob_only = false;
}

return 0;
}

int DeconvolutionDepthWise::load_model(const ModelBin& mb)
{
if (dynamic_weight)
return 0;

weight_data = mb.load(weight_data_size, 0);
if (weight_data.empty())
return -100;
@@ -243,6 +253,93 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const
return 0;
}

int DeconvolutionDepthWise::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
{
const Mat& bottom_blob = bottom_blobs[0];
const Mat& _weight_data = bottom_blobs[1];
Mat& top_blob = top_blobs[0];

const int _num_input = bottom_blob.c;
const int _kernel_w = _weight_data.w;
const int _kernel_h = _weight_data.h;
const int _num_output = _weight_data.d * group;

Mat weight_data_flattened;
flatten(_weight_data, weight_data_flattened, opt);
if (weight_data_flattened.empty())
return -100;

// transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
Mat weight_data_transposed;
{
weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / group, 4u, opt.workspace_allocator);
if (weight_data_transposed.empty())
return -100;

const int outch_g = _num_output / group;
const int inch_g = _num_input / group;
const int maxk = _kernel_h * _kernel_w;

for (int g = 0; g < group; g++)
{
// reorder weight from inch-outch to outch-inch
float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
for (int i = 0; i < outch_g; i++)
{
for (int j = 0; j < inch_g; j++)
{
for (int k = 0; k < maxk; k++)
{
wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
}
}
}
}
}

Mat bias_data_flattened;
if (bias_term)
{
const Mat& _bias_data = bottom_blobs[2];
flatten(_bias_data, bias_data_flattened, opt);
if (bias_data_flattened.empty())
return -100;
}

const int w = bottom_blob.w;
const int h = bottom_blob.h;

const int kernel_extent_w = dilation_w * (_kernel_w - 1) + 1;
const int kernel_extent_h = dilation_h * (_kernel_h - 1) + 1;

int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
int outh = (h - 1) * stride_h + kernel_extent_h + output_pad_bottom;

Mat top_blob_bordered;
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || (output_w > 0 && output_h > 0))
{
top_blob_bordered.create(outw, outh, _num_output, 4u, opt.workspace_allocator);
}
else
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, _num_output, 4u, opt.blob_allocator);
}
if (top_blob_bordered.empty())
return -100;

int ret = deconvolutiondepthwise(bottom_blob, top_blob_bordered, weight_data_transposed, bias_data_flattened, _kernel_w, _kernel_h, stride_w, stride_h, dilation_w, dilation_h, group, activation_type, activation_params, opt);
if (ret != 0)
return ret;

cut_padding(top_blob_bordered, top_blob, opt);
if (top_blob.empty())
return -100;

return 0;
}

void DeconvolutionDepthWise::cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const
{
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
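In both forward overloads the per-group reorder swaps the two leading weight axes: element (j, i, k) of a group's inch-outch-k block lands at (i, j, k) in outch-inch-k order, the layout the `deconvolution()`/`deconvolutiondepthwise()` helpers consume. The same index mapping as a standalone sketch (hypothetical helper, not from the commit):

```cpp
#include <vector>

// Reorder one group's weights from inch-outch-k to outch-inch-k,
// mirroring the triple loop in the forward functions above.
static std::vector<float> reorder_group(const std::vector<float>& wg,
                                        int inch_g, int outch_g, int maxk)
{
    std::vector<float> wg2(wg.size());
    for (int i = 0; i < outch_g; i++)
        for (int j = 0; j < inch_g; j++)
            for (int k = 0; k < maxk; k++)
                wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
    return wg2;
}
```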
4 changes: 4 additions & 0 deletions src/layer/deconvolutiondepthwise.h
@@ -30,6 +30,8 @@ class DeconvolutionDepthWise : public Layer

virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;

virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;

protected:
void cut_padding(const Mat& top_blob_bordered, Mat& top_blob, const Option& opt) const;

@@ -59,6 +61,8 @@ class DeconvolutionDepthWise : public Layer
int activation_type;
Mat activation_params;

int dynamic_weight;

// model
Mat weight_data;
Mat bias_data;
4 changes: 2 additions & 2 deletions src/layer/loongarch/convolution_loongarch.cpp
@@ -600,9 +600,9 @@ int Convolution_loongarch::forward(const std::vector<Mat>& bottom_blobs, std::ve
pd.set(1, _kernel_w);
pd.set(11, _kernel_h);
pd.set(2, dilation_w);
-pd.set(21, dilation_h);
+pd.set(12, dilation_h);
pd.set(3, stride_w);
-pd.set(31, stride_h);
+pd.set(13, stride_h);
pd.set(4, pad_left);
pd.set(15, pad_right);
pd.set(14, pad_top);
4 changes: 2 additions & 2 deletions src/layer/mips/convolution_mips.cpp
@@ -600,9 +600,9 @@ int Convolution_mips::forward(const std::vector<Mat>& bottom_blobs, std::vector<
pd.set(1, _kernel_w);
pd.set(11, _kernel_h);
pd.set(2, dilation_w);
-pd.set(21, dilation_h);
+pd.set(12, dilation_h);
pd.set(3, stride_w);
-pd.set(31, stride_h);
+pd.set(13, stride_h);
pd.set(4, pad_left);
pd.set(15, pad_right);
pd.set(14, pad_top);
4 changes: 2 additions & 2 deletions src/layer/riscv/convolution_riscv.cpp
@@ -684,9 +684,9 @@ int Convolution_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector
pd.set(1, _kernel_w);
pd.set(11, _kernel_h);
pd.set(2, dilation_w);
-pd.set(21, dilation_h);
+pd.set(12, dilation_h);
pd.set(3, stride_w);
-pd.set(31, stride_h);
+pd.set(13, stride_h);
pd.set(4, pad_left);
pd.set(15, pad_right);
pd.set(14, pad_top);
7 changes: 7 additions & 0 deletions src/layer/vulkan/deconvolution_vulkan.cpp
@@ -35,6 +35,13 @@ Deconvolution_vulkan::Deconvolution_vulkan()

int Deconvolution_vulkan::create_pipeline(const Option& _opt)
{
if (dynamic_weight)
{
support_vulkan = false;
support_image_storage = false;
return 0;
}

Option opt = _opt;
const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
7 changes: 7 additions & 0 deletions src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp
@@ -44,6 +44,13 @@ DeconvolutionDepthWise_vulkan::DeconvolutionDepthWise_vulkan()

int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& _opt)
{
if (dynamic_weight)
{
support_vulkan = false;
support_image_storage = false;
return 0;
}

Option opt = _opt;
const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
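Since a Vulkan pipeline uploads and packs its weights once in create_pipeline(), it cannot serve weights that only arrive per forward call, so both Vulkan deconvolution layers opt out when dynamic_weight is set and execution falls back to the CPU implementations above.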
4 changes: 2 additions & 2 deletions src/layer/x86/convolution_x86.cpp
@@ -1189,9 +1189,9 @@ int Convolution_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<M
pd.set(1, _kernel_w);
pd.set(11, _kernel_h);
pd.set(2, dilation_w);
-pd.set(21, dilation_h);
+pd.set(12, dilation_h);
pd.set(3, stride_w);
-pd.set(31, stride_h);
+pd.set(13, stride_h);
pd.set(4, pad_left);
pd.set(15, pad_right);
pd.set(14, pad_top);