re commit

Tencent · Oct 11, 2023 · a934a96 · a934a96
1 parent f38fc95
commit a934a96
Show file tree

Hide file tree

Showing 11 changed files with 2,345 additions and 0 deletions.
diff --git a/src/layer/vulkan/convolution1d_vulkan.cpp b/src/layer/vulkan/convolution1d_vulkan.cpp
diff --git a/src/layer/vulkan/convolution1d_vulkan.h b/src/layer/vulkan/convolution1d_vulkan.h
@@ -0,0 +1,53 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef LAYER_CONVOLUTION1D_VULKAN_H
+#define LAYER_CONVOLUTION1D_VULKAN_H
+
+#include "convolution1d.h"
+
+namespace ncnn {
+
+class Convolution1D_vulkan : virtual public Convolution1D // Vulkan-facing declaration layered on Convolution1D; member bodies are not in this header
+{
+public:
+    Convolution1D_vulkan();
+
+    virtual int create_pipeline(const Option& opt); // NOTE(review): presumably builds pipeline_convolution1d - confirm in the implementation file
+    virtual int destroy_pipeline(const Option& opt); // counterpart of create_pipeline; what it releases is not visible here
+
+    virtual int upload_model(VkTransfer& cmd, const Option& opt); // NOTE(review): presumably fills the *_gpu / *_gpu_image members through cmd - confirm
+
+    using Convolution1D::forward; // keeps the base-class forward overloads visible next to the two declared below
+    virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const; // VkMat overload
+    virtual int forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const; // VkImageMat overload
+
+public:
+    ncnn::Layer* padding; // raw pointer to another layer; ownership and lifetime are not expressed here - TODO confirm who creates/deletes it
+
+    Mat weight_data_packed; // Mat-typed weight copy; the "packed" layout is defined by the implementation, not by this header
+    Mat bias_data_packed; // Mat-typed bias copy, same caveat as above
+
+    VkMat weight_data_gpu; // VkMat counterpart of the weights
+    VkMat bias_data_gpu; // VkMat counterpart of the bias
+
+    VkImageMat weight_data_gpu_image; // VkImageMat counterpart of the weights
+    VkImageMat bias_data_gpu_image; // VkImageMat counterpart of the bias
+
+    Pipeline* pipeline_convolution1d; // raw pointer; presumably owned between create_pipeline and destroy_pipeline - confirm
+};
+
+} // namespace ncnn
+
+#endif // LAYER_CONVOLUTION1D_VULKAN_H
diff --git a/src/layer/vulkan/shader/convolution1d.comp b/src/layer/vulkan/shader/convolution1d.comp
@@ -0,0 +1,177 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#version 450
+
+#if NCNN_fp16_storage
+#extension GL_EXT_shader_16bit_storage: require
+#endif
+#if NCNN_fp16_arithmetic
+#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
+#endif
+
+#extension GL_GOOGLE_include_directive: enable
+#include "vulkan_activation.comp"
+
+layout (constant_id = 0) const int kernel_w = 1; // number of taps; bound of the inner x loop in main()
+layout (constant_id = 1) const int dilation_w = 1; // input step between consecutive taps (x * dilation_w in main())
+layout (constant_id = 2) const int stride_w = 1; // input step between consecutive output columns
+layout (constant_id = 3) const int bias_term = 0; // main() seeds the accumulators from the bias only when this equals 1
+layout (constant_id = 4) const int activation_type = 0; // passed unchanged to activation_afp()
+layout (constant_id = 5) const float activation_param_0 = 0; // passed unchanged to activation_afp()
+layout (constant_id = 6) const float activation_param_1 = 0; // passed unchanged to activation_afp()
+
+#define shape_constant_id_offset 7
+layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; // shape ids start at 7, right after ids 0-6 above; dims is not read by main()
+layout (constant_id = shape_constant_id_offset + 1) const int w = 0; // input row pitch: added to the input offset once per row in the buffer path
+layout (constant_id = shape_constant_id_offset + 2) const int h = 0; // number of input rows; bound of the outer y loop in main()
+layout (constant_id = shape_constant_id_offset + 3) const int c = 0; // not read by main()
+layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; // not read by main()
+
+layout (constant_id = shape_constant_id_offset + 5) const int outdims = 0; // not read by main()
+layout (constant_id = shape_constant_id_offset + 6) const int outw = 0; // output width; x bound check and output row pitch in main()
+layout (constant_id = shape_constant_id_offset + 7) const int outh = 0; // output height; y bound check in main()
+layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; // not read by main()
+layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; // not read by main()
+
+#if NCNN_image_shader
+layout (binding = 0) uniform unfp sampler3D bottom_blob; // input; main() reads it at (column, row, 0)
+layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; // output; main() writes it at (column, row, 0)
+layout (binding = 2) uniform unfp sampler3D weight_blob; // weights; main() reads them at (tap, input row, output row)
+layout (binding = 3) uniform unfp sampler3D bias_blob; // bias; main() reads it at (output row, 0, 0)
+#else
+layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; // input, flat; main() steps by psc(w) per input row
+layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; // output, flat; main() indexes it as row * psc(outw) + column
+layout (binding = 2) readonly buffer weight_blob { sfp weight_data[]; }; // weights, flat; main() uses psc(h) * kernel_w elements per output row
+layout (binding = 3) readonly buffer bias_blob { sfp bias_data[]; }; // bias, flat; main() indexes it by output row
+#endif
+
+layout (push_constant) uniform parameter // runtime copies of the shape values that are also declared as specialization constants
+{
+    int dims;
+    int w;
+    int h;
+    int c;
+    int cstep;
+
+    int outdims;
+    int outw;
+    int outh;
+    int outc;
+    int outcstep;
+} p; // main() only reaches these through psc(); psc is defined outside this file - presumably it falls back to p.<name> when the constant is 0, confirm
+
+// Each invocation produces a 2x2 tile of the output: output columns gx, gx+1
+// for output rows gy, gy+1. Every output value is the sum over all input rows
+// y < h and taps x < kernel_w of input(column * stride_w + x * dilation_w, y)
+// times the matching weight, optionally seeded with the bias of its output
+// row, and finally passed through activation_afp().
+//
+// When outw or outh is odd the second column/row of the last tile lies one
+// past the edge. Those lanes load from clamped, in-range coordinates (their
+// result is a discarded duplicate) and are never stored.
+void main()
+{
+    int gx = int(gl_GlobalInvocationID.x) * 2;
+    int gy = int(gl_GlobalInvocationID.y) * 2;
+
+    if (gx >= psc(outw) || gy >= psc(outh))
+        return;
+
+    // store coordinates of the tile; the .y lane may be one past the edge
+    const ivec2 gx2 = gx + ivec2(0, 1);
+    const ivec2 gy2 = gy + ivec2(0, 1);
+
+    const bool has_x1 = gx2.y < psc(outw);
+    const bool has_y1 = gy2.y < psc(outh);
+
+    // load coordinates: pin the .y lane onto the last valid column/row so that
+    // no bias, weight or input access is issued for a non-existent output
+    const ivec2 lx2 = min(gx2, psc(outw) - 1);
+    const ivec2 ly2 = min(gy2, psc(outh) - 1);
+
+    // sum0/sum1 belong to row gy (columns gx, gx+1), sum2/sum3 to row gy+1
+    afp sum0 = afp(0.0f);
+    afp sum1 = afp(0.0f);
+    afp sum2 = afp(0.0f);
+    afp sum3 = afp(0.0f);
+
+    if (bias_term == 1)
+    {
+        // one bias value per output row, shared by both columns of the tile
+#if NCNN_image_shader
+        sum0 = image3d_ld1(bias_blob, ivec3(ly2.x, 0, 0));
+        sum2 = image3d_ld1(bias_blob, ivec3(ly2.y, 0, 0));
+#else
+        sum0 = buffer_ld1(bias_data, ly2.x);
+        sum2 = buffer_ld1(bias_data, ly2.y);
+#endif
+        sum1 = sum0;
+        sum3 = sum2;
+    }
+
+    // first input column touched by each of the two output columns
+    ivec2 v_offset = lx2 * stride_w;
+
+#if NCNN_image_shader
+
+    for (int y = 0; y < psc(h); y++)
+    {
+        for (int x = 0; x < kernel_w; x++)
+        {
+            afp v0 = image3d_ld1(bottom_blob, ivec3(v_offset.x + x * dilation_w, y, 0));
+            afp v1 = image3d_ld1(bottom_blob, ivec3(v_offset.y + x * dilation_w, y, 0));
+
+            // weights are addressed as (tap, input row, output row)
+            afp k0 = image3d_ld1(weight_blob, ivec3(x, y, ly2.x));
+            afp k1 = image3d_ld1(weight_blob, ivec3(x, y, ly2.y));
+
+            sum0 += v0 * k0;
+            sum1 += v1 * k0;
+            sum2 += v0 * k1;
+            sum3 += v1 * k1;
+        }
+    }
+
+#else
+
+    // each output row owns psc(h) * kernel_w consecutive weights
+    ivec2 w_offset = ly2 * psc(h) * kernel_w;
+
+    for (int y = 0; y < psc(h); y++)
+    {
+        for (int x = 0; x < kernel_w; x++)
+        {
+            afp v0 = buffer_ld1(bottom_blob_data, v_offset.x + x * dilation_w);
+            afp v1 = buffer_ld1(bottom_blob_data, v_offset.y + x * dilation_w);
+
+            afp k0 = buffer_ld1(weight_data, w_offset.x + x);
+            afp k1 = buffer_ld1(weight_data, w_offset.y + x);
+
+            sum0 += v0 * k0;
+            sum1 += v1 * k0;
+            sum2 += v0 * k1;
+            sum3 += v1 * k1;
+        }
+
+        // advance to the next input row and to its kernel_w weights
+        v_offset += psc(w);
+        w_offset += kernel_w;
+    }
+
+#endif
+
+    sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1);
+    sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1);
+    sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1);
+    sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1);
+
+    // (gx, gy) passed the early-out above, so sum0 is always written; the other
+    // three lanes are written only when they exist in the output
+#if NCNN_image_shader
+
+    image3d_st1(top_blob, ivec3(gx2.x, gy2.x, 0), sum0);
+    if (has_x1) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, 0), sum1);
+    if (has_y1) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, 0), sum2);
+    if (has_y1 && has_x1) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, 0), sum3);
+
+#else
+
+    const int gi = gy * psc(outw) + gx;
+
+    buffer_st1(top_blob_data, gi, sum0);
+    if (has_x1) buffer_st1(top_blob_data, gi + 1, sum1);
+    if (has_y1) buffer_st1(top_blob_data, gi + psc(outw), sum2);
+    if (has_y1 && has_x1) buffer_st1(top_blob_data, gi + psc(outw) + 1, sum3);
+
+#endif
+}