Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deconvolution dynamic weight #5119

Merged
merged 7 commits into from
Oct 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/developer-guide/operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@ y = activation(x3, act_type, act_params)
| 19 | output_pad_bottom| int | output_pad_right | |
| 20 | output_w | int | 0 | |
| 21 | output_h | int | output_w | |
| 28 | dynamic_weight| int | 0 | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
Expand Down Expand Up @@ -558,6 +559,7 @@ y = activation(x3, act_type, act_params)
| 15 | pad_right | int | pad_left | |
| 18 | output_pad_right| int | 0 | |
| 20 | output_w | int | 0 | |
| 28 | dynamic_weight| int | 0 | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
Expand Down Expand Up @@ -638,6 +640,7 @@ y = activation(x3, act_type, act_params)
| 19 | output_pad_bottom| int | output_pad_right | |
| 20 | output_w | int | 0 | |
| 21 | output_h | int | output_w | |
| 28 | dynamic_weight| int | 0 | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
Expand Down Expand Up @@ -668,6 +671,7 @@ y = activation(x3, act_type, act_params)
| 15 | pad_right | int | pad_left | |
| 18 | output_pad_right| int | 0 | |
| 20 | output_w | int | 0 | |
| 28 | dynamic_weight| int | 0 | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
Expand Down
4 changes: 2 additions & 2 deletions src/layer/arm/convolution_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -806,9 +806,9 @@ int Convolution_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<M
pd.set(1, _kernel_w);
pd.set(11, _kernel_h);
pd.set(2, dilation_w);
pd.set(21, dilation_h);
pd.set(12, dilation_h);
pd.set(3, stride_w);
pd.set(31, stride_h);
pd.set(13, stride_h);
pd.set(4, pad_left);
pd.set(15, pad_right);
pd.set(14, pad_top);
Expand Down
166 changes: 144 additions & 22 deletions src/layer/arm/deconvolution_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ Deconvolution_arm::Deconvolution_arm()

int Deconvolution_arm::create_pipeline(const Option& opt)
{
if (dynamic_weight)
return 0;

activation = create_activation_layer(activation_type, activation_params, opt);

#if NCNN_ARM82
Expand Down Expand Up @@ -750,6 +753,146 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti
return 0;
}

int Deconvolution_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
{
const Mat& bottom_blob = bottom_blobs[0];
const Mat& _weight_data = bottom_blobs[1];
Mat& top_blob = top_blobs[0];

const int _num_input = bottom_blob.c * bottom_blob.elempack;
const int _kernel_w = _weight_data.w;
const int _kernel_h = _weight_data.h;
const int _num_output = _weight_data.d * 1;

Mat weight_data_flattened;
flatten(_weight_data, weight_data_flattened, opt);
if (weight_data_flattened.empty())
return -100;

#if NCNN_ARM82
if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && weight_data_flattened.elembits() == 16)
{
Mat weight_data_flattened_fp32;
cast_float16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
weight_data_flattened = weight_data_flattened_fp32;
}
#endif // NCNN_ARM82
#if NCNN_BF16
if (opt.use_bf16_storage && weight_data_flattened.elembits() == 16)
{
Mat weight_data_flattened_fp32;
cast_bfloat16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
weight_data_flattened = weight_data_flattened_fp32;
}
#endif // NCNN_BF16

// weight_data_flattened as pack1
weight_data_flattened.w *= weight_data_flattened.elempack;
weight_data_flattened.elemsize /= weight_data_flattened.elempack;
weight_data_flattened.elempack = 1;

// transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
Mat weight_data_transposed;
{
weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
if (weight_data_transposed.empty())
return -100;

const int outch_g = _num_output / 1;
const int inch_g = _num_input / 1;
const int maxk = _kernel_h * _kernel_w;

for (int g = 0; g < 1; g++)
{
// reorder weight from inch-outch to outch-inch
float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
for (int i = 0; i < outch_g; i++)
{
for (int j = 0; j < inch_g; j++)
{
for (int k = 0; k < maxk; k++)
{
wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
}
}
}
}
}

Mat bias_data_flattened;
if (bias_term)
{
const Mat& _bias_data = bottom_blobs[2];
flatten(_bias_data, bias_data_flattened, opt);
if (bias_data_flattened.empty())
return -100;

#if NCNN_ARM82
if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && bias_data_flattened.elembits() == 16)
{
Mat bias_data_flattened_fp32;
cast_float16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
bias_data_flattened = bias_data_flattened_fp32;
}
#endif // NCNN_ARM82
#if NCNN_BF16
if (opt.use_bf16_storage && bias_data_flattened.elembits() == 16)
{
Mat bias_data_flattened_fp32;
cast_bfloat16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
bias_data_flattened = bias_data_flattened_fp32;
}
#endif // NCNN_BF16

// bias_data_flattened as pack1
bias_data_flattened.w *= bias_data_flattened.elempack;
bias_data_flattened.elemsize /= bias_data_flattened.elempack;
bias_data_flattened.elempack = 1;
}

ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);

ncnn::ParamDict pd;
pd.set(0, _num_output);
pd.set(1, _kernel_w);
pd.set(11, _kernel_h);
pd.set(2, dilation_w);
pd.set(12, dilation_h);
pd.set(3, stride_w);
pd.set(13, stride_h);
pd.set(4, pad_left);
pd.set(15, pad_right);
pd.set(14, pad_top);
pd.set(16, pad_bottom);
pd.set(18, output_pad_right);
pd.set(19, output_pad_bottom);
pd.set(20, output_w);
pd.set(21, output_h);
pd.set(5, bias_term);
pd.set(6, weight_data_transposed.w);
pd.set(9, activation_type);
pd.set(10, activation_params);

op->load_param(pd);

ncnn::Mat weights[2];
weights[0] = weight_data_transposed;
weights[1] = bias_data_flattened;

op->load_model(ncnn::ModelBinFromMatArray(weights));

op->create_pipeline(opt);

op->forward(bottom_blob, top_blob, opt);

op->destroy_pipeline(opt);

delete op;

return 0;
}

#if NCNN_BF16
int Deconvolution_arm::create_pipeline_bf16s(const Option& opt)
{
Expand Down Expand Up @@ -1167,28 +1310,7 @@ int Deconvolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, cons
kptr += maxk;
}

if (activation_type == 1)
{
sum = std::max(sum, 0.f);
}
else if (activation_type == 2)
{
float slope = activation_params[0];
sum = sum > 0.f ? sum : sum * slope;
}
else if (activation_type == 3)
{
float min = activation_params[0];
float max = activation_params[1];
if (sum < min)
sum = min;
if (sum > max)
sum = max;
}
else if (activation_type == 4)
{
sum = 1.f / (1.f + expf(-sum));
}
sum = activation_ss(sum, activation_type, activation_params);

outptr[j] = float32_to_bfloat16(sum);
}
Expand Down
2 changes: 2 additions & 0 deletions src/layer/arm/deconvolution_arm.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class Deconvolution_arm : virtual public Deconvolution

virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;

virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;

protected:
#if NCNN_ARM82
int create_pipeline_fp16s(const Option& opt);
Expand Down
Loading
Loading