Skip to content

Commit

Permalink
deconvolution dynamic weight (#5119)
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui authored Oct 31, 2023
1 parent c96bc08 commit 4494aad
Show file tree
Hide file tree
Showing 44 changed files with 2,471 additions and 93 deletions.
4 changes: 4 additions & 0 deletions docs/developer-guide/operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@ y = activation(x3, act_type, act_params)
| 19 | output_pad_bottom| int | output_pad_right | |
| 20 | output_w | int | 0 | |
| 21 | output_h | int | output_w | |
| 28 | dynamic_weight| int | 0 | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
Expand Down Expand Up @@ -558,6 +559,7 @@ y = activation(x3, act_type, act_params)
| 15 | pad_right | int | pad_left | |
| 18 | output_pad_right| int | 0 | |
| 20 | output_w | int | 0 | |
| 28 | dynamic_weight| int | 0 | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
Expand Down Expand Up @@ -638,6 +640,7 @@ y = activation(x3, act_type, act_params)
| 19 | output_pad_bottom| int | output_pad_right | |
| 20 | output_w | int | 0 | |
| 21 | output_h | int | output_w | |
| 28 | dynamic_weight| int | 0 | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
Expand Down Expand Up @@ -668,6 +671,7 @@ y = activation(x3, act_type, act_params)
| 15 | pad_right | int | pad_left | |
| 18 | output_pad_right| int | 0 | |
| 20 | output_w | int | 0 | |
| 28 | dynamic_weight| int | 0 | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
Expand Down
4 changes: 2 additions & 2 deletions src/layer/arm/convolution_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -806,9 +806,9 @@ int Convolution_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<M
pd.set(1, _kernel_w);
pd.set(11, _kernel_h);
pd.set(2, dilation_w);
pd.set(21, dilation_h);
pd.set(12, dilation_h);
pd.set(3, stride_w);
pd.set(31, stride_h);
pd.set(13, stride_h);
pd.set(4, pad_left);
pd.set(15, pad_right);
pd.set(14, pad_top);
Expand Down
166 changes: 144 additions & 22 deletions src/layer/arm/deconvolution_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ Deconvolution_arm::Deconvolution_arm()

int Deconvolution_arm::create_pipeline(const Option& opt)
{
if (dynamic_weight)
return 0;

activation = create_activation_layer(activation_type, activation_params, opt);

#if NCNN_ARM82
Expand Down Expand Up @@ -750,6 +753,146 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti
return 0;
}

// Dynamic-weight deconvolution entry point.
// bottom_blobs[0] is the input feature map, bottom_blobs[1] carries the
// runtime weight (w=kernel_w, h=kernel_h, d=num_output, c=num_input per the
// reads below), and bottom_blobs[2] (when bias_term) carries the bias.
// A plain Deconvolution layer is constructed on the fly from these blobs
// and executed once.
// Returns 0 on success, -100 on allocation failure, or the error code
// propagated from the inner layer's forward().
int Deconvolution_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
{
    const Mat& bottom_blob = bottom_blobs[0];
    const Mat& _weight_data = bottom_blobs[1];
    Mat& top_blob = top_blobs[0];

    const int _num_input = bottom_blob.c * bottom_blob.elempack;
    const int _kernel_w = _weight_data.w;
    const int _kernel_h = _weight_data.h;
    const int _num_output = _weight_data.d * 1;

    Mat weight_data_flattened;
    flatten(_weight_data, weight_data_flattened, opt);
    if (weight_data_flattened.empty())
        return -100;

    // The inner Deconvolution layer expects fp32 weights; undo any fp16/bf16
    // storage conversion the runtime weight blob may have gone through.
#if NCNN_ARM82
    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && weight_data_flattened.elembits() == 16)
    {
        Mat weight_data_flattened_fp32;
        cast_float16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
        weight_data_flattened = weight_data_flattened_fp32;
    }
#endif // NCNN_ARM82
#if NCNN_BF16
    if (opt.use_bf16_storage && weight_data_flattened.elembits() == 16)
    {
        Mat weight_data_flattened_fp32;
        cast_bfloat16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
        weight_data_flattened = weight_data_flattened_fp32;
    }
#endif // NCNN_BF16

    // weight_data_flattened as pack1
    weight_data_flattened.w *= weight_data_flattened.elempack;
    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
    weight_data_flattened.elempack = 1;

    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
    // (group is fixed to 1 for plain deconvolution)
    Mat weight_data_transposed;
    {
        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
        if (weight_data_transposed.empty())
            return -100;

        const int outch_g = _num_output / 1;
        const int inch_g = _num_input / 1;
        const int maxk = _kernel_h * _kernel_w;

        for (int g = 0; g < 1; g++)
        {
            // reorder weight from inch-outch to outch-inch
            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
            for (int i = 0; i < outch_g; i++)
            {
                for (int j = 0; j < inch_g; j++)
                {
                    for (int k = 0; k < maxk; k++)
                    {
                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
                    }
                }
            }
        }
    }

    Mat bias_data_flattened;
    if (bias_term)
    {
        const Mat& _bias_data = bottom_blobs[2];
        flatten(_bias_data, bias_data_flattened, opt);
        if (bias_data_flattened.empty())
            return -100;

        // bias must be fp32 for the inner layer as well
#if NCNN_ARM82
        if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && bias_data_flattened.elembits() == 16)
        {
            Mat bias_data_flattened_fp32;
            cast_float16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
            bias_data_flattened = bias_data_flattened_fp32;
        }
#endif // NCNN_ARM82
#if NCNN_BF16
        if (opt.use_bf16_storage && bias_data_flattened.elembits() == 16)
        {
            Mat bias_data_flattened_fp32;
            cast_bfloat16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
            bias_data_flattened = bias_data_flattened_fp32;
        }
#endif // NCNN_BF16

        // bias_data_flattened as pack1
        bias_data_flattened.w *= bias_data_flattened.elempack;
        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
        bias_data_flattened.elempack = 1;
    }

    // Build a throwaway static-weight Deconvolution layer mirroring this
    // layer's hyper-parameters, load the runtime weights into it, and run it.
    ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution);

    ncnn::ParamDict pd;
    pd.set(0, _num_output);
    pd.set(1, _kernel_w);
    pd.set(11, _kernel_h);
    pd.set(2, dilation_w);
    pd.set(12, dilation_h);
    pd.set(3, stride_w);
    pd.set(13, stride_h);
    pd.set(4, pad_left);
    pd.set(15, pad_right);
    pd.set(14, pad_top);
    pd.set(16, pad_bottom);
    pd.set(18, output_pad_right);
    pd.set(19, output_pad_bottom);
    pd.set(20, output_w);
    pd.set(21, output_h);
    pd.set(5, bias_term);
    pd.set(6, weight_data_transposed.w);
    pd.set(9, activation_type);
    pd.set(10, activation_params);

    op->load_param(pd);

    ncnn::Mat weights[2];
    weights[0] = weight_data_transposed;
    weights[1] = bias_data_flattened;

    op->load_model(ncnn::ModelBinFromMatArray(weights));

    op->create_pipeline(opt);

    // Propagate the inner layer's result instead of discarding it, so a
    // failed execution is not reported as success.
    int ret = op->forward(bottom_blob, top_blob, opt);

    op->destroy_pipeline(opt);

    delete op;

    return ret;
}

#if NCNN_BF16
int Deconvolution_arm::create_pipeline_bf16s(const Option& opt)
{
Expand Down Expand Up @@ -1167,28 +1310,7 @@ int Deconvolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, cons
kptr += maxk;
}

if (activation_type == 1)
{
sum = std::max(sum, 0.f);
}
else if (activation_type == 2)
{
float slope = activation_params[0];
sum = sum > 0.f ? sum : sum * slope;
}
else if (activation_type == 3)
{
float min = activation_params[0];
float max = activation_params[1];
if (sum < min)
sum = min;
if (sum > max)
sum = max;
}
else if (activation_type == 4)
{
sum = 1.f / (1.f + expf(-sum));
}
sum = activation_ss(sum, activation_type, activation_params);

outptr[j] = float32_to_bfloat16(sum);
}
Expand Down
2 changes: 2 additions & 0 deletions src/layer/arm/deconvolution_arm.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class Deconvolution_arm : virtual public Deconvolution

virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;

virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;

protected:
#if NCNN_ARM82
int create_pipeline_fp16s(const Option& opt);
Expand Down
Loading

0 comments on commit 4494aad

Please sign in to comment.