diff --git a/docs/developer-guide/operation-param-weight-table.md b/docs/developer-guide/operation-param-weight-table.md
index aa5c99adf5e6..fb77d5c250b7 100644
--- a/docs/developer-guide/operation-param-weight-table.md
+++ b/docs/developer-guide/operation-param-weight-table.md
@@ -151,6 +151,9 @@
 ||2|width_scale|1.f|
 ||3|output_height|0|
 ||4|output_width|0|
+|LinearInt8|0|in_dim|0|scale weight|
+||1|out_dim|0|
+||2|group_size|1|
 |Log|0|base|-1.f|
 ||1|scale|1.f|
 ||2|shift|0.f|
diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md
index 56e7516a36a0..6db563b429f1 100644
--- a/docs/developer-guide/operators.md
+++ b/docs/developer-guide/operators.md
@@ -45,6 +45,7 @@
 * [InstanceNorm](#instancenorm)
 * [Interp](#interp)
 * [LayerNorm](#layernorm)
+* [LinearInt8](#linearint8)
 * [Log](#log)
 * [LRN](#lrn)
 * [LSTM](#lstm)
@@ -1104,6 +1105,25 @@ y = x * gamma + beta by elementwise
 | gamma_data | float | [affine_size] |
 | beta_data | float | [affine_size] |
 
+# LinearInt8
+```
+y = x (W S)^T
+```
+
+W is the int8 weight matrix; S broadcasts one dequantization scale over each run of group_size weights.
+
+| param id | name       | type | default | description |
+| -------- | ---------- | ---- | ------- | ----------- |
+| 0        | in_dim     | int  | 0       | input feature size |
+| 1        | out_dim    | int  | 0       | output feature size |
+| 2        | group_size | int  | 1       | weights per dequantization scale |
+
+| weight | type  | shape |
+| ------ | ----- | ----- |
+| scale  | float | [in_dim * out_dim / group_size] |
+| weight | int8  | [in_dim, out_dim] |
+
 # Log
 ```
 if base == -1 y = log(shift + x * scale)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4a4ea24e6365..4dd6812f5c06 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -165,6 +165,7 @@ ncnn_add_layer(CopyTo)
 ncnn_add_layer(Erf)
 ncnn_add_layer(Diag)
 ncnn_add_layer(CELU)
+ncnn_add_layer(LinearInt8)
 
 if(NCNN_VULKAN)
     ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp)
diff --git a/src/layer/arm/linearint8_arm.cpp b/src/layer/arm/linearint8_arm.cpp
new file mode 100644
index 000000000000..b0cff23423b7
--- /dev/null
+++ b/src/layer/arm/linearint8_arm.cpp
@@ -0,0 +1,91 @@
+#include "linearint8_arm.h"
+
+#if __ARM_NEON
+#include <arm_neon.h>
+#endif // __ARM_NEON
+
+namespace ncnn {
+
+int LinearInt8_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+{
+    if (bottom_blob.dims != 2 || bottom_blob.w != in_dim)
+        return -1;
+
+    int w = bottom_blob.w;
+    int h = bottom_blob.h;
+    size_t elemsize = bottom_blob.elemsize;
+
+    top_blob.create(out_dim, h, elemsize, opt.blob_allocator);
+    if (top_blob.empty())
+        return -100;
+
+    const signed char* wt = weight;
+
+#if (__ARM_NEON && __aarch64__)
+    if (w % group_size == 0 && group_size % 8 == 0)
+    {
+        for (int j = 0; j < h; j++)
+        {
+            const float* m = bottom_blob.row(j);
+            float* out = top_blob.row(j);
+
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int p = 0; p < out_dim; p++)
+            {
+                int base = w * p;
+                float32x4_t acc_p0 = vdupq_n_f32(0.f);
+                float32x4_t acc_p1 = vdupq_n_f32(0.f);
+                for (int k = 0; k < w; k += group_size)
+                {
+                    int index = base + k;
+                    float32x4_t sc_p = vdupq_n_f32(scales[index / group_size]);
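+                    // dequantize eight int8 weights per step: widen s8 -> s16 -> s32,
+                    // convert to f32 and apply the shared per-group scale, then
+                    // multiply-accumulate against eight input lanes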
+                    for (int i = 0, ind = index; i < group_size; i += 8, ind += 8)
+                    {
+                        int8x8_t i8x8 = vld1_s8(wt + ind);
+                        int16x8_t i16x8 = vmovl_s8(i8x8);
+                        int32x4_t i32_0 = vmovl_s16(vget_low_s16(i16x8));
+                        int32x4_t i32_1 = vmovl_s16(vget_high_s16(i16x8));
+                        float32x4_t wt_p0 = vcvtq_f32_s32(i32_0);
+                        float32x4_t wt_p1 = vcvtq_f32_s32(i32_1);
+                        float32x4_t m_p0 = vld1q_f32(m + k + i);
+                        float32x4_t m_p1 = vld1q_f32(m + k + i + 4);
+                        float32x4_t wt_s0 = vmulq_f32(wt_p0, sc_p);
+                        float32x4_t wt_s1 = vmulq_f32(wt_p1, sc_p);
+                        acc_p0 = vmlaq_f32(acc_p0, m_p0, wt_s0);
+                        acc_p1 = vmlaq_f32(acc_p1, m_p1, wt_s1);
+                    }
+                }
+                out[p] = vaddvq_f32(acc_p0) + vaddvq_f32(acc_p1);
+            }
+        }
+        return 0;
+    }
+#endif
+
+    for (int j = 0; j < h; j++)
+    {
+        const float* m = bottom_blob.row(j);
+        float* out = top_blob.row(j);
+
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int p = 0; p < out_dim; p++)
+        {
+            int base = w * p;
+            float acc = 0.0f;
+            for (int i = 0, index = base, scales_index = index / group_size; i < w; i++, index++)
+            {
+                acc += m[i] * wt[index] * scales[scales_index];
+                if (index % group_size == group_size - 1)
+                    scales_index++;
+            }
+            out[p] = acc;
+        }
+    }
+
+    return 0;
+}
+
+} // namespace ncnn
diff --git a/src/layer/arm/linearint8_arm.h b/src/layer/arm/linearint8_arm.h
new file mode 100644
index 000000000000..76b2255f4fa5
--- /dev/null
+++ b/src/layer/arm/linearint8_arm.h
@@ -0,0 +1,31 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef LAYER_LINEARINT8_ARM_H
+#define LAYER_LINEARINT8_ARM_H
+
+#include "net.h"
+#include "linearint8.h"
+
+namespace ncnn {
+
+class LinearInt8_arm : virtual public LinearInt8
+{
+public:
+    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+};
+
+} // namespace ncnn
+
+#endif // LAYER_LINEARINT8_ARM_H
diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp
new file mode 100644
index 000000000000..6c3e2d7da107
--- /dev/null
+++ b/src/layer/linearint8.cpp
@@ -0,0 +1,83 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "linearint8.h"
+
+namespace ncnn {
+
+LinearInt8::LinearInt8()
+{
+    one_blob_only = true;
+    support_inplace = false;
+}
+
+int LinearInt8::load_param(const ParamDict& pd)
+{
+    in_dim = pd.get(0, 0);
+    out_dim = pd.get(1, 0);
+    group_size = pd.get(2, 1);
+    if (group_size < 1 || in_dim * out_dim % group_size)
+        return -1;
+    return 0;
+}
+
+int LinearInt8::load_model(const ModelBin& mb)
+{
+    scales = mb.load(in_dim * out_dim / group_size, 1);
+    weight = mb.load(in_dim * out_dim, 0);
+    if (weight.elemsize != 1)
+        return -1;
+    return 0;
+}
+
+int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+{
+    if (bottom_blob.dims != 2 || bottom_blob.w != in_dim)
+        return -1;
+
+    int w = bottom_blob.w;
+    int h = bottom_blob.h;
+    size_t elemsize = bottom_blob.elemsize;
+
+    top_blob.create(out_dim, h, elemsize, opt.blob_allocator);
+    if (top_blob.empty())
+        return -100;
+
+    const signed char* wt = weight;
+
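+    // reference path: out[p] = sum_i m[i] * wt[p * in_dim + i] * scales[(p * in_dim + i) / group_size]
+    // (the int8 weight is dequantized on the fly, one scale per group_size weights)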
+    for (int j = 0; j < h; j++)
+    {
+        const float* m = bottom_blob.row(j);
+        float* out = top_blob.row(j);
+
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int p = 0; p < out_dim; p++)
+        {
+            int base = w * p;
+            float acc = 0.0f;
+            for (int i = 0; i < w; i++)
+            {
+                int index = base + i;
+                acc += m[i] * wt[index] * scales[index / group_size];
+            }
+            out[p] = acc;
+        }
+    }
+
+    return 0;
+}
+
+} // namespace ncnn
diff --git a/src/layer/linearint8.h b/src/layer/linearint8.h
new file mode 100644
index 000000000000..981177d1c8b8
--- /dev/null
+++ b/src/layer/linearint8.h
@@ -0,0 +1,42 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef LAYER_LINEARINT8_H
+#define LAYER_LINEARINT8_H
+
+#include "layer.h"
+
+namespace ncnn {
+
+class LinearInt8 : public Layer
+{
+public:
+    LinearInt8();
+
+    virtual int load_param(const ParamDict& pd);
+
+    virtual int load_model(const ModelBin& mb);
+
+    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+
+    int in_dim;
+    int out_dim;
+    int group_size;
+    Mat weight;
+    Mat scales;
+};
+
+} // namespace ncnn
+
+#endif // LAYER_LINEARINT8_H
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 21de08c6ff35..bdf0dee43d1a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -114,6 +114,7 @@ ncnn_add_layer_test(InnerProduct)
 ncnn_add_layer_test(InstanceNorm)
 ncnn_add_layer_test(Interp)
 ncnn_add_layer_test(LayerNorm)
+ncnn_add_layer_test(LinearInt8)
 ncnn_add_layer_test(LRN)
 ncnn_add_layer_test(LSTM)
 ncnn_add_layer_test(MatMul)
diff --git a/tests/test_linearint8.cpp b/tests/test_linearint8.cpp
new file mode 100644
index 000000000000..a4cea2a027b7
--- /dev/null
+++ b/tests/test_linearint8.cpp
@@ -0,0 +1,75 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "layer/linearint8.h"
+#include "testutil.h"
+
+static int test_linearint8(const ncnn::Mat& a, int in_dim, int out_dim, int group_size)
+{
+    if (in_dim * out_dim % group_size)
+    {
+        fprintf(stderr, "malformed test case: in_dim=%d out_dim=%d group_size=%d\n", in_dim, out_dim, group_size);
+        return -1;
+    }
+    if (a.w != in_dim)
+    {
+        fprintf(stderr, "malformed test case: a.w=%d does not match in_dim=%d\n", a.w, in_dim);
+        return -1;
+    }
+    ncnn::ParamDict pd;
+    pd.set(0, in_dim);
+    pd.set(1, out_dim);
+    pd.set(2, group_size);
+
+    std::vector<ncnn::Mat> weights(2);
+    weights[0] = RandomMat(in_dim * out_dim / group_size);
+    weights[1] = RandomS8Mat(in_dim * out_dim);
+
+    int ret = test_layer("LinearInt8", pd, weights, a);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_linearint8 failed a.dims=%d a=(%d %d) in_dim=%d out_dim=%d group_size=%d\n", a.dims, a.w, a.h, in_dim, out_dim, group_size);
+    }
+
+    return ret;
+}
+
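+// test_linearint8_0 exercises the portable scalar path (10 % 4 != 0 and 4 % 8 != 0);
+// the w=16, group_size=16 case below also hits the aarch64 NEON path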
+static int test_linearint8_0()
+{
+    ncnn::Mat a = RandomMat(10, 1);
+
+    return 0
+           || test_linearint8(a, 10, 6, 4)
+           || test_linearint8(a, 10, 8, 4)
+           || test_linearint8(a, 10, 10, 4);
+}
+
+static int test_linearint8_1()
+{
+    ncnn::Mat a = RandomMat(16, 1);
+
+    return 0
+           || test_linearint8(a, 16, 6, 16);
+}
+
+int main()
+{
+    SRAND(7767517);
+
+    return 0
+           || test_linearint8_0()
+           || test_linearint8_1();
+}
diff --git a/tests/testutil.h b/tests/testutil.h
index b879fa527fbe..a826ca78955f 100644
--- a/tests/testutil.h
+++ b/tests/testutil.h
@@ -1458,6 +1458,11 @@ int test_layer_opt(const char* layer_type, const ncnn::ParamDict& pd, const std:
     weights_fp16.resize(weights.size());
     for (size_t j = 0; j < weights.size(); j++)
     {
+        if (weights[j].elemsize != 4)
+        {
+            weights_fp16[j] = weights[j].clone();
+            continue;
+        }
         ncnn::Mat tmp;
         ncnn::cast_float32_to_bfloat16(weights[j], tmp, opt);
         ncnn::cast_bfloat16_to_float32(tmp, weights_fp16[j], opt);
@@ -1469,6 +1474,11 @@ int test_layer_opt(const char* layer_type, const ncnn::ParamDict& pd, const std:
     weights_fp16.resize(weights.size());
     for (size_t j = 0; j < weights.size(); j++)
     {
+        if (weights[j].elemsize != 4)
+        {
+            weights_fp16[j] = weights[j].clone();
+            continue;
+        }
         ncnn::Mat tmp;
         ncnn::cast_float32_to_float16(weights[j], tmp, opt);
         ncnn::cast_float16_to_float32(tmp, weights_fp16[j], opt);
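
For reference, a minimal standalone sketch of driving the new layer outside the test harness, mirroring what `tests/testutil.h` does internally. The layer name `LinearInt8`, the param ids, and the scales-then-weight load order come from the patch above; the concrete dimensions and toy weight values are illustrative only:

```cpp
#include "layer.h"     // ncnn::create_layer
#include "mat.h"
#include "modelbin.h"  // ncnn::ModelBinFromMatArray
#include "option.h"
#include "paramdict.h"

#include <stdio.h>
#include <vector>

int main()
{
    const int in_dim = 16, out_dim = 6, group_size = 16;

    ncnn::Layer* op = ncnn::create_layer("LinearInt8");
    if (!op)
        return -1;

    ncnn::ParamDict pd;
    pd.set(0, in_dim);     // param id 0: in_dim
    pd.set(1, out_dim);    // param id 1: out_dim
    pd.set(2, group_size); // param id 2: group_size
    op->load_param(pd);

    // weights in load_model order: per-group float scales, then int8 weights
    std::vector<ncnn::Mat> weights(2);
    weights[0] = ncnn::Mat(in_dim * out_dim / group_size);
    weights[0].fill(0.05f);
    weights[1] = ncnn::Mat(in_dim * out_dim, (size_t)1u); // elemsize 1 = int8
    signed char* w = weights[1];
    for (int i = 0; i < in_dim * out_dim; i++)
        w[i] = (signed char)(i % 15 - 7); // toy quantized values
    op->load_model(ncnn::ModelBinFromMatArray(weights.data()));

    ncnn::Option opt;
    opt.num_threads = 1;
    op->create_pipeline(opt);

    ncnn::Mat x(in_dim, 1); // 2D input: w = in_dim, h = batch rows
    x.fill(1.f);
    ncnn::Mat y;
    op->forward(x, y, opt); // y has w = out_dim, h = 1

    printf("y[0] = %f\n", y.row(0)[0]);

    op->destroy_pipeline(opt);
    delete op;
    return 0;
}
```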