From 1436252c019dec9a92eb9c02ffe54d9c662d92bb Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Tue, 5 Sep 2023 11:46:57 +0800
Subject: [PATCH 01/14] Add custom layer for int8-quantized LLM

---
 src/CMakeLists.txt       |  1 +
 src/layer/linearint8.cpp | 76 ++++++++++++++++++++++++++++++++++++++++
 src/layer/linearint8.h   | 37 +++++++++++++++++++
 3 files changed, 114 insertions(+)
 create mode 100644 src/layer/linearint8.cpp
 create mode 100644 src/layer/linearint8.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4a4ea24e6365..4dd6812f5c06 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -165,6 +165,7 @@ ncnn_add_layer(CopyTo)
 ncnn_add_layer(Erf)
 ncnn_add_layer(Diag)
 ncnn_add_layer(CELU)
+ncnn_add_layer(LinearInt8)
 
 if(NCNN_VULKAN)
     ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp)
diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp
new file mode 100644
index 000000000000..8be3c0260209
--- /dev/null
+++ b/src/layer/linearint8.cpp
@@ -0,0 +1,76 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "linearint8.h"
+
+namespace ncnn {
+
+LinearInt8::LinearInt8()
+{
+    one_blob_only = true;
+    support_inplace = false;
+}
+
+int LinearInt8::load_param(const ParamDict& pd)
+{
+    in_dim = pd.get(0, 0);
+    out_dim = pd.get(1, 0);
+    group_size = pd.get(2, 1);
+    return 0;
+}
+
+int LinearInt8::load_model(const ModelBin& mb)
+{
+    scales = mb.load(in_dim * out_dim / group_size, 1);
+    weight = mb.load(in_dim * out_dim, 0);
+    if (weight.elemsize != 1)
+        return -1;
+    return 0;
+}
+
+int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+{
+    if (bottom_blob.dims != 2 || bottom_blob.w != in_dim)
+        return -1;
+
+    int w = bottom_blob.w;
+    int h = bottom_blob.h;
+    size_t elemsize = bottom_blob.elemsize;
+
+    top_blob.create(out_dim, h, elemsize, opt.blob_allocator);
+    if (top_blob.empty())
+        return -100;
+
+    for (int j = 0; j < h; j++)
+    {
+        const float* m = bottom_blob.row(j);
+        float* out = top_blob.row(j);
+
+#pragma omp parallel for num_threads(opt.num_threads)
+        for (int p = 0; p < out_dim; p++)
+        {
+            int base = w * p;
+            out[p] = 0;
+            for (int i = 0; i < w; i++)
+            {
+                int index = base + i;
+                out[p] += m[i] * ((const int8_t*)weight)[index] * scales[index / group_size];
+            }
+        }
+    }
+
+    return 0;
+}
+
+} // namespace ncnn
\ No newline at end of file
diff --git a/src/layer/linearint8.h b/src/layer/linearint8.h
new file mode 100644
index 000000000000..374a139eb1ba
--- /dev/null
+++ b/src/layer/linearint8.h
@@ -0,0 +1,37 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "layer.h"
+
+namespace ncnn {
+
+class LinearInt8 : public Layer
+{
+public:
+    LinearInt8();
+
+    virtual int load_param(const ParamDict& pd);
+
+    virtual int load_model(const ModelBin& mb);
+
+    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+
+    int in_dim;
+    int out_dim;
+    int group_size;
+    Mat weight;
+    Mat scales;
+};
+
+} // namespace ncnn
\ No newline at end of file

From 6e8d028b6e804639ac16072a3ffecc53380a46da Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Tue, 5 Sep 2023 11:48:41 +0800
Subject: [PATCH 02/14] Fix trailing newlines

---
 src/layer/linearint8.cpp | 2 +-
 src/layer/linearint8.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp
index 8be3c0260209..b58ba086cd53 100644
--- a/src/layer/linearint8.cpp
+++ b/src/layer/linearint8.cpp
@@ -73,4 +73,4 @@ int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt
     return 0;
 }
 
-} // namespace ncnn
\ No newline at end of file
+} // namespace ncnn
diff --git a/src/layer/linearint8.h b/src/layer/linearint8.h
index 374a139eb1ba..a4287307d87c 100644
--- a/src/layer/linearint8.h
+++ b/src/layer/linearint8.h
@@ -34,4 +34,4 @@ class LinearInt8 : public Layer
     Mat scales;
 };
 
-} // namespace ncnn
\ No newline at end of file
+} // namespace ncnn

From 3b40f308aba7af575c19cc3458c0453ea398d8ae Mon Sep 17 00:00:00 2001
From: lrw04
Date: Tue, 5 Sep 2023 03:50:44 +0000
Subject: [PATCH 03/14] apply code-format changes

---
 src/layer/linearint8.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp
index b58ba086cd53..30d87948f512 100644
--- a/src/layer/linearint8.cpp
+++ b/src/layer/linearint8.cpp
@@ -57,7 +57,7 @@ int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt
         const float* m = bottom_blob.row(j);
         float* out = top_blob.row(j);
 
-#pragma omp parallel for num_threads(opt.num_threads)
+        #pragma omp parallel for num_threads(opt.num_threads)
         for (int p = 0; p < out_dim; p++)
         {
             int base = w * p;

From 2ecd5afc4af3bdc8f974e22455a0d3f91ce820dd Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Tue, 5 Sep 2023 17:04:19 +0800
Subject: [PATCH 04/14] Add more error checking

---
 src/layer/linearint8.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp
index 30d87948f512..8b3d52cddfb8 100644
--- a/src/layer/linearint8.cpp
+++ b/src/layer/linearint8.cpp
@@ -27,6 +27,8 @@ int LinearInt8::load_param(const ParamDict& pd)
     in_dim = pd.get(0, 0);
     out_dim = pd.get(1, 0);
     group_size = pd.get(2, 1);
+    if (in_dim * out_dim % group_size)
+        return -1;
     return 0;
 }
 

From 5184b87fd23728d8ac89c38c11b90aa321edb7d7 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Tue, 5 Sep 2023 21:37:28 +0800
Subject: [PATCH 05/14] Add include guard

---
 src/layer/linearint8.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/layer/linearint8.h b/src/layer/linearint8.h
index a4287307d87c..981177d1c8b8 100644
--- a/src/layer/linearint8.h
+++ b/src/layer/linearint8.h
@@ -12,6 +12,9 @@
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
+#ifndef LAYER_LINEARINT8_H
+#define LAYER_LINEARINT8_H
+
 #include "layer.h"
 
 namespace ncnn {
@@ -35,3 +38,5 @@ class LinearInt8 : public Layer
 };
 
 } // namespace ncnn
+
+#endif // LAYER_LINEARINT8_H

From e53be3cac957011ca03da09ffe23668fe8973091 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Wed, 6 Sep 2023 15:51:12 +0800
Subject: [PATCH 06/14] Initial commit for tests

---
 tests/CMakeLists.txt      |  1 +
 tests/test_linearint8.cpp | 78 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 tests/test_linearint8.cpp

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 21de08c6ff35..bdf0dee43d1a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -114,6 +114,7 @@ ncnn_add_layer_test(InnerProduct)
 ncnn_add_layer_test(InstanceNorm)
 ncnn_add_layer_test(Interp)
 ncnn_add_layer_test(LayerNorm)
+ncnn_add_layer_test(LinearInt8)
 ncnn_add_layer_test(LRN)
 ncnn_add_layer_test(LSTM)
 ncnn_add_layer_test(MatMul)
diff --git a/tests/test_linearint8.cpp b/tests/test_linearint8.cpp
new file mode 100644
index 000000000000..d158cf45fe25
--- /dev/null
+++ b/tests/test_linearint8.cpp
@@ -0,0 +1,78 @@
+// TODO
+
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
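+
+// Note (editor's annotation): test_layer() from tests/testutil.h is assumed to
+// run the layer under the option combinations ncnn supports (packing,
+// fp16/bf16 storage, etc.) and to compare each result against the naive
+// reference implementation, so one call per shape also covers optimized paths.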
+
+#include "layer/linearint8.h"
+#include "testutil.h"
+
+static int test_linearint8(const ncnn::Mat& a, int in_dim, int out_dim, int group_size)
+{
+    if (in_dim * out_dim % group_size)
+    {
+        fprintf(stderr, "malformed test case: in_dim=%d out_dim=%d group_size=%d\n", in_dim, out_dim, group_size);
+        return -1;
+    }
+    if (a.w != in_dim)
+    {
+        fprintf(stderr, "malformed test case: in_dim=%d out_dim=%d group_size=%d\n", in_dim, out_dim, group_size);
+        return -1;
+    }
+    ncnn::ParamDict pd;
+    pd.set(0, in_dim);
+    pd.set(1, out_dim);
+    pd.set(2, group_size);
+
+    std::vector<ncnn::Mat> weights(2);
+    weights[0] = RandomMat(in_dim * out_dim / group_size);
+    weights[1] = RandomS8Mat(in_dim * out_dim);
+
+    int ret = test_layer("LinearInt8", pd, weights, a);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_linearint8 failed a.dims=%d a=(%d, %d) in_dim=%d out_dim=%d group_size=%d\n", a.dims, a.h, a.w, in_dim, out_dim, group_size);
+    }
+
+    return ret;
+}
+
+static int test_lrn_0()
+{
+    ncnn::Mat a = RandomMat(10, 1);
+
+    return 0
+           || test_linearint8(a, 10, 6, 4)
+           || test_linearint8(a, 10, 8, 4)
+           || test_linearint8(a, 10, 10, 4);
+}
+
+static int test_lrn_1()
+{
+    ncnn::Mat a = RandomMat(16, 1);
+
+    return 0
+           || test_linearint8(a, 16, 6, 16)
+           || test_linearint8(a, 16, 6, 16)
+           || test_linearint8(a, 16, 6, 16)
+           || test_linearint8(a, 16, 6, 16);
+}
+
+int main()
+{
+    SRAND(7767517);
+
+    return 0
+           || test_lrn_0()
+           || test_lrn_1();
+}

From 01f30ecc00b6fe6d54291ac2fc65a4bebcf64702 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Wed, 6 Sep 2023 16:23:55 +0800
Subject: [PATCH 07/14] Change test function names

---
 tests/test_linearint8.cpp | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tests/test_linearint8.cpp b/tests/test_linearint8.cpp
index d158cf45fe25..5f7af39af884 100644
--- a/tests/test_linearint8.cpp
+++ b/tests/test_linearint8.cpp
@@ -47,7 +47,7 @@ static int test_linearint8(const ncnn::Mat& a, int in_dim, int out_dim, int grou
     return ret;
 }
 
-static int test_lrn_0()
+static int test_linearint8_0()
 {
     ncnn::Mat a = RandomMat(10, 1);
 
@@ -57,14 +57,11 @@ static int test_lrn_0()
            || test_linearint8(a, 10, 10, 4);
 }
 
-static int test_lrn_1()
+static int test_linearint8_1()
 {
     ncnn::Mat a = RandomMat(16, 1);
 
     return 0
-           || test_linearint8(a, 16, 6, 16)
-           || test_linearint8(a, 16, 6, 16)
-           || test_linearint8(a, 16, 6, 16)
            || test_linearint8(a, 16, 6, 16);
 }
 
@@ -73,6 +70,6 @@ int main()
     SRAND(7767517);
 
     return 0
-           || test_lrn_0()
-           || test_lrn_1();
+           || test_linearint8_0()
+           || test_linearint8_1();
 }

From 52cbc590308f1bf54ef4e25eb56e5971d87cb5ea Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Wed, 6 Sep 2023 17:09:46 +0800
Subject: [PATCH 08/14] Add general optimizations

---
 src/layer/arm/linearint8_arm.cpp | 91 ++++++++++++++++++++++++++++++++
 src/layer/arm/linearint8_arm.h   | 31 +++++++++++
 src/layer/linearint8.cpp         |  7 ++-
 3 files changed, 127 insertions(+), 2 deletions(-)
 create mode 100644 src/layer/arm/linearint8_arm.cpp
 create mode 100644 src/layer/arm/linearint8_arm.h

diff --git a/src/layer/arm/linearint8_arm.cpp b/src/layer/arm/linearint8_arm.cpp
new file mode 100644
index 000000000000..59369c2c7549
--- /dev/null
+++ b/src/layer/arm/linearint8_arm.cpp
@@ -0,0 +1,91 @@
+#include "linearint8_arm.h"
+
+#if __ARM_NEON
+#include <arm_neon.h>
+#endif // __ARM_NEON
+
+namespace ncnn {
+
+int LinearInt8_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+{
+    if (bottom_blob.dims != 2 || bottom_blob.w != in_dim)
+        return -1;
+
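+    // Note (editor's annotation): the aarch64 NEON fast path below requires
+    // group_size to be a multiple of 8 and to evenly divide the row width;
+    // every other shape falls through to the scalar loop at the end of this
+    // function, which mirrors the naive code in src/layer/linearint8.cpp.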
+    int w = bottom_blob.w;
+    int h = bottom_blob.h;
+    size_t elemsize = bottom_blob.elemsize;
+
+    top_blob.create(out_dim, h, elemsize, opt.blob_allocator);
+    if (top_blob.empty())
+        return -100;
+
+    const int8_t* wt = (const int8_t*)weight;
+
+#if (__ARM_NEON && __aarch64__)
+
+    float zero = 0.0f;
+
+    if (!(w % group_size) && !(group_size % 8))
+    {
+        for (int j = 0; j < h; j++)
+        {
+            const float* m = bottom_blob.row(j);
+            float* out = top_blob.row(j);
+
+#pragma omp parallel for num_threads(opt.num_threads)
+            for (int p = 0; p < out_dim; p++)
+            {
+                int base = w * p;
+                float32x4_t acc_p0 = vld1q_dup_f32(&zero), acc_p1 = vld1q_dup_f32(&zero);
+                for (int k = 0; k < w; k += group_size)
+                {
+                    int scales_index = (base + k) / group_size;
+                    int index = base + k;
+                    const float* sc = (const float*)scales + scales_index;
+                    for (int i = 0, ind = index; i < group_size; i += 8, ind += 8)
+                    {
+                        int8x8_t i8x8 = vld1_s8(wt + ind);
+                        int16x8_t i16x8 = vmovl_s8(i8x8);
+                        int32x4_t i32_0 = vmovl_s16(vget_low_s16(i16x8));
+                        int32x4_t i32_1 = vmovl_s16(vget_high_s16(i16x8));
+                        float32x4_t wt_p0 = vcvtq_f32_s32(i32_0);
+                        float32x4_t wt_p1 = vcvtq_f32_s32(i32_1);
+                        float32x4_t m_p0 = vld1q_f32(m + k + i);
+                        float32x4_t m_p1 = vld1q_f32(m + k + i + 4);
+                        float32x4_t sc_p = vld1q_dup_f32(sc);
+                        float32x4_t acc_real0 = vmulq_f32(wt_p0, sc_p);
+                        float32x4_t acc_real1 = vmulq_f32(wt_p1, sc_p);
+                        acc_p0 = vmlaq_f32(acc_p0, m_p0, acc_real0);
+                        acc_p1 = vmlaq_f32(acc_p1, m_p1, acc_real1);
+                    }
+                }
+                out[p] = vaddvq_f32(acc_p0) + vaddvq_f32(acc_p1);
+            }
+        }
+        return 0;
+    }
+#endif
+
+    for (int j = 0; j < h; j++)
+    {
+        const float* m = bottom_blob.row(j);
+        float* out = top_blob.row(j);
+
+#pragma omp parallel for num_threads(opt.num_threads)
+        for (int p = 0; p < out_dim; p++)
+        {
+            int base = w * p;
+            float acc = 0.0f;
+            for (int i = 0, index = base, scales_index = index / group_size; i < w; i++, index++)
+            {
+                acc += m[i] * wt[index] * scales[scales_index];
+                if (index % group_size == group_size - 1) scales_index++;
+            }
+            out[p] = acc;
+        }
+    }
+
+    return 0;
+}
+
+} // namespace ncnn
\ No newline at end of file
diff --git a/src/layer/arm/linearint8_arm.h b/src/layer/arm/linearint8_arm.h
new file mode 100644
index 000000000000..76b2255f4fa5
--- /dev/null
+++ b/src/layer/arm/linearint8_arm.h
@@ -0,0 +1,31 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef LAYER_LINEARINT8_ARM_H
+#define LAYER_LINEARINT8_ARM_H
+
+#include "net.h"
+#include "linearint8.h"
+
+namespace ncnn {
+
+class LinearInt8_arm : virtual public LinearInt8
+{
+public:
+    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+};
+
+} // namespace ncnn
+
+#endif // LAYER_LINEARINT8_ARM_H
diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp
index 8b3d52cddfb8..3967478992bb 100644
--- a/src/layer/linearint8.cpp
+++ b/src/layer/linearint8.cpp
@@ -54,6 +54,8 @@ int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt
     if (top_blob.empty())
         return -100;
 
+    const int8_t *wt = (const int8_t *)weight;
+
     for (int j = 0; j < h; j++)
     {
         const float* m = bottom_blob.row(j);
@@ -63,12 +65,13 @@ int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt
         for (int p = 0; p < out_dim; p++)
         {
             int base = w * p;
-            out[p] = 0;
+            float acc = 0.0f;
             for (int i = 0; i < w; i++)
             {
                 int index = base + i;
-                out[p] += m[i] * ((const int8_t*)weight)[index] * scales[index / group_size];
+                acc += m[i] * wt[index] * scales[index / group_size];
             }
+            out[p] = acc;
         }
     }
 

From 9426487871e69cffaf2d359090f97cf15a60333f Mon Sep 17 00:00:00 2001
From: lrw04
Date: Wed, 6 Sep 2023 09:12:02 +0000
Subject: [PATCH 09/14] apply code-format changes

---
 src/layer/arm/linearint8_arm.cpp | 4 ++--
 src/layer/linearint8.cpp         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/layer/arm/linearint8_arm.cpp b/src/layer/arm/linearint8_arm.cpp
index 59369c2c7549..1608339b6fb2 100644
--- a/src/layer/arm/linearint8_arm.cpp
+++ b/src/layer/arm/linearint8_arm.cpp
@@ -32,7 +32,7 @@ int LinearInt8_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
             const float* m = bottom_blob.row(j);
             float* out = top_blob.row(j);
 
-#pragma omp parallel for num_threads(opt.num_threads)
+            #pragma omp parallel for num_threads(opt.num_threads)
             for (int p = 0; p < out_dim; p++)
             {
                 int base = w * p;
@@ -71,7 +71,7 @@ int LinearInt8_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
         const float* m = bottom_blob.row(j);
         float* out = top_blob.row(j);
 
-#pragma omp parallel for num_threads(opt.num_threads)
+        #pragma omp parallel for num_threads(opt.num_threads)
         for (int p = 0; p < out_dim; p++)
         {
             int base = w * p;
diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp
index 3967478992bb..6c3e2d7da107 100644
--- a/src/layer/linearint8.cpp
+++ b/src/layer/linearint8.cpp
@@ -54,7 +54,7 @@ int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt
     if (top_blob.empty())
         return -100;
 
-    const int8_t *wt = (const int8_t *)weight;
+    const int8_t* wt = (const int8_t*)weight;
 
     for (int j = 0; j < h; j++)
     {

From d2ce74bb7fc2a4faf6d28d16c6c234a9f235bdf7 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Thu, 7 Sep 2023 14:29:52 +0800
Subject: [PATCH 10/14] Fix int8 weights being converted into fp16 or bf16

---
 tests/testutil.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/testutil.h b/tests/testutil.h
index b879fa527fbe..a826ca78955f 100644
--- a/tests/testutil.h
+++ b/tests/testutil.h
@@ -1458,6 +1458,11 @@ int test_layer_opt(const char* layer_type, const ncnn::ParamDict& pd, const std:
         weights_fp16.resize(weights.size());
         for (size_t j = 0; j < weights.size(); j++)
         {
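+            // Keep non-fp32 weights (e.g. LinearInt8's int8 blobs, elemsize == 1)
+            // byte-exact instead of round-tripping them through the lossy cast below.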
+            if (weights[j].elemsize != 4)
+            {
+                weights_fp16[j] = weights[j].clone();
+                continue;
+            }
             ncnn::Mat tmp;
             ncnn::cast_float32_to_bfloat16(weights[j], tmp, opt);
             ncnn::cast_bfloat16_to_float32(tmp, weights_fp16[j], opt);
@@ -1469,6 +1474,11 @@ int test_layer_opt(const char* layer_type, const ncnn::ParamDict& pd, const std:
         weights_fp16.resize(weights.size());
         for (size_t j = 0; j < weights.size(); j++)
         {
+            if (weights[j].elemsize != 4)
+            {
+                weights_fp16[j] = weights[j].clone();
+                continue;
+            }
             ncnn::Mat tmp;
             ncnn::cast_float32_to_float16(weights[j], tmp, opt);
             ncnn::cast_float16_to_float32(tmp, weights_fp16[j], opt);

From 61d6c8407f423427da29195dfa0e5a55f7c1a522 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Thu, 7 Sep 2023 22:18:45 +0800
Subject: [PATCH 11/14] [skip ci] Add a trailing newline?

---
 src/layer/arm/linearint8_arm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/layer/arm/linearint8_arm.cpp b/src/layer/arm/linearint8_arm.cpp
index 1608339b6fb2..b0cff23423b7 100644
--- a/src/layer/arm/linearint8_arm.cpp
+++ b/src/layer/arm/linearint8_arm.cpp
@@ -88,4 +88,4 @@ int LinearInt8_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
     return 0;
 }
 
-} // namespace ncnn
\ No newline at end of file
+} // namespace ncnn

From f4e961a607ca49ff2f6bf8710833c4effce3e51e Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Fri, 8 Sep 2023 10:44:01 +0800
Subject: [PATCH 12/14] Remove TODO banner

---
 tests/test_linearint8.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/test_linearint8.cpp b/tests/test_linearint8.cpp
index 5f7af39af884..a4cea2a027b7 100644
--- a/tests/test_linearint8.cpp
+++ b/tests/test_linearint8.cpp
@@ -1,5 +1,3 @@
-// TODO
-
 // Tencent is pleased to support the open source community by making ncnn available.
 //
 // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.

From 6be81969d7d210c32c74fd9fa8d9522bd021b923 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Sun, 10 Sep 2023 18:50:46 +0800
Subject: [PATCH 13/14] Update documentation

---
 docs/developer-guide/operation-param-weight-table.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/developer-guide/operation-param-weight-table.md b/docs/developer-guide/operation-param-weight-table.md
index aa5c99adf5e6..fb77d5c250b7 100644
--- a/docs/developer-guide/operation-param-weight-table.md
+++ b/docs/developer-guide/operation-param-weight-table.md
@@ -151,6 +151,9 @@
 ||2|width_scale|1.f|
 ||3|output_height|0|
 ||4|output_width|0|
+|LinearInt8|0|in_dim|0|scale weight|
+||1|out_dim|0|
+||2|group_size|1|
 |Log|0|base|-1.f|
 ||1|scale|1.f|
 ||2|shift|0.f|

From 68b4b2d16ac413175250d02fc07f219cda3b3204 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Sun, 10 Sep 2023 18:59:46 +0800
Subject: [PATCH 14/14] Update operators table

---
 docs/developer-guide/operators.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md
index 56e7516a36a0..6db563b429f1 100644
--- a/docs/developer-guide/operators.md
+++ b/docs/developer-guide/operators.md
@@ -45,6 +45,7 @@
 * [InstanceNorm](#instancenorm)
 * [Interp](#interp)
 * [LayerNorm](#layernorm)
+* [LinearInt8](#linearint8)
 * [Log](#log)
 * [LRN](#lrn)
 * [LSTM](#lstm)
@@ -1104,6 +1105,24 @@ y = x * gamma + beta by elementwise
 | gamma_data | float | [affine_size] |
 | beta_data  | float | [affine_size] |
 
+# LinearInt8
+```
+y = x (WS)^T
+```
+
+* one_blob_only
+
+| param id | name | type | default | description |
+| --------- | ------------- | ----- | --------- | ----------------- |
+| 0 | in_dim | int | 0 | input vector length |
+| 1 | out_dim | int | 0 | output vector length |
+| 2 | group_size | int | 1 | number of consecutive weights sharing one scale |
+
+| weight | type | shape |
+| ------------- | ----- | --------------------- |
+| scale | float | [in_dim * out_dim / group_size] |
+| weight | int8 | [in_dim, out_dim] |
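+
+A minimal scalar reference of the computation (an illustrative sketch, not
+ncnn API; `x`, `wt`, `sc`, `y` are raw arrays with the shapes listed above):
+```
+for (int p = 0; p < out_dim; p++)
+{
+    float acc = 0.f;
+    for (int i = 0; i < in_dim; i++)
+    {
+        int index = p * in_dim + i;
+        // dequantize on the fly: one fp32 scale per group_size int8 weights
+        acc += x[i] * wt[index] * sc[index / group_size];
+    }
+    y[p] = acc;
+}
+```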
+
 # Log
 ```
 if base == -1 y = log(shift + x * scale)