diff --git a/.ci/test-coverage.yml b/.ci/test-coverage.yml
index f46bf6e3621..7ccc106564f 100644
--- a/.ci/test-coverage.yml
+++ b/.ci/test-coverage.yml
@@ -79,7 +79,7 @@ jobs:
       run: |
         mkdir build && cd build
         cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_OPENMP=OFF -DNCNN_VULKAN=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
       run: |
         printf "[Processor]\nThreadCount=4\n" > build/tests/SwiftShader.ini
@@ -159,7 +159,7 @@ jobs:
       run: |
         mkdir build && cd build
         cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_OPENMP=OFF -DNCNN_VULKAN=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
       run: |
         export LP_NUM_THREADS=4
@@ -230,9 +230,9 @@ jobs:
           -DNCNN_AVX512BF16=${{matrix.AVX512BF16}} \
           -DNCNN_AVX512FP16=${{matrix.AVX512FP16}} \
           ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
-      run: cd build && ctest --output-on-failure -j $(nproc)
+      run: cd build && ctest --output-on-failure -j 4
     - name: lcov-collect
       run: |
         cd build
@@ -309,12 +309,12 @@ jobs:
       run: |
         mkdir build && cd build
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_GNU_INLINE_ASM=${{matrix.GNU_INLINE_ASM}} -DNCNN_VFPV4=ON -DNCNN_ARM82=OFF -DNCNN_OPENMP=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
       run: |
         export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j 4
     - name: lcov-collect
       run: |
         cd build
@@ -327,12 +327,12 @@ jobs:
       run: |
         mkdir build-armhf-vfpv3-d16 && cd build-armhf-vfpv3-d16
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf-vfpv3-d16.toolchain.cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_GNU_INLINE_ASM=${{matrix.GNU_INLINE_ASM}} -DNCNN_VFPV4=OFF -DNCNN_ARM82=OFF -DNCNN_OPENMP=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test-armhf-vfpv3-d16
       run: |
         export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
         cd build-armhf-vfpv3-d16
-        TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j 4
     - name: lcov-collect-armhf-vfpv3-d16
       run: |
         cd build-armhf-vfpv3-d16
@@ -423,12 +423,12 @@ jobs:
           -DNCNN_ARM84BF16=${{matrix.ARM84BF16}} \
           -DNCNN_ARM84I8MM=${{matrix.ARM84I8MM}} \
           ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
       run: |
         export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 4
     - name: lcov-collect
       run: |
         cd build
@@ -502,12 +502,12 @@ jobs:
       run: |
         mkdir build && cd build
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/mipsisa32r6el-linux-gnu.toolchain.cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_MSA=OFF -DNCNN_MMI=OFF -DNCNN_OPENMP=${{matrix.OPENMP}} -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
       run: |
         export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-mipsel TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/mipsisa32r6el-linux-gnu" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=qemu-mipsel TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/mipsisa32r6el-linux-gnu" ctest --output-on-failure -j 4
     - name: lcov-collect
       run: |
         cd build
@@ -581,12 +581,12 @@ jobs:
       run: |
         mkdir build && cd build
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/mipsisa64r6el-linux-gnuabi64.toolchain.cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_MSA=ON -DNCNN_MMI=OFF -DNCNN_OPENMP=${{matrix.OPENMP}} -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
       run: |
         export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-mips64el TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/mipsisa64r6el-linux-gnuabi64" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=qemu-mips64el TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/mipsisa64r6el-linux-gnuabi64" ctest --output-on-failure -j 4
     - name: lcov-collect
       run: |
         cd build
@@ -660,12 +660,12 @@ jobs:
       run: |
         mkdir build && cd build
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/powerpc-linux-gnu.toolchain.cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_OPENMP=${{matrix.OPENMP}} -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
       run: |
         export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-ppc TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc-linux-gnu" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=qemu-ppc TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc-linux-gnu" ctest --output-on-failure -j 4
     - name: lcov-collect
       run: |
         cd build
@@ -739,12 +739,12 @@ jobs:
       run: |
         mkdir build && cd build
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/powerpc64le-linux-gnu.toolchain.cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_OPENMP=${{matrix.OPENMP}} -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
       run: |
         export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu" ctest --output-on-failure -j 4
     - name: lcov-collect
       run: |
         cd build
@@ -824,12 +824,12 @@ jobs:
       run: |
         mkdir build && cd build
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/riscv64-linux-gnu.toolchain.cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_OPENMP=${{matrix.OPENMP}} -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
       run: |
         export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/riscv64-linux-gnu" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/riscv64-linux-gnu" ctest --output-on-failure -j 4
     - name: lcov-collect
       run: |
         cd build
@@ -951,12 +951,12 @@ jobs:
         export RISCV_ROOT_PATH=${{ci.workspace}}/rv64gcv-install
         mkdir build && cd build
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/riscv64-unknown-linux-gnu.toolchain.cmake -DCMAKE_BUILD_TYPE=debug -DCMAKE_C_FLAGS="-O1" -DCMAKE_CXX_FLAGS="-O1" -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_RVV=ON -DNCNN_OPENMP=${{matrix.OPENMP}} -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test-vlen128
       run: |
         export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,x-zvfh=true,vlen=128,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,x-zvfh=true,vlen=128,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j 4
     - name: lcov-collect-vlen128
       run: |
         cd build
@@ -971,7 +971,7 @@ jobs:
       run: |
         export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,x-zvfh=true,vlen=256,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,x-zvfh=true,vlen=256,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j 4
     - name: lcov-collect-vlen256
       run: |
         cd build
@@ -1051,12 +1051,12 @@ jobs:
         export LOONGARCH64_ROOT_PATH=${{ci.workspace}}/cross-tools
         mkdir build && cd build
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/loongarch64-unknown-linux-gnu.toolchain.cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_LSX=ON -DNCNN_LASX=OFF -DNCNN_OPENMP=${{matrix.OPENMP}} -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
       run: |
         export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-loongarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;${{ci.workspace}}/cross-tools/target" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=qemu-loongarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;${{ci.workspace}}/cross-tools/target" ctest --output-on-failure -j 4
     - name: lcov-collect
       run: |
         cd build
@@ -1099,9 +1099,9 @@ jobs:
       run: |
         mkdir build && cd build
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEMATH=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
-        cmake --build . -j $(nproc)
+        cmake --build . -j 4
     - name: test
-      run: cd build && ctest --output-on-failure -j $(nproc)
+      run: cd build && ctest --output-on-failure -j 4
     - name: lcov-collect
       run: |
         cd build
diff --git a/src/layer/convolution.cpp b/src/layer/convolution.cpp
index fe025456f48..c55b3f27687 100644
--- a/src/layer/convolution.cpp
+++ b/src/layer/convolution.cpp
@@ -241,13 +241,13 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 
         op->create_pipeline(opt);
 
         // forward
-        op->forward(bottom_blob, top_blob, opt);
+        int ret = op->forward(bottom_blob, top_blob, opt);
 
         op->destroy_pipeline(opt);
 
         delete op;
 
-        return 0;
+        return ret;
     }
 }
@@ -401,6 +401,8 @@ int Convolution::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
         opt_g.blob_allocator = opt.workspace_allocator;
 
         quantize_to_int8(bottom_blob, bottom_blob_unbordered, bottom_blob_int8_scales, opt_g);
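+        // quantize_to_int8 leaves its output Mat empty when the allocation
+        // inside it fails, so report out-of-memory here; -100 follows the
+        // error convention used by the ncnn forward implementations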
+        if (bottom_blob_unbordered.empty())
+            return -100;
     }
 
     Mat bottom_blob_bordered;
diff --git a/tests/test_convolution_oom.cpp b/tests/test_convolution_oom.cpp
new file mode 100644
index 00000000000..6643753359a
--- /dev/null
+++ b/tests/test_convolution_oom.cpp
@@ -0,0 +1,149 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "testutil.h"
+
+static int test_convolution_oom(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias)
+{
+    ncnn::Mat a = RandomMat(w, h, c);
+
+    ncnn::ParamDict pd;
+    pd.set(0, outch);
+    pd.set(1, kernel);
+    pd.set(2, dilation);
+    pd.set(3, stride);
+    pd.set(4, pad);
+    pd.set(5, bias);
+    pd.set(6, outch * c * kernel * kernel);
+
+    int activation_type = RAND() % 7; // 0 1 2 3 4 5 6
+    ncnn::Mat activation_params(2);
+    activation_params[0] = (activation_type == 6) ? RandomFloat(0, 1) : RandomFloat(-1, 0); // alpha
+    activation_params[1] = RandomFloat(0, 1);                                               // beta
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    std::vector<ncnn::Mat> weights(bias ? 2 : 1);
+    weights[0] = RandomMat(outch * c * kernel * kernel);
+    if (bias)
+        weights[1] = RandomMat(outch);
+
+    int ret = test_layer_oom("Convolution", pd, weights, a);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_convolution_oom failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, activation_type, activation_params[0], activation_params[1]);
+    }
+
+    return ret;
+}
+
+static int test_convolution_0()
+{
+    return 0
+           || test_convolution_oom(9, 7, 31, 63, 1, 1, 1, 0, 1)
+           || test_convolution_oom(9, 7, 31, 63, 3, 1, 1, 1, 1);
+}
+
+#if NCNN_INT8
+static int test_convolution_oom_int8(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, bool requant = false)
+{
+    ncnn::Mat a = RandomMat(w, h, c);
+
+    ncnn::ParamDict pd;
+    pd.set(0, outch);
+    pd.set(1, kernel);
+    pd.set(2, dilation);
+    pd.set(3, stride);
+    pd.set(4, pad);
+    pd.set(5, bias);
+    pd.set(6, outch * c * kernel * kernel);
+    pd.set(8, requant ? 101 : 1); // int8_scale_term
+
+    int activation_type = RAND() % 7; // 0 1 2 3 4 5 6
+    ncnn::Mat activation_params(2);
+    activation_params[0] = (activation_type == 6) ? RandomFloat(0, 1) : RandomFloat(-1, 0); // alpha
+    activation_params[1] = RandomFloat(0, 1);                                               // beta
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    std::vector<ncnn::Mat> weights(bias ? 5 : 4);
+    weights[0] = RandomMat(outch * c * kernel * kernel);
+
+    ncnn::Mat weight_scales = scales_mat(weights[0], outch, c * kernel * kernel, c * kernel * kernel);
+    ncnn::Mat input_scales = scales_mat(a, 1, w * h * c, a.cstep);
+    ncnn::Mat top_scales = requant ? scales_mat(a, 1, w * h * c, a.cstep) : ncnn::Mat();
+
+    if (kernel == 3 && dilation == 1 && stride == 1)
+    {
+        // test for 6bit quant
+        for (int i = 0; i < weight_scales.w; i++)
+            weight_scales[i] = weight_scales[i] / 4.f;
+    }
+
+    if (bias)
+    {
+        weights[1] = RandomMat(outch);
+        weights[2] = weight_scales;
+        weights[3] = input_scales;
+        weights[4] = top_scales;
+    }
+    else
+    {
+        weights[1] = weight_scales;
+        weights[2] = input_scales;
+        weights[3] = top_scales;
+    }
+
+    int flag = TEST_LAYER_DISABLE_GPU_TESTING;
+    int ret = test_layer_oom("Convolution", pd, weights, a, flag);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_convolution_oom_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]);
+    }
+
+    return ret;
+}
+
+static int test_convolution_1()
+{
+    return 0
+           || test_convolution_oom_int8(9, 7, 31, 63, 1, 1, 1, 0, 1)
+           || test_convolution_oom_int8(9, 7, 31, 63, 3, 1, 1, 1, 1);
+}
+
+static int test_convolution_2()
+{
+    return 0
+           || test_convolution_oom_int8(9, 7, 31, 63, 1, 1, 1, 0, 1, true)
+           || test_convolution_oom_int8(9, 7, 31, 63, 3, 1, 1, 1, 1, true);
+}
+#endif // NCNN_INT8
+
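+// An OOM test for another layer follows the same shape; a minimal sketch
+// (hypothetical layer choice, not part of this test):
+//
+//   static int test_relu_oom()
+//   {
+//       ncnn::Mat a = RandomMat(25, 27, 32);
+//       ncnn::ParamDict pd;
+//       std::vector<ncnn::Mat> weights(0);
+//       return test_layer_oom("ReLU", pd, weights, a);
+//   }
+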
+int main()
+{
+    SRAND(7767517);
+
+#if __mips__ || __loongarch64 || __riscv
+    // TODO
+    return 0;
+#endif
+
+#if NCNN_INT8
+    return test_convolution_0() || test_convolution_1() || test_convolution_2();
+#else
+    return test_convolution_0();
+#endif
+}
diff --git a/tests/test_softmax_oom.cpp b/tests/test_softmax_oom.cpp
new file mode 100644
index 00000000000..5fea7636939
--- /dev/null
+++ b/tests/test_softmax_oom.cpp
@@ -0,0 +1,60 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "testutil.h"
+
+static int test_softmax_oom(const ncnn::Mat& a, int axis)
+{
+    ncnn::ParamDict pd;
+    pd.set(0, axis); // axis
+    pd.set(1, 1);    // fixbug0
+
+    std::vector<ncnn::Mat> weights(0);
+
+    int ret = test_layer_oom("Softmax", pd, weights, a);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_softmax_oom failed a.dims=%d a=(%d %d %d) axis=%d\n", a.dims, a.w, a.h, a.c, axis);
+    }
+
+    return ret;
+}
+
+static int test_softmax_0()
+{
+    ncnn::Mat a = RandomMat(25, 27, 32);
+    return test_softmax_oom(a, 0) || test_softmax_oom(a, 1) || test_softmax_oom(a, 2);
+}
+
+static int test_softmax_1()
+{
+    ncnn::Mat a = RandomMat(25, 32);
+    return test_softmax_oom(a, 0) || test_softmax_oom(a, 1);
+}
+
+static int test_softmax_2()
+{
+    ncnn::Mat a = RandomMat(128);
+    return test_softmax_oom(a, 0);
+}
+
+int main()
+{
+    SRAND(7767517);
+
+    return 0
+           || test_softmax_0()
+           || test_softmax_1()
+           || test_softmax_2();
+}
diff --git a/tests/testutil.cpp b/tests/testutil.cpp
index 2e76f6f3901..07d95547d44 100644
--- a/tests/testutil.cpp
+++ b/tests/testutil.cpp
@@ -19,6 +19,7 @@
 #include "mat.h"
 #include "prng.h"
 
+#include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -323,6 +324,166 @@ int CompareMat(const std::vector<ncnn::Mat>& a, const std::vector<ncnn::Mat>& b, float epsilon)
     return 0;
 }
 
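+// Cast the input to the storage type the layer under test supports
+// (fp16/bf16 where the CPU allows it), then repack it to the widest
+// elempack the element count permits.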
+static int convert_to_optimal_layout(const ncnn::Mat& a, ncnn::Mat& a4, const ncnn::Option& opt, const ncnn::Layer* op, int flag)
+{
+    // clang-format off
+    // *INDENT-OFF*
+#if NCNN_VFPV4
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+    {
+        ncnn::cast_float32_to_float16(a, a4, opt);
+    }
+    else
+#endif // NCNN_VFPV4
+#if NCNN_RVV
+    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+    {
+        ncnn::cast_float32_to_float16(a, a4, opt);
+    }
+    else
+#endif // NCNN_RVV
+#if NCNN_BF16
+    if (opt.use_bf16_storage && op->support_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+    {
+        ncnn::cast_float32_to_bfloat16(a, a4, opt);
+    }
+    else
+#endif // NCNN_BF16
+    if (opt.use_fp16_storage && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+    {
+        ncnn::cast_float32_to_float16(a, a4, opt);
+    }
+    else
+    {
+        a4 = a;
+    }
+    // *INDENT-ON*
+    // clang-format on
+
+    if (opt.use_packing_layout && op->support_packing && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_PACKING))
+    {
+        // resolve dst_elempack
+        int dims = a4.dims;
+        int elemcount = 0;
+        if (dims == 1) elemcount = a4.elempack * a4.w;
+        if (dims == 2) elemcount = a4.elempack * a4.h;
+        if (dims == 3 || dims == 4) elemcount = a4.elempack * a4.c;
+
+        int elembits = a4.elembits();
+
+        int dst_elempack = 1;
+
+        if (elembits == 32)
+        {
+#if NCNN_AVX512
+            if (elemcount % 16 == 0 && ncnn::cpu_support_x86_avx512())
+                dst_elempack = 16;
+            else if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
+                dst_elempack = 8;
+            else if (elemcount % 4 == 0)
+                dst_elempack = 4;
+#elif NCNN_AVX
+            if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
+                dst_elempack = 8;
+            else if (elemcount % 4 == 0)
+                dst_elempack = 4;
+#elif NCNN_RVV
+            const int packn = ncnn::cpu_riscv_vlenb() / (elembits / 8);
+            if (elemcount % packn == 0)
+                dst_elempack = packn;
+#else
+            if (elemcount % 4 == 0)
+                dst_elempack = 4;
+#endif
+        }
+        if (elembits == 16)
+        {
+#if NCNN_ARM82
+            if (elemcount % 8 == 0 && ncnn::cpu_support_arm_asimdhp() && opt.use_fp16_arithmetic)
+                dst_elempack = 8;
+            else if (elemcount % 4 == 0)
+                dst_elempack = 4;
+#elif NCNN_RVV
+            const int packn = ncnn::cpu_riscv_vlenb() / 2;
+            if (elemcount % packn == 0)
+                dst_elempack = packn;
+#else
+            if (elemcount % 4 == 0)
+                dst_elempack = 4;
+#endif
+        }
+        if (elembits == 8)
+        {
+#if NCNN_RVV
+            const int packn = ncnn::cpu_riscv_vlenb() / 1;
+            if (elemcount % packn == 0)
+                dst_elempack = packn;
+#else
+            if (elemcount % 8 == 0)
+                dst_elempack = 8;
+#endif
+        }
+
+        if (flag & TEST_LAYER_ENABLE_FORCE_INPUT_PACK8)
+            dst_elempack = 8;
+
+        ncnn::Mat a4_packed;
+        ncnn::convert_packing(a4, a4_packed, dst_elempack, opt);
+        a4 = a4_packed;
+    }
+
+    return 0;
+}
+
+static int convert_to_vanilla_layout(const ncnn::Mat& c4, ncnn::Mat& c, const ncnn::Option& opt, const ncnn::Layer* op, int flag)
+{
+    ncnn::Mat c4_unpacked;
+    if (c4.elempack != 1)
+    {
+        ncnn::convert_packing(c4, c4_unpacked, 1, opt);
+    }
+    else
+    {
+        c4_unpacked = c4;
+    }
+
+    // clang-format off
+    // *INDENT-OFF*
+#if NCNN_VFPV4
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+    {
+        ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
+    }
+    else
+#endif // NCNN_VFPV4
+#if NCNN_RVV
+    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+    {
+        ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
+    }
+    else
+#endif // NCNN_RVV
+#if NCNN_BF16
+    if (opt.use_bf16_storage && op->support_bf16_storage && c4_unpacked.elembits() == 16)
+    {
+        ncnn::cast_bfloat16_to_float32(c4_unpacked, c, opt);
+    }
+    else
+#endif // NCNN_BF16
+    if (opt.use_fp16_storage && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+    {
+        ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
+    }
+    else
+    {
+        c = c4_unpacked;
+    }
+    // *INDENT-ON*
+    // clang-format on
+
+    return 0;
+}
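+// The two helpers above are used as a round trip: convert the input, run
+// the layer, then bring the output back to plain fp32 pack1 for comparison,
+// e.g.
+//
+//   ncnn::Mat a4, c4, c;
+//   convert_to_optimal_layout(a, a4, opt, op, flag);
+//   op->forward(a4, c4, opt);
+//   convert_to_vanilla_layout(c4, c, opt, op, flag);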
+
 int test_layer_naive(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const std::vector<ncnn::Mat>& a, int top_blob_count, std::vector<ncnn::Mat>& b, void (*func)(ncnn::Layer*), int flag)
 {
     ncnn::Layer* op = ncnn::create_layer_naive(typeindex);
@@ -444,111 +605,7 @@ int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count, std::vector<ncnn::Mat>& c, void (*func)(ncnn::Layer*), int flag)
     std::vector<ncnn::Mat> a4(a.size());
     for (size_t i = 0; i < a4.size(); i++)
     {
-        // clang-format off
-        // *INDENT-OFF*
-#if NCNN_VFPV4
-        if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
-        {
-            ncnn::cast_float32_to_float16(a[i], a4[i], opt);
-        }
-        else
-#endif // NCNN_VFPV4
-#if NCNN_RVV
-        if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
-        {
-            ncnn::cast_float32_to_float16(a[i], a4[i], opt);
-        }
-        else
-#endif // NCNN_RVV
-#if NCNN_BF16
-        if (opt.use_bf16_storage && op->support_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
-        {
-            ncnn::cast_float32_to_bfloat16(a[i], a4[i], opt);
-        }
-        else
-#endif // NCNN_BF16
-        if (opt.use_fp16_storage && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
-        {
-            ncnn::cast_float32_to_float16(a[i], a4[i], opt);
-        }
-        else
-        {
-            a4[i] = a[i];
-        }
-        // *INDENT-ON*
-        // clang-format on
-
-        if (opt.use_packing_layout && op->support_packing && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_PACKING))
-        {
-            // resolve dst_elempack
-            int dims = a4[i].dims;
-            int elemcount = 0;
-            if (dims == 1) elemcount = a4[i].elempack * a4[i].w;
-            if (dims == 2) elemcount = a4[i].elempack * a4[i].h;
-            if (dims == 3 || dims == 4) elemcount = a4[i].elempack * a4[i].c;
-
-            int elembits = a4[i].elembits();
-
-            int dst_elempack = 1;
-
-            if (elembits == 32)
-            {
-#if NCNN_AVX512
-                if (elemcount % 16 == 0 && ncnn::cpu_support_x86_avx512())
-                    dst_elempack = 16;
-                else if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
-                    dst_elempack = 8;
-                else if (elemcount % 4 == 0)
-                    dst_elempack = 4;
-#elif NCNN_AVX
-                if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
-                    dst_elempack = 8;
-                else if (elemcount % 4 == 0)
-                    dst_elempack = 4;
-#elif NCNN_RVV
-                const int packn = ncnn::cpu_riscv_vlenb() / (elembits / 8);
-                if (elemcount % packn == 0)
-                    dst_elempack = packn;
-#else
-                if (elemcount % 4 == 0)
-                    dst_elempack = 4;
-#endif
-            }
-            if (elembits == 16)
-            {
-#if NCNN_ARM82
-                if (elemcount % 8 == 0 && ncnn::cpu_support_arm_asimdhp() && opt.use_fp16_arithmetic)
-                    dst_elempack = 8;
-                else if (elemcount % 4 == 0)
-                    dst_elempack = 4;
-#elif NCNN_RVV
-                const int packn = ncnn::cpu_riscv_vlenb() / 2;
-                if (elemcount % packn == 0)
-                    dst_elempack = packn;
-#else
-                if (elemcount % 4 == 0)
-                    dst_elempack = 4;
-#endif
-            }
-            if (elembits == 8)
-            {
-#if NCNN_RVV
-                const int packn = ncnn::cpu_riscv_vlenb() / 1;
-                if (elemcount % packn == 0)
-                    dst_elempack = packn;
-#else
-                if (elemcount % 8 == 0)
-                    dst_elempack = 8;
-#endif
-            }
-
-            if (flag & TEST_LAYER_ENABLE_FORCE_INPUT_PACK8)
-                dst_elempack = 8;
-
-            ncnn::Mat a4_packed;
-            ncnn::convert_packing(a4[i], a4_packed, dst_elempack, opt);
-            a4[i] = a4_packed;
-        }
+        convert_to_optimal_layout(a[i], a4[i], opt, op, flag);
     }
 
     c.resize(top_blob_count);
@@ -569,43 +626,7 @@ int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count, std::vector<ncnn::Mat>& c, void (*func)(ncnn::Layer*), int flag)
 
     for (int i = 0; i < top_blob_count; i++)
     {
-        // clang-format off
-        // *INDENT-OFF*
-#if NCNN_VFPV4
-        if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c[i].elembits() == 16)
-        {
-            ncnn::Mat c_fp32;
-            ncnn::cast_float16_to_float32(c[i], c_fp32, opt);
-            c[i] = c_fp32;
-        }
-        else
-#endif // NCNN_VFPV4
-#if NCNN_RVV
-        if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && c[i].elembits() == 16)
-        {
-            ncnn::Mat c_fp32;
-            ncnn::cast_float16_to_float32(c[i], c_fp32, opt);
-            c[i] = c_fp32;
-        }
-        else
-#endif // NCNN_RVV
-#if NCNN_BF16
-        if (opt.use_bf16_storage && op->support_bf16_storage && c[i].elembits() == 16)
-        {
-            ncnn::Mat c_fp32;
-            ncnn::cast_bfloat16_to_float32(c[i], c_fp32, opt);
-            c[i] = c_fp32;
-        }
-        else
-#endif // NCNN_BF16
-        if (opt.use_fp16_storage && op->support_fp16_storage && c[i].elembits() == 16)
-        {
-            ncnn::Mat c_fp32;
-            ncnn::cast_float16_to_float32(c[i], c_fp32, opt);
-            c[i] = c_fp32;
-        }
-        // *INDENT-ON*
-        // clang-format on
+        convert_to_vanilla_layout(c[i], c[i], opt, op, flag);
     }
 
     op->destroy_pipeline(opt);
@@ -958,181 +979,40 @@ int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, ncnn::Mat& c, const ncnn::Mat& top_shape, void (*func)(ncnn::Layer*), int flag)
 
     ncnn::Mat a4;
+    convert_to_optimal_layout(a, a4, opt, op, flag);
+
-    // clang-format off
-    // *INDENT-OFF*
-#if NCNN_VFPV4
-    if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
-    {
-        ncnn::cast_float32_to_float16(a, a4, opt);
-    }
-    else
-#endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+    if (op->support_inplace)
     {
-        ncnn::cast_float32_to_float16(a, a4, opt);
+        c = a4.clone();
+        op->forward_inplace(c, opt);
     }
     else
-#endif // NCNN_RVV
-#if NCNN_BF16
-    if (opt.use_bf16_storage && op->support_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
-        ncnn::cast_float32_to_bfloat16(a, a4, opt);
+        op->forward(a4, c, opt);
     }
-    else
-#endif // NCNN_BF16
-    if (opt.use_fp16_storage && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+
+    convert_to_vanilla_layout(c, c, opt, op, flag);
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
+#if NCNN_VULKAN
+int test_layer_gpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, ncnn::Mat& d, const ncnn::Mat& top_shape, void (*func)(ncnn::Layer*), int flag)
+{
+    if (!_opt.use_packing_layout)
     {
-        ncnn::cast_float32_to_float16(a, a4, opt);
+        // pack1 test is useless for gpu
+        return 233;
     }
-    else
+
+    ncnn::Layer* op = ncnn::create_layer_vulkan(typeindex);
+    if (!op)
     {
-        a4 = a;
-    }
-    // *INDENT-ON*
-    // clang-format on
-
-    if (opt.use_packing_layout && op->support_packing && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_PACKING))
-    {
-        // resolve dst_elempack
-        int dims = a4.dims;
-        int elemcount = 0;
-        if (dims == 1) elemcount = a4.elempack * a4.w;
-        if (dims == 2) elemcount = a4.elempack * a4.h;
-        if (dims == 3 || dims == 4) elemcount = a4.elempack * a4.c;
-
-        int elembits = a4.elembits();
-
-        int dst_elempack = 1;
-
-        if (elembits == 32)
-        {
-#if NCNN_AVX512
-            if (elemcount % 16 == 0 && ncnn::cpu_support_x86_avx512())
-                dst_elempack = 16;
-            else if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
-                dst_elempack = 8;
-            else if (elemcount % 4 == 0)
-                dst_elempack = 4;
-#elif NCNN_AVX
-            if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx())
-                dst_elempack = 8;
-            else if (elemcount % 4 == 0)
-                dst_elempack = 4;
-#elif NCNN_RVV
-            const int packn = ncnn::cpu_riscv_vlenb() / (elembits / 8);
-            if (elemcount % packn == 0)
-                dst_elempack = packn;
-#else
-            if (elemcount % 4 == 0)
-                dst_elempack = 4;
-#endif
-        }
-        if (elembits == 16)
-        {
-#if NCNN_ARM82
-            if (elemcount % 8 == 0 && ncnn::cpu_support_arm_asimdhp() && opt.use_fp16_arithmetic)
-                dst_elempack = 8;
-            else if (elemcount % 4 == 0)
-                dst_elempack = 4;
-#elif NCNN_RVV
-            const int packn = ncnn::cpu_riscv_vlenb() / 2;
-            if (elemcount % packn == 0)
-                dst_elempack = packn;
-#else
-            if (elemcount % 4 == 0)
-                dst_elempack = 4;
-#endif
-        }
-        if (elembits == 8)
-        {
-#if NCNN_RVV
-            const int packn = ncnn::cpu_riscv_vlenb() / 1;
-            if (elemcount % packn == 0)
-                dst_elempack = packn;
-#else
-            if (elemcount % 8 == 0)
-                dst_elempack = 8;
-#endif
-        }
-
-        if (flag & TEST_LAYER_ENABLE_FORCE_INPUT_PACK8)
-            dst_elempack = 8;
-
-        ncnn::Mat a4_packed;
-        ncnn::convert_packing(a4, a4_packed, dst_elempack, opt);
-        a4 = a4_packed;
-    }
-
-    if (op->support_inplace)
-    {
-        c = a4.clone();
-        op->forward_inplace(c, opt);
-    }
-    else
-    {
-        op->forward(a4, c, opt);
-    }
-
-    // clang-format off
-    // *INDENT-OFF*
-#if NCNN_VFPV4
-    if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c.elembits() == 16)
-    {
-        ncnn::Mat c_fp32;
-        ncnn::cast_float16_to_float32(c, c_fp32, opt);
-        c = c_fp32;
-    }
-    else
-#endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && c.elembits() == 16)
-    {
-        ncnn::Mat c_fp32;
-        ncnn::cast_float16_to_float32(c, c_fp32, opt);
-        c = c_fp32;
-    }
-    else
-#endif // NCNN_RVV
-#if NCNN_BF16
-    if (opt.use_bf16_storage && op->support_bf16_storage && c.elembits() == 16)
-    {
-        ncnn::Mat c_fp32;
-        ncnn::cast_bfloat16_to_float32(c, c_fp32, opt);
-        c = c_fp32;
-    }
-    else
-#endif // NCNN_BF16
-    if (opt.use_fp16_storage && op->support_fp16_storage && c.elembits() == 16)
-    {
-        ncnn::Mat c_fp32;
-        ncnn::cast_float16_to_float32(c, c_fp32, opt);
-        c = c_fp32;
-    }
-    // *INDENT-ON*
-    // clang-format on
-
-    op->destroy_pipeline(opt);
-
-    delete op;
-
-    return 0;
-}
-
-#if NCNN_VULKAN
-int test_layer_gpu(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, ncnn::Mat& d, const ncnn::Mat& top_shape, void (*func)(ncnn::Layer*), int flag)
-{
-    if (!_opt.use_packing_layout)
-    {
-        // pack1 test is useless for gpu
-        return 233;
-    }
-
-    ncnn::Layer* op = ncnn::create_layer_vulkan(typeindex);
-    if (!op)
-    {
-        return 233;
+        return 233;
     }
 
     op->load_param(pd);
@@ -1581,3 +1461,354 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Mat& a, float epsilon, void (*func)(ncnn::Layer*), int flag)
 
     return 0;
 }
+
+class TestOOMAllocator : public ncnn::UnlockedPoolAllocator
+{
+public:
+    TestOOMAllocator();
+    virtual void* fastMalloc(size_t size);
+    virtual void fastFree(void* ptr);
+
+    ncnn::Mutex lock;
+    int counter;
+    int failid;
+};
+
+TestOOMAllocator::TestOOMAllocator()
+{
+    counter = 0;
+    failid = INT_MAX;
+}
+
+void* TestOOMAllocator::fastMalloc(size_t size)
+{
+    lock.lock();
+
+    void* ptr;
+    if (counter == failid)
+    {
+        ptr = 0;
+    }
+    else
+    {
+        ptr = ncnn::UnlockedPoolAllocator::fastMalloc(size);
+    }
+    counter++;
+
+    lock.unlock();
+
+    return ptr;
+}
+
+void TestOOMAllocator::fastFree(void* ptr)
+{
+    lock.lock();
+
+    ncnn::UnlockedPoolAllocator::fastFree(ptr);
+
+    lock.unlock();
+}
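+
+// Usage sketch for the allocator above: make allocation i fail and expect
+// the layer to back out with -100 instead of crashing. The functions below
+// first run once with failid = INT_MAX just to count allocations, then
+// replay the forward once per allocation index:
+//
+//   TestOOMAllocator oom;
+//   opt.blob_allocator = &oom;
+//   opt.workspace_allocator = &oom;
+//   oom.counter = 0;
+//   oom.failid = 3;                    // fail the 4th allocation
+//   int ret = op->forward(a4, c, opt); // expected: -100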
+
+int test_layer_oom_opt(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count, int flag)
+{
+    int typeindex = ncnn::layer_to_index(layer_type);
+    if (typeindex == -1)
+        return -1;
+
+    ncnn::Layer* op = ncnn::create_layer_cpu(typeindex);
+
+    if (!op->support_packing && _opt.use_packing_layout)
+    {
+        delete op;
+        return 233;
+    }
+    if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
+    {
+        delete op;
+        return 233;
+    }
+
+    op->load_param(pd);
+
+    if (op->one_blob_only && a.size() != 1)
+    {
+        fprintf(stderr, "layer with one_blob_only but consumes multiple inputs\n");
+        delete op;
+        return -1;
+    }
+
+    ncnn::ModelBinFromMatArray mb(weights.data());
+
+    op->load_model(mb);
+
+    ncnn::Option opt = _opt;
+    opt.num_threads = 1;
+    opt.use_vulkan_compute = false;
+
+    op->create_pipeline(opt);
+
+    if (!op->support_packing && _opt.use_packing_layout)
+    {
+        op->destroy_pipeline(opt);
+        delete op;
+        return 233;
+    }
+    if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
+    {
+        op->destroy_pipeline(opt);
+        delete op;
+        return 233;
+    }
+
+    std::vector<ncnn::Mat> a4(a.size());
+
+    for (size_t i = 0; i < a4.size(); i++)
+    {
+        convert_to_optimal_layout(a[i], a4[i], opt, op, flag);
+    }
+
+    TestOOMAllocator test_oom_allocator;
+    opt.blob_allocator = &test_oom_allocator;
+    opt.workspace_allocator = &test_oom_allocator;
+
+    std::vector<ncnn::Mat> c;
+    c.resize(top_blob_count);
+
+    if (op->support_inplace)
+    {
+        for (size_t i = 0; i < a4.size(); i++)
+        {
+            c[i] = a4[i].clone();
+        }
+
+        op->forward_inplace(c, opt);
+    }
+    else
+    {
+        op->forward(a4, c, opt);
+    }
+
+    for (int i = 0; i < top_blob_count; i++)
+    {
+        c[i].release();
+    }
+
+    const int alloc_count = test_oom_allocator.counter;
+    for (int i = 0; i < alloc_count; i++)
+    {
+        test_oom_allocator.counter = 0;
+        test_oom_allocator.failid = i;
+
+        int ret = 0;
+        if (op->support_inplace)
+        {
+            for (size_t j = 0; j < a4.size(); j++)
+            {
+                c[j] = a4[j].clone();
+            }
+
+            ret = op->forward_inplace(c, opt);
+        }
+        else
+        {
+            ret = op->forward(a4, c, opt);
+        }
+
+        for (int j = 0; j < top_blob_count; j++)
+        {
+            c[j].release();
+        }
+
+        if (ret != -100)
+        {
+            fprintf(stderr, "oom not caught %d/%d\n", i, alloc_count);
+
+            op->destroy_pipeline(opt);
+
+            delete op;
+
+            return -1;
+        }
+    }
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
+int test_layer_oom_opt(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, int flag)
+{
+    int typeindex = ncnn::layer_to_index(layer_type);
+    if (typeindex == -1)
+        return -1;
+
+    ncnn::Layer* op = ncnn::create_layer_cpu(typeindex);
+
+    if (!op->support_packing && _opt.use_packing_layout)
+    {
+        delete op;
+        return 233;
+    }
+    if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
+    {
+        delete op;
+        return 233;
+    }
+
+    op->load_param(pd);
+
+    ncnn::ModelBinFromMatArray mb(weights.data());
+
+    op->load_model(mb);
+
+    ncnn::Option opt = _opt;
+    opt.num_threads = 1;
+    opt.use_vulkan_compute = false;
+
+    op->create_pipeline(opt);
+
+    if (!op->support_packing && _opt.use_packing_layout)
+    {
+        op->destroy_pipeline(opt);
+        delete op;
+        return 233;
+    }
+    if (!op->support_bf16_storage && !op->support_fp16_storage && (_opt.use_bf16_storage || _opt.use_fp16_arithmetic))
+    {
+        op->destroy_pipeline(opt);
+        delete op;
+        return 233;
+    }
+
+    ncnn::Mat a4;
+    convert_to_optimal_layout(a, a4, opt, op, flag);
+
+    TestOOMAllocator test_oom_allocator;
+    opt.blob_allocator = &test_oom_allocator;
+    opt.workspace_allocator = &test_oom_allocator;
+
+    ncnn::Mat c;
+
+    if (op->support_inplace)
+    {
+        c = a4.clone();
+        op->forward_inplace(c, opt);
+    }
+    else
+    {
+        op->forward(a4, c, opt);
+    }
+
+    c.release();
+
+    const int alloc_count = test_oom_allocator.counter;
+    for (int i = 0; i < alloc_count; i++)
+    {
+        test_oom_allocator.counter = 0;
+        test_oom_allocator.failid = i;
+
+        int ret = 0;
+        if (op->support_inplace)
+        {
+            c = a4.clone();
+            ret = op->forward_inplace(c, opt);
+        }
+        else
+        {
+            ret = op->forward(a4, c, opt);
+        }
+
+        c.release();
+
+        if (ret != -100)
+        {
+            fprintf(stderr, "oom not caught %d/%d\n", i, alloc_count);
+
+            op->destroy_pipeline(opt);
+
+            delete op;
+
+            return -1;
+        }
+    }
+
+    op->destroy_pipeline(opt);
+
+    delete op;
+
+    return 0;
+}
+
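+// Each row of the option table below enables one combination of packing,
+// fp16 packed/storage/arithmetic, bf16 storage, shader pack8 and image
+// storage, so the OOM recovery paths are probed under the different
+// storage layouts a layer may take.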
+int test_layer_oom(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const std::vector<ncnn::Mat>& a, int top_blob_count, int flag)
+{
+    // pack fp16p fp16s fp16a bf16s shader8 image
+    const int options[][7] = {
+        {0, 0, 0, 0, 0, 0, 0},
+        {0, 0, 1, 0, 0, 0, 0},
+        {0, 0, 1, 1, 1, 0, 0},
+        {1, 0, 0, 0, 0, 0, 0},
+        {1, 1, 0, 0, 1, 0, 0},
+        {1, 0, 1, 0, 0, 1, 0},
+        {1, 1, 1, 1, 0, 0, 0},
+        {1, 1, 1, 1, 1, 1, 1},
+    };
+
+    const int opt_count = sizeof(options) / sizeof(options[0]);
+
+    for (int i = 0; i < opt_count; i++)
+    {
+        ncnn::Option opt;
+        opt.num_threads = 1;
+        opt.use_packing_layout = options[i][0];
+        opt.use_fp16_packed = options[i][1];
+        opt.use_fp16_storage = options[i][2];
+        opt.use_fp16_arithmetic = options[i][3];
+        opt.use_bf16_storage = options[i][4];
+        opt.use_shader_pack8 = options[i][5];
+        opt.use_image_storage = options[i][6];
+
+        int ret = test_layer_oom_opt(layer_type, pd, weights, opt, a, top_blob_count, flag);
+        if (ret != 233 && ret != 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+int test_layer_oom(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Mat& a, int flag)
+{
+    // pack fp16p fp16s fp16a bf16s shader8 image
+    const int options[][7] = {
+        {0, 0, 0, 0, 0, 0, 0},
+        {0, 0, 1, 0, 0, 0, 0},
+        {0, 0, 1, 1, 1, 0, 0},
+        {1, 0, 0, 0, 0, 0, 0},
+        {1, 1, 0, 0, 1, 0, 0},
+        {1, 0, 1, 0, 0, 1, 0},
+        {1, 1, 1, 1, 0, 0, 0},
+        {1, 1, 1, 1, 1, 1, 1},
+    };
+
+    const int opt_count = sizeof(options) / sizeof(options[0]);
+
+    for (int i = 0; i < opt_count; i++)
+    {
+        ncnn::Option opt;
+        opt.num_threads = 1;
+        opt.use_packing_layout = options[i][0];
+        opt.use_fp16_packed = options[i][1];
+        opt.use_fp16_storage = options[i][2];
+        opt.use_fp16_arithmetic = options[i][3];
+        opt.use_bf16_storage = options[i][4];
+        opt.use_shader_pack8 = options[i][5];
+        opt.use_image_storage = options[i][6];
+
+        int ret = test_layer_oom_opt(layer_type, pd, weights, opt, a, flag);
+        if (ret != 233 && ret != 0)
+            return ret;
+    }
+
+    return 0;
+}
diff --git a/tests/testutil.h b/tests/testutil.h
index 12f9d0daa65..60ff4d65260 100644
--- a/tests/testutil.h
+++ b/tests/testutil.h
@@ -106,4 +106,14 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const std::vector<ncnn::Mat>& a, int top_blob_count = 1, float epsilon = 0.001, void (*func)(ncnn::Layer*) = 0, int flag = 0);
 int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Mat& a, float epsilon = 0.001, void (*func)(ncnn::Layer*) = 0, int flag = 0);
 
+// oom test
+
+int test_layer_oom_opt(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& opt, const std::vector<ncnn::Mat>& a, int top_blob_count = 1, int flag = 0);
+
+int test_layer_oom_opt(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& opt, const ncnn::Mat& a, int flag = 0);
+
+int test_layer_oom(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const std::vector<ncnn::Mat>& a, int top_blob_count = 1, int flag = 0);
+
+int test_layer_oom(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Mat& a, int flag = 0);
+
 #endif // TESTUTIL_H