From 4f9b9277e61c920851ae085a6d7f1ac4d7b70275 Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Mon, 20 Jan 2025 14:27:44 +0100 Subject: [PATCH 1/2] Bump clang-tidy to 16 --- .github/workflows/clang-tidy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index 52b44590..43df7e9b 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -102,4 +102,4 @@ jobs: shell: bash run: | cd build - python3 ../llvm-project/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py -warnings-as-errors=* -p ./ -config-file ../llvm-project/mlir/.clang-tidy -clang-tidy-binary $(which clang-tidy-15) ${{ env.CHANGED_FILES }} + python3 ../llvm-project/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py -warnings-as-errors=* -p ./ -config-file ../llvm-project/mlir/.clang-tidy -clang-tidy-binary $(which clang-tidy-16) ${{ env.CHANGED_FILES }} From 7370a71ac202c3ff050b39b0e2af0b763eaf3548 Mon Sep 17 00:00:00 2001 From: Artem Kroviakov Date: Mon, 20 Jan 2025 14:35:54 +0000 Subject: [PATCH 2/2] fix style to satisfy clang-format 18.1.3 --- .../core/src/compiler/codegen/codegen_c.cpp | 22 +- .../compiler/codegen/llvm/intrinsic_impl.cpp | 19 +- .../ir/graph/anchor_loop_generator.cpp | 17 +- .../ir/graph/anchor_loop_generator.hpp | 19 +- .../core/src/compiler/ir/graph/graph_map.hpp | 21 +- .../core/src/compiler/ir/graph/graph_op.hpp | 19 +- .../src/compiler/ir/graph/mixed_partition.cpp | 23 +- legacy/core/src/compiler/ir/intrinsics.hpp | 19 +- .../transform/dynamic_parallel_transform.cpp | 21 +- .../jit/xbyak/backend/stack_frame_model.cpp | 44 +- .../xbyak/backend/xbyak_lowering_viewer.cpp | 23 +- legacy/core/src/ops/fusible/transpose.cpp | 33 +- .../core/src/ops/fusible/unary_elemwise.cpp | 21 +- legacy/core/src/ops/managed_matmul_core.cpp | 18 +- legacy/core/src/ops/matmul_core.cpp | 18 +- .../ops/templates/conv1x1_backprop_data.cpp | 55 +- .../ops/templates/conv1x1_backprop_weight.cpp | 27 +- .../ops/templates/convNxN_backprop_weight.cpp | 33 +- legacy/core/src/ops/templates/conv_dw_fwd.cpp | 37 +- legacy/core/src/ops/templates/conv_fwd.cpp | 132 ++-- legacy/core/src/ops/templates/conv_fwd.hpp | 21 +- legacy/core/src/ops/templates/conv_rl.cpp | 51 +- .../src/ops/templates/managed_matmul_core.cpp | 116 +-- legacy/core/src/ops/templates/matmul_core.cpp | 45 +- .../nested_conv1x1_backprop_data.cpp | 21 +- .../nested_conv1x1_backprop_weight.cpp | 78 +- .../nested_convNxN_backprop_data.cpp | 21 +- .../nested_convNxN_backprop_weight.cpp | 75 +- .../src/ops/templates/nested_conv_fwd.cpp | 746 +++++++++--------- .../runtime/microkernel/cpu/brgemm_onednn.cpp | 27 +- legacy/core/src/util/reflection.cpp | 19 +- legacy/core/src/util/variant.hpp | 22 +- lib/gc/Dialect/Microkernel/MicrokernelOps.cpp | 4 +- .../GPURuntime/ocl/GpuOclRuntime.cpp | 7 +- .../ConvertLinalgToMicrokernel.cpp | 3 +- 35 files changed, 963 insertions(+), 914 deletions(-) diff --git a/legacy/core/src/compiler/codegen/codegen_c.cpp b/legacy/core/src/compiler/codegen/codegen_c.cpp index 8d39d9f7..2b124e12 100644 --- a/legacy/core/src/compiler/codegen/codegen_c.cpp +++ b/legacy/core/src/compiler/codegen/codegen_c.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2020-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "codegen_c.hpp" #include "../ir/viewer.hpp" @@ -1219,9 +1220,8 @@ void c_generator_pass_t::operator()(func_t f) { c_generator_pass_t::c_generator_pass_t(std::ostream &source, const context_ptr &ctx, bool gen_wrapper, c_generator_optional_out_t *optional_out) - : source_(source), context_(ctx), - gen_wrapper_(gen_wrapper), pre_passes_{get_default_precodegen_passes( - ctx, gen_wrapper)}, + : source_(source), context_(ctx), gen_wrapper_(gen_wrapper), + pre_passes_{get_default_precodegen_passes(ctx, gen_wrapper)}, optional_out_(optional_out) { prepare_include(&source_); if (optional_out_) { diff --git a/legacy/core/src/compiler/codegen/llvm/intrinsic_impl.cpp b/legacy/core/src/compiler/codegen/llvm/intrinsic_impl.cpp index d92bdc61..5e4d0e3b 100644 --- a/legacy/core/src/compiler/codegen/llvm/intrinsic_impl.cpp +++ b/legacy/core/src/compiler/codegen/llvm/intrinsic_impl.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2023-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include #include #include @@ -74,7 +75,7 @@ Value *codegen_llvm_vis_t::make_int_min_max(const intrin_call_c &v, bool ismin, Value *codegen_llvm_vis_t::make_int_min_max(Value *v1, Value *v2, bool ismin, type_category cate) { // fix-me: use smax/smin for newer LLVM - llvm::Value *(llvm::IRBuilder<>::*ptr)(llvm::Value * LHS, llvm::Value * RHS, + llvm::Value *(llvm::IRBuilder<>::*ptr)(llvm::Value *LHS, llvm::Value *RHS, const llvm::Twine &Name); if (ismin) { if (cate == CATE_INT) { diff --git a/legacy/core/src/compiler/ir/graph/anchor_loop_generator.cpp b/legacy/core/src/compiler/ir/graph/anchor_loop_generator.cpp index 948a6a80..09fb4447 100644 --- a/legacy/core/src/compiler/ir/graph/anchor_loop_generator.cpp +++ b/legacy/core/src/compiler/ir/graph/anchor_loop_generator.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "anchor_loop_generator.hpp" #include "fusible_op_utils.hpp" diff --git a/legacy/core/src/compiler/ir/graph/anchor_loop_generator.hpp b/legacy/core/src/compiler/ir/graph/anchor_loop_generator.hpp index 891c1f00..8f2357b4 100644 --- a/legacy/core/src/compiler/ir/graph/anchor_loop_generator.hpp +++ b/legacy/core/src/compiler/ir/graph/anchor_loop_generator.hpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_ANCHOR_LOOP_GENERATOR_HPP #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_ANCHOR_LOOP_GENERATOR_HPP @@ -54,7 +55,7 @@ class anchor_loop_generator_t : public body_generator_base_t { const std::shared_ptr &parent_fanchor) const; void schedule_loops(context_ptr ctx, const void *config, stmt body, - std::vector &fors) const override{}; + std::vector &fors) const override {}; float get_gflop() const override { return 0; } }; diff --git a/legacy/core/src/compiler/ir/graph/graph_map.hpp b/legacy/core/src/compiler/ir/graph/graph_map.hpp index 3de7a4ae..a42807c8 100644 --- a/legacy/core/src/compiler/ir/graph/graph_map.hpp +++ b/legacy/core/src/compiler/ir/graph/graph_map.hpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_GRAPH_MAP_HPP #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_GRAPH_MAP_HPP @@ -26,7 +27,9 @@ namespace impl { namespace graph { namespace gc { -template struct is_vector { static constexpr bool value = false; }; +template struct is_vector { + static constexpr bool value = false; +}; template struct is_vector> { static constexpr bool value = true; diff --git a/legacy/core/src/compiler/ir/graph/graph_op.hpp b/legacy/core/src/compiler/ir/graph/graph_op.hpp index 56cac186..f2f18401 100644 --- a/legacy/core/src/compiler/ir/graph/graph_op.hpp +++ b/legacy/core/src/compiler/ir/graph/graph_op.hpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2020-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_GRAPH_OP_HPP #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_GRAPH_OP_HPP @@ -48,7 +49,7 @@ class graph_op_t : public sc_op { void query_format( context_ptr ctx, std::vector> &supported_ins, - std::vector> &supported_outs) override{}; + std::vector> &supported_outs) override {}; // the param graph is created by upper function and passed to this function. // It should be an empty graph and already synced with external graph. diff --git a/legacy/core/src/compiler/ir/graph/mixed_partition.cpp b/legacy/core/src/compiler/ir/graph/mixed_partition.cpp index 06cee83a..6e75af34 100644 --- a/legacy/core/src/compiler/ir/graph/mixed_partition.cpp +++ b/legacy/core/src/compiler/ir/graph/mixed_partition.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "mixed_partition.hpp" #include "binding_axis.hpp" @@ -1849,8 +1850,8 @@ static bool try_merge_mixed_parti_parallel(mixed_parti_t *A, mixed_parti_t *B) { auto append_parti = (dep == parti_dep::l_dep_r) ? A : B, target_parti = (dep == parti_dep::l_dep_r) ? B : A; - SC_MODULE_INFO << "Start try_merge_mixed_parti_parallel: " - << "Target: " << target_parti->func_->name_ + SC_MODULE_INFO << "Start try_merge_mixed_parti_parallel: " << "Target: " + << target_parti->func_->name_ << ", Append: " << append_parti->func_->name_; auto outer_loops_target = target_parti->get_outer_loops(), @@ -4028,7 +4029,7 @@ static void crossover_dispatcher(const std::vector &parti_vec, parti_merge_kind merge_kind) { // select merger by merge kind - bool (*merger)(mixed_parti_t * A, mixed_parti_t * B); + bool (*merger)(mixed_parti_t *A, mixed_parti_t *B); switch (merge_kind) { case parti_merge_kind::vertical: { merger = try_merge_mixed_parti_vertically; diff --git a/legacy/core/src/compiler/ir/intrinsics.hpp b/legacy/core/src/compiler/ir/intrinsics.hpp index 7d402070..58a1be8a 100644 --- a/legacy/core/src/compiler/ir/intrinsics.hpp +++ b/legacy/core/src/compiler/ir/intrinsics.hpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2020-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_INTRINSICS_HPP #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_INTRINSICS_HPP @@ -38,7 +39,7 @@ struct intrinsic_handler_t { }; struct x86_intrinsic_handler_t : public intrinsic_handler_t { - virtual void on_initialize(intrin_call_node &node){}; + virtual void on_initialize(intrin_call_node &node) {}; virtual void on_initialize(low_level_intrin_node &node) = 0; x86_intrinsic_handler_t(const std::string &name); virtual ~x86_intrinsic_handler_t() = default; diff --git a/legacy/core/src/compiler/ir/transform/dynamic_parallel_transform.cpp b/legacy/core/src/compiler/ir/transform/dynamic_parallel_transform.cpp index 7095ec36..ccd4ef59 100644 --- a/legacy/core/src/compiler/ir/transform/dynamic_parallel_transform.cpp +++ b/legacy/core/src/compiler/ir/transform/dynamic_parallel_transform.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2023-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "dynamic_parallel_transform.hpp" #include @@ -123,8 +124,8 @@ struct parallel_for_scope_t { bool is_start, uint64_t tid_step, const std::vector *parent_iters, const expr_c &cur_iter) - : loop_{loop}, - nested_level_{nested_level}, is_start_{is_start}, tid_step_{tid_step} { + : loop_{loop}, nested_level_{nested_level}, is_start_{is_start}, + tid_step_{tid_step} { if (parent_iters) { iters_ = *parent_iters; if (cur_iter.defined()) { diff --git a/legacy/core/src/compiler/jit/xbyak/backend/stack_frame_model.cpp b/legacy/core/src/compiler/jit/xbyak/backend/stack_frame_model.cpp index 3b69380c..9db937ea 100644 --- a/legacy/core/src/compiler/jit/xbyak/backend/stack_frame_model.cpp +++ b/legacy/core/src/compiler/jit/xbyak/backend/stack_frame_model.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2021-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include #include @@ -210,8 +211,8 @@ std::string stack_frame_model::one_line_summary() const { #define LOG_LINE(S1, ...) \ if (logging_enabled_) { \ - cout << "[" << utils::brief_lineloc(__FILE__, __LINE__) << "]" \ - << " " << S1 __VA_ARGS__ << endl; \ + cout << "[" << utils::brief_lineloc(__FILE__, __LINE__) << "]" << " " \ + << S1 __VA_ARGS__ << endl; \ } #define LOG_FUNC_ENTRY \ @@ -309,10 +310,9 @@ void stack_frame_model::push_named_object(const std::string &name, x86_64::cpu_data_type val_type, size_t num_bytes, const std::string &debug_comment) { - LOG_FUNC_ENTRY_WITH_TEXT("name=\"" << name << "\"" - << " num_bytes=" << num_bytes - << " debug_comment=\"" << debug_comment - << "\"") + LOG_FUNC_ENTRY_WITH_TEXT("name=\"" + << name << "\"" << " num_bytes=" << num_bytes + << " debug_comment=\"" << debug_comment << "\"") assert_unused_name(name); COMPILE_ASSERT(!name.empty(), "named objects cannot have blank name"); COMPILE_ASSERT(num_bytes > 0, @@ -328,11 +328,10 @@ void stack_frame_model::push_named_object(const std::string &name, void stack_frame_model::push_named_tensor_buffer_object( const std::string &name, x86_64::cpu_data_type val_type, size_t num_elements, size_t num_bytes, const std::string &debug_comment) { - LOG_FUNC_ENTRY_WITH_TEXT("name=\"" << name << "\"" - << " num_elements=" << num_elements - << " num_bytes=" << num_bytes - << " debug_comment=\"" << debug_comment - << "\"") + LOG_FUNC_ENTRY_WITH_TEXT("name=\"" + << name << "\"" << " num_elements=" << num_elements + << " num_bytes=" << num_bytes << " debug_comment=\"" + << debug_comment << "\"") assert_unused_name(name); COMPILE_ASSERT(!name.empty(), "named objects cannot have blank name"); COMPILE_ASSERT(num_bytes > 0, @@ -455,10 +454,9 @@ void stack_frame_model::assert_unused_name(const std::string &name) { } void stack_frame_model::add_caller_param_slot(const caller_param_slot &s) { - LOG_FUNC_ENTRY_WITH_TEXT("name=\"" << s.name_ << "\"" - << " slot_size=" << s.slot_size_ - << " debug_comment=\"" << s.debug_comment_ - << "\"") + LOG_FUNC_ENTRY_WITH_TEXT("name=\"" + << s.name_ << "\"" << " slot_size=" << s.slot_size_ + << " debug_comment=\"" << s.debug_comment_ << "\"") COMPILE_ASSERT(!s.name_.empty(), "named objects cannot have blank name"); COMPILE_ASSERT(s.slot_size_ > 0, "stack_frame_model items must have positive sizes"); diff --git a/legacy/core/src/compiler/jit/xbyak/backend/xbyak_lowering_viewer.cpp b/legacy/core/src/compiler/jit/xbyak/backend/xbyak_lowering_viewer.cpp index 2434bf1d..07cc7748 100644 --- a/legacy/core/src/compiler/jit/xbyak/backend/xbyak_lowering_viewer.cpp +++ b/legacy/core/src/compiler/jit/xbyak/backend/xbyak_lowering_viewer.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2020-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include #include @@ -650,8 +651,7 @@ void xbyak_lowering_viewer::handle_x86_intrisic( XBYAK_GEN(pext, X86_R64_R64_R64, op_dst, op_lhs, op_rhs); } break; default: { - COMPILE_ASSERT(false, FUNC_INFO << "Invalid intrisic: " - << "intrin"); + COMPILE_ASSERT(false, FUNC_INFO << "Invalid intrisic: " << "intrin"); } break; } } @@ -925,8 +925,7 @@ void xbyak_lowering_viewer::handle_avx_intrisic( handle_avx_mov_mask(op_dst, op_src, src_dtype); } break; default: { - COMPILE_ASSERT(false, FUNC_INFO << "Invalid intrisic: " - << "intrin"); + COMPILE_ASSERT(false, FUNC_INFO << "Invalid intrisic: " << "intrin"); } break; } } diff --git a/legacy/core/src/ops/fusible/transpose.cpp b/legacy/core/src/ops/fusible/transpose.cpp index 5238cf33..299e0b79 100644 --- a/legacy/core/src/ops/fusible/transpose.cpp +++ b/legacy/core/src/ops/fusible/transpose.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2023-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "compiler/ir/graph/fusible_op_utils.hpp" #include "reorder.hpp" #include "util/math_utils.hpp" @@ -348,23 +349,25 @@ bool can_be_fast_transpose( #define TRANS2D_ASSIGN(dst, src) \ cur_list.emplace_back( \ - builder::make_assign_unattached(rows[((dst)-1)], rows[((src)-1)])); + builder::make_assign_unattached(rows[((dst) - 1)], rows[((src) - 1)])); // unpack and interleave #define TRANS2D_UNPACK_ASSIGN(option, dst, src1, src2, elem_bits) \ cur_list.emplace_back(builder::make_assign_unattached( \ - rows[((dst)-1)], builder::make_unpack_##option( \ - rows[((src1)-1)], rows[((src2)-1)], elem_bits))); + rows[((dst) - 1)], \ + builder::make_unpack_##option(rows[((src1) - 1)], rows[((src2) - 1)], \ + elem_bits))); #define TRANS2D_SHUFFLE_PERMUTE_ASSIGN_F32(command, dst, src1, src2, imm, \ elem_bits) \ cur_list.emplace_back(builder::make_assign_unattached( \ - rows[((dst)-1)], \ - builder::make_##command(rows[((src1)-1)], rows[((src2)-1)], imm, \ + rows[((dst) - 1)], \ + builder::make_##command(rows[((src1) - 1)], rows[((src2) - 1)], imm, \ elem_bits))); #define PERMUTEX_ASSIGN_F32(dst, src1, src2, imm, mask) \ cur_list.emplace_back(builder::make_assign_unattached( \ - rows[((dst)-1)], \ - builder::make_permute(rows[((src1)-1)], rows[((src2)-1)], imm, mask))); + rows[((dst) - 1)], \ + builder::make_permute(rows[((src1) - 1)], rows[((src2) - 1)], imm, \ + mask))); #define TRANS2D_REG_CALCULATION_F32(type_bits) \ TRANS2D_UNPACK_ASSIGN(low, 9, 1, 2, 32) \ diff --git a/legacy/core/src/ops/fusible/unary_elemwise.cpp b/legacy/core/src/ops/fusible/unary_elemwise.cpp index bef110c5..da4d8366 100644 --- a/legacy/core/src/ops/fusible/unary_elemwise.cpp +++ b/legacy/core/src/ops/fusible/unary_elemwise.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2020-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include @@ -282,7 +283,7 @@ expr tanh_op_t::compute_element(expr in) { #define DECL_VEC_CONSTANT(name, dtype, value) \ expr name = make_expr(value, sc_data_type_t::dtype(lanes)); -// clang-format off + // clang-format off // NOLINTNEXTLINE #define DECL_VEC_VAR(name, dtype) auto name = builder::make_var( \ sc_data_type_t::dtype(lanes), #name + fusion_create_var_idx()); \ @@ -290,7 +291,7 @@ expr tanh_op_t::compute_element(expr in) { // clang-format on #define DECL_CONSTANT(name, dtype, value) \ expr name = make_expr(value, datatypes::dtype); -// clang-format off + // clang-format off // NOLINTNEXTLINE #define DECL_VAR(name, dtype) auto name = builder::make_var( \ datatypes::dtype, #name + fusion_create_var_idx()); \ diff --git a/legacy/core/src/ops/managed_matmul_core.cpp b/legacy/core/src/ops/managed_matmul_core.cpp index acf691c4..1fa13b75 100644 --- a/legacy/core/src/ops/managed_matmul_core.cpp +++ b/legacy/core/src/ops/managed_matmul_core.cpp @@ -1,18 +1,20 @@ -/******************************************************************************* - * Copyright 2022-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ + #include "managed_matmul_core.hpp" #include "matmul_core.hpp" #include "templates/managed_matmul_core.hpp" diff --git a/legacy/core/src/ops/matmul_core.cpp b/legacy/core/src/ops/matmul_core.cpp index 9e2187b5..97750214 100644 --- a/legacy/core/src/ops/matmul_core.cpp +++ b/legacy/core/src/ops/matmul_core.cpp @@ -1,18 +1,20 @@ -/******************************************************************************* - * Copyright 2022-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ + #include "matmul_core.hpp" #include "templates/matmul_core.hpp" #include "templates/utils.hpp" diff --git a/legacy/core/src/ops/templates/conv1x1_backprop_data.cpp b/legacy/core/src/ops/templates/conv1x1_backprop_data.cpp index 8fe1dfee..7b0563c9 100644 --- a/legacy/core/src/ops/templates/conv1x1_backprop_data.cpp +++ b/legacy/core/src/ops/templates/conv1x1_backprop_data.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "conv1x1_backprop_data.hpp" #include @@ -198,18 +199,18 @@ bool gen_conv1x1_backprop_data_t::generate(context_ptr ctx, assert(tile_d == 1 && tile_p == 1 && tile_q == 1); int C_shift_d = padding_d > 0 ? (padding_d > stride_d - ? (stride_d == 1 ? 0 : stride_d - padding_d % stride_d) - : stride_d - padding_d) + ? (stride_d == 1 ? 0 : stride_d - padding_d % stride_d) + : stride_d - padding_d) : 0; int C_shift_h = padding_h > 0 ? (padding_h > stride_h - ? (stride_h == 1 ? 0 : stride_h - padding_h % stride_h) - : stride_h - padding_h) + ? (stride_h == 1 ? 0 : stride_h - padding_h % stride_h) + : stride_h - padding_h) : 0; int C_shift_w = padding_w > 0 ? (padding_w > stride_w - ? (stride_w == 1 ? 0 : stride_w - padding_w % stride_w) - : stride_w - padding_w) + ? (stride_w == 1 ? 0 : stride_w - padding_w % stride_w) + : stride_w - padding_w) : 0; C_shift_d = C_shift_d < 0 ? 0 : C_shift_d; C_shift_h = C_shift_h < 0 ? 0 : C_shift_h; @@ -246,17 +247,17 @@ bool gen_conv1x1_backprop_data_t::generate(context_ptr ctx, stride_a = is_3d ? O * P * Q * K_block : P * Q * K_block; a_idx = is_3d ? std::vector {n, 0, d_o * tile_d + A_shift_d, - p_o * tile_p + A_shift_h, q_o * tile_q + A_shift_w, 0} + p_o * tile_p + A_shift_h, q_o * tile_q + A_shift_w, 0} : std::vector {n, 0, p_o * tile_p + A_shift_h, - q_o * tile_q + A_shift_w, 0}; + q_o * tile_q + A_shift_w, 0}; } else { LDA = K; stride_a = K_block; a_idx = is_3d ? std::vector {n, d_o * tile_d + A_shift_d, - p_o * tile_p + A_shift_h, q_o * tile_q + A_shift_w, 0} - : std::vector { - n, p_o * tile_p + A_shift_h, q_o * tile_q + A_shift_w, 0}; + p_o * tile_p + A_shift_h, q_o * tile_q + A_shift_w, 0} + : std::vector {n, p_o * tile_p + A_shift_h, + q_o * tile_q + A_shift_w, 0}; } b_idx = std::vector {c_o, 0, 0, 0, 0, 0}; if (is_3d) b_idx.emplace_back(0); @@ -264,22 +265,22 @@ bool gen_conv1x1_backprop_data_t::generate(context_ptr ctx, if (is_out_blocking) { LDC = C_block * stride_w; c_idx = is_3d ? std::vector {n, c_o, - d_o * tile_d * stride_d + C_shift_d, - p_o * tile_p * stride_h + C_shift_h, - q_o * tile_q * stride_w + C_shift_w, 0} + d_o * tile_d * stride_d + C_shift_d, + p_o * tile_p * stride_h + C_shift_h, + q_o * tile_q * stride_w + C_shift_w, 0} : std::vector {n, c_o, - p_o * tile_p * stride_h + C_shift_h, - q_o * tile_q * stride_w + C_shift_w, 0}; + p_o * tile_p * stride_h + C_shift_h, + q_o * tile_q * stride_w + C_shift_w, 0}; } else { LDC = C * stride_w; LDC->attr().set("N_axis", is_3d ? std::vector {4} : std::vector {3}); c_idx = is_3d ? std::vector {n, d_o * tile_d * stride_d + C_shift_d, - p_o * tile_p * stride_h + C_shift_h, - q_o * tile_q * stride_w + C_shift_w, c_o * C_block} + p_o * tile_p * stride_h + C_shift_h, + q_o * tile_q * stride_w + C_shift_w, c_o * C_block} : std::vector {n, p_o * tile_p * stride_h + C_shift_h, - q_o * tile_q * stride_w + C_shift_w, c_o * C_block}; + q_o * tile_q * stride_w + C_shift_w, c_o * C_block}; } LDC->attr().set("stride_w", stride_w); builtin::brgemm_init_update(tensor_ptr(output, a_idx), diff --git a/legacy/core/src/ops/templates/conv1x1_backprop_weight.cpp b/legacy/core/src/ops/templates/conv1x1_backprop_weight.cpp index 8b0fdf58..2150c429 100644 --- a/legacy/core/src/ops/templates/conv1x1_backprop_weight.cpp +++ b/legacy/core/src/ops/templates/conv1x1_backprop_weight.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "conv1x1_backprop_weight.hpp" #include @@ -498,7 +499,7 @@ bool gen_conv1x1_backprop_weight_t::generate_reduce_ALL(const context_ptr &ctx, _tensor_(output_tmp, dtype, dtype_block > 1 ? std::vector {NPQ_tile, NPQ_block_pad / dtype_block, - K_block, dtype_block} + K_block, dtype_block} : std::vector {NPQ_tile, NPQ_block, K_block}); _named_for_(lnt, nt_i, 0, NPQ_tile) { _named_for_(lnpq, npq_i, 0, NPQ_block_pad) { @@ -572,9 +573,9 @@ bool gen_conv1x1_backprop_weight_t::generate_reduce_ALL(const context_ptr &ctx, : std::vector {0, 0, 0}), tensor_ptr(del_weight_tmp_buf, is_3d ? std::vector {n_o * K_num_block + k_o, c_o, 0, 0, - 0, 0, 0} + 0, 0, 0} : std::vector {n_o * K_num_block + k_o, c_o, 0, 0, - 0, 0}), + 0, 0}), NPQ_tile, C_block, K_block, NPQ_block_pad, NPQ_block_pad, K_block, K_block, C_block * NPQ_block_pad, K_block @@ -589,9 +590,9 @@ bool gen_conv1x1_backprop_weight_t::generate_reduce_ALL(const context_ptr &ctx, : std::vector {0, 0, 0}), tensor_ptr(del_weight_tmp_buf, is_3d ? std::vector {n_o * K_num_block + k_o, c_o, 0, 0, - 0, 0, 0} + 0, 0, 0} : std::vector {n_o * K_num_block + k_o, c_o, 0, 0, - 0, 0}), + 0, 0}), NPQ_tile, C_block, K_block, NPQ_block_pad, NPQ_block_pad, K_block, K_block, C_block * NPQ_block_pad, K_block diff --git a/legacy/core/src/ops/templates/convNxN_backprop_weight.cpp b/legacy/core/src/ops/templates/convNxN_backprop_weight.cpp index 69b446de..9c943493 100644 --- a/legacy/core/src/ops/templates/convNxN_backprop_weight.cpp +++ b/legacy/core/src/ops/templates/convNxN_backprop_weight.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "convNxN_backprop_weight.hpp" #include @@ -287,11 +288,11 @@ bool gen_convNxN_backprop_weight::generate_reduce_N(const context_ptr &ctx, output, {n_o, k_o, p_o, output_q_start, 0, 0}), tensor_ptr(data, dtype_block > 1 ? std::vector {n_o, c_o, - p_o * stride_h + r - padding_h, q_start_valid, 0, - 0, 0} + p_o * stride_h + r - padding_h, + q_start_valid, 0, 0, 0} : std::vector {n_o, c_o, - p_o * stride_h + r - padding_h, - q_start_valid, 0, 0}), + p_o * stride_h + r - padding_h, + q_start_valid, 0, 0}), tensor_ptr( N_num_block > 1 ? del_weight_tmp_buf : del_weight, {n_o * K_num_block + k_o, c_o, r, s, 0, 0}), @@ -309,11 +310,11 @@ bool gen_convNxN_backprop_weight::generate_reduce_N(const context_ptr &ctx, output, {n_o, k_o, p_o, output_q_start, 0, 0}), tensor_ptr(data, dtype_block > 1 ? std::vector {n_o, c_o, - p_o * stride_h + r - padding_h, q_start_valid, 0, - 0, 0} + p_o * stride_h + r - padding_h, + q_start_valid, 0, 0, 0} : std::vector {n_o, c_o, - p_o * stride_h + r - padding_h, - q_start_valid, 0, 0}), + p_o * stride_h + r - padding_h, + q_start_valid, 0, 0}), tensor_ptr( N_num_block > 1 ? del_weight_tmp_buf : del_weight, {n_o * K_num_block + k_o, c_o, r, s, 0, 0}), diff --git a/legacy/core/src/ops/templates/conv_dw_fwd.cpp b/legacy/core/src/ops/templates/conv_dw_fwd.cpp index 8d7cad29..e9215ea1 100644 --- a/legacy/core/src/ops/templates/conv_dw_fwd.cpp +++ b/legacy/core/src/ops/templates/conv_dw_fwd.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2023-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "conv_dw_fwd.hpp" #include @@ -71,7 +72,7 @@ config_ptr gen_conv_dw_fwd_t::get_default_config(context_ptr ctx) const { cfg.bs_threads = mb_ > num_threads ? num_threads : *(std::find_if(thread_split.rbegin(), thread_split.rend(), - [&](int split) { return split == 1 || split < mb_; })); + [&](int split) { return split == 1 || split < mb_; })); cfg.h_threads = num_threads / cfg.bs_threads; cfg.w_threads = 1; cfg.g_threads = 1; @@ -608,7 +609,7 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const { builtin::brgemm_init( tensor_ptr(global_aux_buffer, is_3d_ ? std::vector {tid, aux_buf_d, aux_buf_h, - 0, 0} + 0, 0} : std::vector {tid, aux_buf_h, 0, 0}), builder::make_cast(datatypes::s32, left_pad), g_block, LDA, dtype_input, padding_value); @@ -640,9 +641,9 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const { builtin::brgemm_init( tensor_ptr(global_aux_buffer, is_3d_ ? std::vector {tid, aux_buf_d, aux_buf_h, - w_block_size_without_pad, 0} + w_block_size_without_pad, 0} : std::vector {tid, aux_buf_h, - w_block_size_without_pad, 0}), + w_block_size_without_pad, 0}), builder::make_cast(datatypes::s32, aux_w_block_size - w_block_size_without_pad), g_block, LDA, dtype_input, padding_value); @@ -654,8 +655,8 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const { auto valid_kh = (h_nopad_end_idx - h_nopad_begin_idx - 1) / dh_ + 1; idx = builder::make_cast(datatypes::u32, - use_var_bs ? ( - aux_buf_d * valid_kh * kw_ + aux_buf_h * kw_ + kw) + use_var_bs ? (aux_buf_d * valid_kh * kw_ + + aux_buf_h * kw_ + kw) : (kd * kh_ * kw_ + kh * kw_ + kw)); } else { idx = builder::make_cast(datatypes::u32, @@ -668,7 +669,7 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const { A_list[idx] = tensor_ptr(global_aux_buffer, is_3d_ ? std::vector {tid, aux_buf_d, aux_buf_h, - kw * dw_, 0} + kw * dw_, 0} : std::vector {tid, aux_buf_h, kw * dw_, 0}); } } @@ -848,7 +849,7 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const { if (is_3d_) { auto cond = large_pad ? (((cur_iw + aux_w_block_size <= 0) || (cur_iw > iw_)) - || (num_d_pad >= kd_ || num_h_pad >= kh_)) + || (num_d_pad >= kd_ || num_h_pad >= kh_)) : (num_d_pad >= kd_ || num_h_pad >= kh_); _if_(cond && padding_value == 0) { zero_out_aux_buffer(); @@ -891,7 +892,7 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const { expr idx = builder::make_cast(datatypes::u32, use_var_bs ? ((kd - d_nopad_begin_idx) * valid_kh * kw_ - + (kh - h_nopad_begin_idx) * kw_ + kw) + + (kh - h_nopad_begin_idx) * kw_ + kw) : (kd * kh_ * kw_ + kh * kw_ + kw)); A_list[idx] = tensor_ptr(input, std::vector {n, cur_id + kd * dd_, @@ -962,7 +963,7 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const { } else { auto cond = large_pad ? (((cur_iw + aux_w_block_size <= 0) || (cur_iw > iw_)) - || (num_h_pad >= kh_)) + || (num_h_pad >= kh_)) : (num_h_pad >= kh_); _if_(cond && padding_value == 0) { zero_out_aux_buffer(); diff --git a/legacy/core/src/ops/templates/conv_fwd.cpp b/legacy/core/src/ops/templates/conv_fwd.cpp index d95f3653..e6dc5fc8 100644 --- a/legacy/core/src/ops/templates/conv_fwd.cpp +++ b/legacy/core/src/ops/templates/conv_fwd.cpp @@ -1,18 +1,20 @@ -/******************************************************************************* - * Copyright 2020-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ + #include "conv_fwd.hpp" #include #include @@ -509,18 +511,18 @@ std::vector gen_conv_fwd_t::data_offset(const expr &N, const expr &G, !(is_3d_ && force_3d), "Force_3d is only capable for 2d inputs"); return is_group_conv_ ? ((is_3d_ || force_3d) - ? (!blocking_input_ - ? std::vector {N, D, H, W, G, C * C_block + c_idx} - : std::vector {N, G, C, D, H, W, c_idx}) - : (!blocking_input_ - ? std::vector {N, H, W, G, C * C_block + c_idx} - : std::vector {N, G, C, H, W, c_idx})) + ? (!blocking_input_ + ? std::vector {N, D, H, W, G, C * C_block + c_idx} + : std::vector {N, G, C, D, H, W, c_idx}) + : (!blocking_input_ + ? std::vector {N, H, W, G, C * C_block + c_idx} + : std::vector {N, G, C, H, W, c_idx})) : ((is_3d_ || force_3d) - ? (!blocking_input_ - ? std::vector {N, D, H, W, C * C_block + c_idx} - : std::vector {N, C, D, H, W, c_idx}) - : (!blocking_input_ ? std::vector {N, H, W, C * C_block + c_idx} - : std::vector {N, C, H, W, c_idx})); + ? (!blocking_input_ + ? std::vector {N, D, H, W, C * C_block + c_idx} + : std::vector {N, C, D, H, W, c_idx}) + : (!blocking_input_ ? std::vector {N, H, W, C * C_block + c_idx} + : std::vector {N, C, H, W, c_idx})); } std::vector gen_conv_fwd_t::output_offset(const expr &N, const expr &G, @@ -528,17 +530,17 @@ std::vector gen_conv_fwd_t::output_offset(const expr &N, const expr &G, const expr &C_block, const expr &c_idx) const { return is_group_conv_ ? (is_3d_ ? (!blocking_output_ - ? std::vector {N, D, H, W, G, C * C_block + c_idx} - : std::vector {N, G, C, D, H, W, c_idx}) + ? std::vector {N, D, H, W, G, C * C_block + c_idx} + : std::vector {N, G, C, D, H, W, c_idx}) + : (!blocking_output_ + ? std::vector {N, H, W, G, C * C_block + c_idx} + : std::vector {N, G, C, H, W, c_idx})) + : (is_3d_ ? (!blocking_output_ + ? std::vector {N, D, H, W, C * C_block + c_idx} + : std::vector {N, C, D, H, W, c_idx}) : (!blocking_output_ - ? std::vector {N, H, W, G, C * C_block + c_idx} - : std::vector {N, G, C, H, W, c_idx})) - : (is_3d_ - ? (!blocking_output_ - ? std::vector {N, D, H, W, C * C_block + c_idx} - : std::vector {N, C, D, H, W, c_idx}) - : (!blocking_output_ ? std::vector {N, H, W, C * C_block + c_idx} - : std::vector {N, C, H, W, c_idx})); + ? std::vector {N, H, W, C * C_block + c_idx} + : std::vector {N, C, H, W, c_idx})); } void gen_conv_fwd_t::bind_output_loop_axis(const for_loop &loop, @@ -643,31 +645,31 @@ void gen_conv_fwd_t::create_anchor(fusion_anchor_mgr_t *fusion, fusion->create_fusion_anchor(slice_map {{output_gt.get(), blocking_output_ ? slice_range_list {{{n, n_len}, {g, g_len}, {k, k_len}, {d, d_len}, - {p, p_len}, {q, q_len}, {0, K_block}}} + {p, p_len}, {q, q_len}, {0, K_block}}} : slice_range_list {{{n, n_len}, {d, d_len}, {p, p_len}, {q, q_len}, - {g, g_len}, {k * K_block, k_len * K_block}}}}}); + {g, g_len}, {k * K_block, k_len * K_block}}}}}); } else { fusion->create_fusion_anchor(slice_map {{output_gt.get(), blocking_output_ ? slice_range_list {{{n, n_len}, {g, g_len}, {k, k_len}, {p, p_len}, - {q, q_len}, {0, K_block}}} + {q, q_len}, {0, K_block}}} : slice_range_list {{{n, n_len}, {p, p_len}, {q, q_len}, {g, g_len}, - {k * K_block, k_len * K_block}}}}}); + {k * K_block, k_len * K_block}}}}}); } } else { if (is_3d_) { fusion->create_fusion_anchor(slice_map {{output_gt.get(), blocking_output_ ? slice_range_list {{{n, n_len}, {k, k_len}, {d, d_len}, {p, p_len}, - {q, q_len}, {0, K_block}}} + {q, q_len}, {0, K_block}}} : slice_range_list {{{n, n_len}, {d, d_len}, {p, p_len}, {q, q_len}, - {k * K_block, k_len * K_block}}}}}); + {k * K_block, k_len * K_block}}}}}); } else { fusion->create_fusion_anchor(slice_map {{output_gt.get(), blocking_output_ ? slice_range_list {{{n, n_len}, {k, k_len}, - {p, p_len}, {q, q_len}, {0, K_block}}} + {p, p_len}, {q, q_len}, {0, K_block}}} : slice_range_list {{{n, n_len}, {p, p_len}, - {q, q_len}, {k * K_block, k_len * K_block}}}}}); + {q, q_len}, {k * K_block, k_len * K_block}}}}}); } } } @@ -804,9 +806,9 @@ void gen_conv_fwd_t::compute_1x1_pack_input(CONV_ARG_LIST) const { if (blocking_input_) { _tensor_(input_tmp, get_input_dtype(), is_group_conv_ ? std::vector {mb_expr_, groups_, C_num_block, - oh_expr_, ow_, config.C_block} - : std::vector { - mb_expr_, C_num_block, oh_expr_, ow_, config.C_block}); + oh_expr_, ow_, config.C_block} + : std::vector {mb_expr_, C_num_block, oh_expr_, + ow_, config.C_block}); _named_for_(ln, n, 0, mb_expr_, 1, for_type::PARALLEL) { _named_for_(lg, g, 0, groups_) { _named_for_(lk, c_o, 0, C_num_block) { @@ -1042,9 +1044,9 @@ void gen_conv_fwd_t::compute_conv_no_padding(CONV_ARG_LIST) const { if (blocking_input_) { _tensor_(input_tmp, get_input_dtype(), is_group_conv_ ? std::vector {mb_expr_, groups_, C_num_block, sh_, - pack_ih, iw_, config.C_block} + pack_ih, iw_, config.C_block} : std::vector {mb_expr_, C_num_block, sh_, pack_ih, - iw_, config.C_block}); + iw_, config.C_block}); for_loop ls; _named_for_(ln, n, 0, mb_expr_, 1, for_type::PARALLEL) { _named_for_(lg, g, 0, groups_) { @@ -1136,13 +1138,13 @@ void gen_conv_fwd_t::compute_conv_no_padding(CONV_ARG_LIST) const { auto idx = c_o * kh_ * kw_ + r * kw_ + s; std::vector input_pos = need_pack_strided_input ? data_offset(n, g, c_o, dh_ * r % sh_, - ((o_o * config.tile_os) / adj_ow) + dh_ * r / sh_, - ((o_o * config.tile_os) % adj_ow) * sw_ + dw_ * s, - config.C_block, 0, need_pack_strided_input) + ((o_o * config.tile_os) / adj_ow) + dh_ * r / sh_, + ((o_o * config.tile_os) % adj_ow) * sw_ + dw_ * s, + config.C_block, 0, need_pack_strided_input) : data_offset(n, g, c_o, 0, - ((o_o * config.tile_os) / adj_ow) * sh_ + dh_ * r, - ((o_o * config.tile_os) % adj_ow) * sw_ + dw_ * s, - config.C_block, 0, need_pack_strided_input); + ((o_o * config.tile_os) / adj_ow) * sh_ + dh_ * r, + ((o_o * config.tile_os) % adj_ow) * sw_ + dw_ * s, + config.C_block, 0, need_pack_strided_input); A_list[idx] = tensor_ptr(real_input, input_pos); B_list[idx] = tensor_ptr(weight, weight_offset(g, k_o, c_o, 0, r, s)); @@ -1921,9 +1923,9 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const { builtin::brgemm_init( tensor_ptr(g_sub_tensor, is_3d_ ? std::vector {tid, sub_tsr_d, - sub_tsr_h, 0, 0} + sub_tsr_h, 0, 0} : std::vector {tid, sub_tsr_h, - 0, 0}), + 0, 0}), builder::make_cast(datatypes::s32, left_pad), config.C_block, LDA, dtype_input, padding_value); @@ -1936,9 +1938,9 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const { _for_(k, 0, config.C_block, (int)lanes) { g_sub_tensor[span_t(is_3d_ ? std::vector {tid, sub_tsr_d, - sub_tsr_h, j, k} + sub_tsr_h, j, k} : std::vector {tid, sub_tsr_h, j, - k}, + k}, lanes)] = input[span_t( data_offset(n, g, c_o, @@ -1953,9 +1955,10 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const { builtin::brgemm_init( tensor_ptr(g_sub_tensor, is_3d_ ? std::vector {tid, sub_tsr_d, - sub_tsr_h, tile_size_exclude_right_pad, 0} + sub_tsr_h, + tile_size_exclude_right_pad, 0} : std::vector {tid, sub_tsr_h, - tile_size_exclude_right_pad, 0}), + tile_size_exclude_right_pad, 0}), builder::make_cast(datatypes::s32, src_row_tile_size - tile_size_exclude_right_pad), @@ -1973,7 +1976,7 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const { idx = builder::make_cast(datatypes::u32, use_var_bs ? (sub_tsr_d * valid_kh * num_kw - + sub_tsr_h * num_kw + wi) + + sub_tsr_h * num_kw + wi) : (di * kh_ * num_kw + hi * num_kw + wi)); } else { idx = builder::make_cast(datatypes::u32, @@ -1984,9 +1987,9 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const { // conv A_list[idx] = tensor_ptr(g_sub_tensor, is_3d_ ? std::vector {tid, sub_tsr_d, - sub_tsr_h, wi * dw_ * kw_step, 0} + sub_tsr_h, wi * dw_ * kw_step, 0} : std::vector {tid, sub_tsr_h, - wi * dw_ * kw_step, 0}); + wi * dw_ * kw_step, 0}); } } } @@ -2194,7 +2197,7 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const { auto cond = large_pad ? (((cur_iw + src_row_tile_size <= pw_b_) || (cur_iw > iw_ + pw_b_)) - || (num_d_pad >= kd_ || num_h_pad >= kh_)) + || (num_d_pad >= kd_ || num_h_pad >= kh_)) : (num_d_pad >= kd_ || num_h_pad >= kh_); _if_(cond && padding_value == 0) { zero_out_sub_tensor(); @@ -2240,11 +2243,12 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const { auto valid_kh = h_unpad_end_idx - h_unpad_begin_idx; idx = builder::make_cast(datatypes::u32, - use_var_bs ? ((di - d_unpad_begin_idx) - * valid_kh * num_kw - + (hi - h_unpad_begin_idx) * num_kw + wi) - : (di * kh_ * num_kw - + hi * num_kw + wi)); + use_var_bs + ? ((di - d_unpad_begin_idx) * valid_kh + * num_kw + + (hi - h_unpad_begin_idx) * num_kw + + wi) + : (di * kh_ * num_kw + hi * num_kw + wi)); A_list[idx] = tensor_ptr(input, data_offset(n, g, c_o, cur_id + di * dd_ - pd_b_, @@ -2337,7 +2341,7 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const { auto cond = large_pad ? (((cur_iw + src_row_tile_size <= pw_b_) || (cur_iw > iw_ + pw_b_)) - || (num_h_pad >= kh_)) + || (num_h_pad >= kh_)) : (num_h_pad >= kh_); _if_(cond && padding_value == 0) { zero_out_sub_tensor(); diff --git a/legacy/core/src/ops/templates/conv_fwd.hpp b/legacy/core/src/ops/templates/conv_fwd.hpp index 6c8f0299..7c0f0494 100644 --- a/legacy/core/src/ops/templates/conv_fwd.hpp +++ b/legacy/core/src/ops/templates/conv_fwd.hpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2020-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_OPS_TEMPLATES_CONV_FWD_HPP #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_OPS_TEMPLATES_CONV_FWD_HPP @@ -73,13 +74,13 @@ class gen_conv_fwd_t : public body_generator_t { gen_conv_fwd_t(sc_op *owner, const sc_dims &stride, const sc_dims &pads_begin, std::vector &&ins, std::vector &&outs) : gen_conv_fwd_t(owner, stride, sc_dims {1}, pads_begin, pads_begin, - std::move(ins), std::move(outs)) {} + std::move(ins), std::move(outs)) {} gen_conv_fwd_t(sc_op *owner, const sc_dims &stride, const sc_dims &pads_begin, const sc_dims &pads_end, std::vector &&ins, std::vector &&outs) : gen_conv_fwd_t(owner, stride, sc_dims {1}, pads_begin, pads_end, - std::move(ins), std::move(outs)) {} + std::move(ins), std::move(outs)) {} gen_conv_fwd_t(sc_op *owner, const sc_dims &stride, const sc_dims &dilation, const sc_dims &pads_begin, const sc_dims &pads_end, diff --git a/legacy/core/src/ops/templates/conv_rl.cpp b/legacy/core/src/ops/templates/conv_rl.cpp index 025caf98..0aa19856 100644 --- a/legacy/core/src/ops/templates/conv_rl.cpp +++ b/legacy/core/src/ops/templates/conv_rl.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2023-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "conv_rl.hpp" #include #include @@ -96,16 +97,16 @@ void gen_conv_fwd_rl_t::create_anchor(fusion_anchor_mgr_t *fusion, fusion->create_fusion_anchor(slice_map {{output_gt.get(), blocking_output_ ? slice_range_list {{{n, n_len}, {g, g_len}, {k, k_len}, {p, p_len}, - {q, q_len}, {0, K_block}}} + {q, q_len}, {0, K_block}}} : slice_range_list {{{n, n_len}, {p, p_len}, {q, q_len}, {g, g_len}, - {k * K_block, k_len * K_block}}}}}); + {k * K_block, k_len * K_block}}}}}); } else { fusion->create_fusion_anchor(slice_map {{output_gt.get(), blocking_output_ ? slice_range_list {{{n, n_len}, {k, k_len}, - {p, p_len}, {q, q_len}, {0, K_block}}} + {p, p_len}, {q, q_len}, {0, K_block}}} : slice_range_list {{{n, n_len}, {p, p_len}, - {q, q_len}, {k * K_block, k_len * K_block}}}}}); + {q, q_len}, {k * K_block, k_len * K_block}}}}}); } } } @@ -192,8 +193,8 @@ gen_conv_fwd_rl_t::gen_conv_fwd_rl_t(sc_op *owner, const sc_dims &stride, parallel_axis_ = (mb_ >= num_threads) ? parallel_kind::BATCH : ((int)utils::divide_and_ceil(oh_, num_threads) > height_threshold - ? parallel_kind::HEIGHT - : parallel_kind::BATCH); + ? parallel_kind::HEIGHT + : parallel_kind::BATCH); num_brgemm_k_ = attrs_.get("num_brgemm_k"); brgemm_k_ = attrs_.get("brgemm_k"); @@ -457,19 +458,19 @@ bool gen_conv_fwd_rl_t::generate(context_ptr ctx, create_fusion_anchor(fusion, owner_->get_outputs()[0], is_group_conv_ ? (blocking_output_ - ? slice_range {{n_o, 1}, {g, 1}, {0, 1}, {p, 1}, - {q * config.brgemm_m, config.brgemm_m}, - {k_o * config.brgemm_n, config.brgemm_n}} - : slice_range {{n_o, 1}, {p, 1}, - {q * config.brgemm_m, config.brgemm_m}, {g, 1}, - {k_o * config.brgemm_n, config.brgemm_n}}) + ? slice_range {{n_o, 1}, {g, 1}, {0, 1}, {p, 1}, + {q * config.brgemm_m, config.brgemm_m}, + {k_o * config.brgemm_n, config.brgemm_n}} + : slice_range {{n_o, 1}, {p, 1}, + {q * config.brgemm_m, config.brgemm_m}, {g, 1}, + {k_o * config.brgemm_n, config.brgemm_n}}) : (blocking_output_ ? slice_range {{n_o, 1}, {g, 1}, {p, 1}, - {q * config.brgemm_m, config.brgemm_m}, - {k_o * config.brgemm_n, config.brgemm_n}} + {q * config.brgemm_m, config.brgemm_m}, + {k_o * config.brgemm_n, config.brgemm_n}} : slice_range {{n_o, 1}, {p, 1}, - {q * config.brgemm_m, config.brgemm_m}, - {(g * K_num_block + k_o) * config.brgemm_n, - config.brgemm_n}})); + {q * config.brgemm_m, config.brgemm_m}, + {(g * K_num_block + k_o) * config.brgemm_n, + config.brgemm_n}})); } // brgemm_m * oc_ create_anchor(fusion, owner_->get_outputs()[0], n_o, 1, g, 1, 0, 1, 0, 1, diff --git a/legacy/core/src/ops/templates/managed_matmul_core.cpp b/legacy/core/src/ops/templates/managed_matmul_core.cpp index 72399ff9..26a69023 100644 --- a/legacy/core/src/ops/templates/managed_matmul_core.cpp +++ b/legacy/core/src/ops/templates/managed_matmul_core.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "managed_matmul_core.hpp" #include @@ -954,28 +955,28 @@ void gen_managed_matmul_core_t::single_thread_matmul_call( std::vector aidx = ta.get_format() == sc_data_format_t::MK() ? std::vector {m_start_idx, k_start_idx} : std::vector { - m_start_idx / iim_block_, k_start_idx / iik_block_, 0, 0}; + m_start_idx / iim_block_, k_start_idx / iik_block_, 0, 0}; std::vector bidx = dtype_block > 1 ? std::vector {n_start_idx / iin_block_, - k_start_idx / iik_block_, 0, 0, 0} + k_start_idx / iik_block_, 0, 0, 0} : (!tb.get_format().is_blocking() - ? std::vector {k_start_idx, n_start_idx} - : std::vector { - n_start_idx / iin_block_, k_start_idx / iik_block_, 0, 0}); + ? std::vector {k_start_idx, n_start_idx} + : std::vector {n_start_idx / iin_block_, + k_start_idx / iik_block_, 0, 0}); std::vector cidx; if (is_partial) { cidx = !tc.get_format().is_blocking() ? std::vector {m_b_idx * iim_block_ - + (doroll(m_o, m_o_end)) * iim_block_, - n_b_idx * iin_block_ + (doroll(n_o, n_o_end)) * iin_block_} + + (doroll(m_o, m_o_end)) * iim_block_, + n_b_idx * iin_block_ + (doroll(n_o, n_o_end)) * iin_block_} : std::vector {m_b_idx + doroll(m_o, m_o_end), - n_b_idx + doroll(n_o, n_o_end), 0, 0}; + n_b_idx + doroll(n_o, n_o_end), 0, 0}; cidx.insert(cidx.begin(), k_s); } else { cidx = !tc.get_format().is_blocking() ? std::vector {m_start_idx, n_start_idx} : std::vector { - m_start_idx / iim_block_, n_start_idx / iin_block_, 0, 0}; + m_start_idx / iim_block_, n_start_idx / iin_block_, 0, 0}; } expr LDA = ta.get_format() == sc_data_format_t::MK() ? graph.dim_to_expr(ori_K) @@ -1046,11 +1047,11 @@ void gen_managed_matmul_core_t::single_thread_matmul_call( !tc.get_format().is_blocking() ? std::vector> {{m_start_idx, expr(iim_block_)}, - {n_start_idx, expr(iin_block_)}} + {n_start_idx, expr(iin_block_)}} : std::vector> { - {m_start_idx / iim_block_, 1}, - {n_start_idx / iin_block_, 1}, {0, expr(iim_block_)}, - {0, expr(iin_block_)}}); + {m_start_idx / iim_block_, 1}, + {n_start_idx / iin_block_, 1}, {0, expr(iim_block_)}, + {0, expr(iin_block_)}}); } } } @@ -1066,14 +1067,14 @@ void gen_managed_matmul_core_t::single_thread_matmul_call( m_o, m_o_end) * iim_block_, expr(iim_block_)}, - {0, utils::rnd_up(ori_N, iin_block_)}} + {0, utils::rnd_up(ori_N, iin_block_)}} : std::vector> { - {(m_idx + m_b_idx * iim_block_ - + (doroll(m_o, m_o_end)) * iim_block_) - / iim_block_, - 1}, - {0, utils::divide_and_ceil(ori_N, iin_block_)}, - {0, expr(iim_block_)}, {0, expr(iin_block_)}}); + {(m_idx + m_b_idx * iim_block_ + + (doroll(m_o, m_o_end)) * iim_block_) + / iim_block_, + 1}, + {0, utils::divide_and_ceil(ori_N, iin_block_)}, + {0, expr(iim_block_)}, {0, expr(iin_block_)}}); } } } @@ -1092,14 +1093,14 @@ void gen_managed_matmul_core_t::single_thread_matmul_call( + m_b_idx * iim_block_, M_anchor_info[1] / config.M_sub_block}, - {n_idx + n_b_idx * iin_block_, - N_anchor_info[1] / config.N_sub_block}} + {n_idx + n_b_idx * iin_block_, + N_anchor_info[1] / config.N_sub_block}} : std::vector> { - {(m_idx + m_b_idx * iim_block_) / expr(iim_block_), - M_anchor_info[1] / iim_block_ / config.M_sub_block}, - {(n_idx + n_b_idx * iin_block_) / expr(iin_block_), - N_anchor_info[1] / iin_block_ / config.N_sub_block}, - {0, expr(iim_block_)}, {0, expr(iin_block_)}}); + {(m_idx + m_b_idx * iim_block_) / expr(iim_block_), + M_anchor_info[1] / iim_block_ / config.M_sub_block}, + {(n_idx + n_b_idx * iin_block_) / expr(iin_block_), + N_anchor_info[1] / iin_block_ / config.N_sub_block}, + {0, expr(iim_block_)}, {0, expr(iin_block_)}}); } else { slice_range_list mm_multi_slice; // order:X_anchor_info[1] -> X_anchor_info[2] @@ -1407,26 +1408,27 @@ func_t gen_managed_matmul_core_t::get_single_core_func(context_ptr ctx, std::vector aidx = input_plain ? std::vector {m_start_idx, k_start_idx} : std::vector { - m_start_idx / iim_block_, k_start_idx / iik_block_, 0, 0}; + m_start_idx / iim_block_, k_start_idx / iik_block_, 0, 0}; std::vector bidx = dtype_block > 1 ? std::vector {n_start_idx / iin_block_, - k_start_idx / iik_block_, 0, 0, 0} + k_start_idx / iik_block_, 0, 0, 0} : (!tb.get_format().is_blocking() - ? std::vector {k_start_idx, n_start_idx} - : std::vector {n_start_idx / iin_block_, - k_start_idx / iik_block_, 0, 0}); + ? std::vector {k_start_idx, n_start_idx} + : std::vector {n_start_idx / iin_block_, + k_start_idx / iik_block_, 0, 0}); std::vector partial_cidx, full_cidx; partial_cidx = !tc.get_format().is_blocking() ? std::vector {m_b_idx * iim_block_ - + (doroll(m_o, m_o_end)) * iim_block_, - n_b_idx * iin_block_ + (doroll(n_o, n_o_end)) * iin_block_} + + (doroll(m_o, m_o_end)) * iim_block_, + n_b_idx * iin_block_ + + (doroll(n_o, n_o_end)) * iin_block_} : std::vector {m_b_idx + doroll(m_o, m_o_end), - n_b_idx + doroll(n_o, n_o_end), 0, 0}; + n_b_idx + doroll(n_o, n_o_end), 0, 0}; partial_cidx.insert(partial_cidx.begin(), k_s); full_cidx = !tc.get_format().is_blocking() ? std::vector {m_start_idx, n_start_idx} : std::vector { - m_start_idx / iim_block_, n_start_idx / iin_block_, 0, 0}; + m_start_idx / iim_block_, n_start_idx / iin_block_, 0, 0}; auto partial_C_ptr = tensor_ptr(C_tptr, partial_cidx); auto full_C_ptr = tensor_ptr(C_tptr, full_cidx); expr LDA = input_plain ? ori_K_expr : expr(iik_block_); @@ -1434,7 +1436,7 @@ func_t gen_managed_matmul_core_t::get_single_core_func(context_ptr ctx, : expr(iin_block_); expr partial_LDC = !tc.get_format().is_blocking() ? do_cast_and_fold( - divide_and_ceil(N / iin_block_, N_split_num) * iin_block_) + divide_and_ceil(N / iin_block_, N_split_num) * iin_block_) : iin_block_; expr full_LDC = !tc.get_format().is_blocking() ? ori_N_expr : iin_block_; @@ -1517,11 +1519,11 @@ func_t gen_managed_matmul_core_t::get_single_core_func(context_ptr ctx, std::vector tail_aidx = {m_start_idx, k_tail_idx}; std::vector tail_bidx = dtype_block > 1 ? std::vector {n_start_idx / iin_block_, - k_tail_idx / iik_block_, 0, 0, 0} + k_tail_idx / iik_block_, 0, 0, 0} : (!tb.get_format().is_blocking() - ? std::vector {k_tail_idx, n_start_idx} - : std::vector {n_start_idx / iin_block_, - k_tail_idx / iik_block_, 0, 0}); + ? std::vector {k_tail_idx, n_start_idx} + : std::vector {n_start_idx / iin_block_, + k_tail_idx / iik_block_, 0, 0}); _if_(K_tail_cond) { _if_(k_b == 0 && bs == 0) { call_init_update_brgemm(1, K_tail, tail_aidx, tail_bidx); @@ -1538,11 +1540,11 @@ func_t gen_managed_matmul_core_t::get_single_core_func(context_ptr ctx, !tc.get_format().is_blocking() ? std::vector> {{m_start_idx, expr(m_block)}, - {n_start_idx, expr(n_block)}} + {n_start_idx, expr(n_block)}} : std::vector> { - {m_start_idx / iim_block_, 1}, - {n_start_idx / iin_block_, 1}, {0, expr(iim_block_)}, - {0, expr(iin_block_)}}); + {m_start_idx / iim_block_, 1}, + {n_start_idx / iin_block_, 1}, + {0, expr(iim_block_)}, {0, expr(iin_block_)}}); } } } @@ -1674,15 +1676,15 @@ bool gen_managed_matmul_core_t::generate(context_ptr ctx, expr M_real_split = is_dynamic ? M_split_num : do_cast_and_fold( - builder::make_min(divide_and_ceil(M_expr, iim_block_), M_split_num)); + builder::make_min(divide_and_ceil(M_expr, iim_block_), M_split_num)); expr N_real_split = is_dynamic ? N_split_num : do_cast_and_fold( - builder::make_min(divide_and_ceil(N_expr, iin_block_), N_split_num)); + builder::make_min(divide_and_ceil(N_expr, iin_block_), N_split_num)); expr K_real_split = is_dynamic ? K_split_num : do_cast_and_fold( - builder::make_min(divide_and_ceil(K_expr, iik_block_), K_split_num)); + builder::make_min(divide_and_ceil(K_expr, iik_block_), K_split_num)); if (K_split_num == 1) { expr m_idx, n_idx, M_single_thr_size, N_single_thr_size, X_bigger_num; @@ -1958,7 +1960,7 @@ bool gen_managed_matmul_core_t::generate(context_ptr ctx, std::vector out_tmp_buf_shape_expr = out_tensors_[0].get_format().is_blocking() ? std::vector {K_real_split, M_block_size_expr / iim_block_, - N_block_size_expr / iin_block_, iim_block_, iin_block_} + N_block_size_expr / iin_block_, iim_block_, iin_block_} : std::vector {K_real_split, M_block_size_expr, N_block_size_expr}; if (is_dynamic) { out_tmp_buf_shape_expr = std::vector {K_real_split, diff --git a/legacy/core/src/ops/templates/matmul_core.cpp b/legacy/core/src/ops/templates/matmul_core.cpp index 1fc34104..4df7107f 100644 --- a/legacy/core/src/ops/templates/matmul_core.cpp +++ b/legacy/core/src/ops/templates/matmul_core.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "matmul_core.hpp" #include @@ -389,22 +390,22 @@ void gen_matmul_core_t::get_and_check_blocks(sc_graph_t &graph, // divide and ceil(x, 1) to convert x to index datatype. M_num_blocks = blocking_axis_.A_m.size() == 1 ? divide_and_ceil( - A_dims[blocking_axis_.A_m.at(0)], graph.dim_to_expr(M_block)) + A_dims[blocking_axis_.A_m.at(0)], graph.dim_to_expr(M_block)) : A_dims[blocking_axis_.A_m.at(0)]; K_num_blocks = blocking_axis_.A_k.size() == 1 ? divide_and_ceil( - A_dims[blocking_axis_.A_k.at(0)], graph.dim_to_expr(K_block)) + A_dims[blocking_axis_.A_k.at(0)], graph.dim_to_expr(K_block)) : A_dims[blocking_axis_.A_k.at(0)]; B_K_num_blocks = blocking_axis_.B_k.size() == 1 ? divide_and_ceil( - B_dims[blocking_axis_.B_k.at(0)], graph.dim_to_expr(K_block)) + B_dims[blocking_axis_.B_k.at(0)], graph.dim_to_expr(K_block)) : B_dims[blocking_axis_.B_k.at(0)]; N_num_blocks = blocking_axis_.B_n.size() == 1 ? divide_and_ceil( - B_dims[blocking_axis_.B_n.at(0)], graph.dim_to_expr(N_block)) + B_dims[blocking_axis_.B_n.at(0)], graph.dim_to_expr(N_block)) : B_dims[blocking_axis_.B_n.at(0)]; COMPILE_ASSERT( @@ -734,9 +735,9 @@ bool gen_matmul_core_t::generate(context_ptr ctx, std::vector> fidx3 = blocking_axis_.C_m.size() == 1 && blocking_axis_.C_n.size() == 1 ? concat_vec(batch_tensor_slice_ranges, - {{0, M_num_blocks * M_block}, {0, N_num_blocks * N_block}}) + {{0, M_num_blocks * M_block}, {0, N_num_blocks * N_block}}) : concat_vec(batch_tensor_slice_ranges, - {{0, M_num_blocks}, {0, N_num_blocks}, {0, M_block}, {0, N_block}}); + {{0, M_num_blocks}, {0, N_num_blocks}, {0, M_block}, {0, N_block}}); _named_for_(lm_c, m_o, 0, M_num_blocks) { _named_for_(ln_c, n_o, 0, N_num_blocks) { @@ -757,15 +758,15 @@ bool gen_matmul_core_t::generate(context_ptr ctx, fidx1 = blocking_axis_.C_m.size() == 1 && blocking_axis_.C_n.size() == 1 ? concat_vec(batch_tensor_slice_ranges, - {{m_o * M_block, M_block}, {n_o * N_block, N_block}}) + {{m_o * M_block, M_block}, {n_o * N_block, N_block}}) : concat_vec(batch_tensor_slice_ranges, - {{m_o, 1}, {n_o, 1}, {0, M_block}, {0, N_block}}); + {{m_o, 1}, {n_o, 1}, {0, M_block}, {0, N_block}}); fidx2 = blocking_axis_.C_m.size() == 1 && blocking_axis_.C_n.size() == 1 ? concat_vec(batch_tensor_slice_ranges, - {{m_o * M_block, M_block}, {0, N_num_blocks * N_block}}) + {{m_o * M_block, M_block}, {0, N_num_blocks * N_block}}) : concat_vec(batch_tensor_slice_ranges, - {{m_o, 1}, {0, N_num_blocks}, {0, M_block}, {0, N_block}}); + {{m_o, 1}, {0, N_num_blocks}, {0, M_block}, {0, N_block}}); if (dtype_block > 1) bidx.emplace_back(0); expr LDA = K_block, LDB = N_block, LDC = N_block, @@ -820,8 +821,8 @@ bool gen_matmul_core_t::generate(context_ptr ctx, tensor_ptr(B, dtype_block > 1 ? std::vector {n_o, 0, 0, 0, 0} : (!in_tensors_[1].get_format().is_blocking() - ? std::vector {0, n_o * N_block} - : std::vector {n_o, 0, 0, 0})), + ? std::vector {0, n_o * N_block} + : std::vector {n_o, 0, 0, 0})), tensor_ptr(C, !out_tensors_[0].get_format().is_blocking() ? std::vector {m_o * M_block, n_o * N_block} @@ -842,9 +843,9 @@ bool gen_matmul_core_t::generate(context_ptr ctx, create_fusion_anchor(fusion, owner_->get_outputs()[0], !out_tensors_[0].get_format().is_blocking() ? slice_range {{m_o * M_block, M_block}, - {0, N_num_blocks * N_block}} + {0, N_num_blocks * N_block}} : slice_range { - {m_o, 1}, {0, N_num_blocks}, {0, M_block}, {0, N_block}}); + {m_o, 1}, {0, N_num_blocks}, {0, M_block}, {0, N_block}}); } } } diff --git a/legacy/core/src/ops/templates/nested_conv1x1_backprop_data.cpp b/legacy/core/src/ops/templates/nested_conv1x1_backprop_data.cpp index 50f40cb1..74ab7035 100644 --- a/legacy/core/src/ops/templates/nested_conv1x1_backprop_data.cpp +++ b/legacy/core/src/ops/templates/nested_conv1x1_backprop_data.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "nested_conv1x1_backprop_data.hpp" #include @@ -355,11 +356,11 @@ void gen_nested_conv1x1_backprop_data_t:: m_start_idx / ori_W, m_start_idx % ori_W, k_start_idx}; std::vector bidx = dtype_block > 1 ? std::vector {n_start_idx / im_ic_block_, - k_start_idx / im_oc_block_ / 2, 0, 0, 0, 0, 0} + k_start_idx / im_oc_block_ / 2, 0, 0, 0, 0, 0} : !tb.get_format().is_blocking() ? std::vector {k_start_idx, n_start_idx, 0, 0} : std::vector {n_start_idx / im_ic_block_, - k_start_idx / im_oc_block_, 0, 0, 0, 0}; + k_start_idx / im_oc_block_, 0, 0, 0, 0}; std::vector cidx = {bs_start_idx, m_start_idx / ori_W * stride_h, m_start_idx % ori_W * stride_w * ori_W, n_start_idx}; diff --git a/legacy/core/src/ops/templates/nested_conv1x1_backprop_weight.cpp b/legacy/core/src/ops/templates/nested_conv1x1_backprop_weight.cpp index 1521c609..f564a78c 100644 --- a/legacy/core/src/ops/templates/nested_conv1x1_backprop_weight.cpp +++ b/legacy/core/src/ops/templates/nested_conv1x1_backprop_weight.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "nested_conv1x1_backprop_weight.hpp" #include @@ -287,26 +288,26 @@ void gen_nested_conv1x1_backprop_weight_t::inner_loop_call(context_ptr &ctx, // full shape based on delta_output's reorder result std::vector temp_output_delta_shape_full = dtype_block > 1 ? std::vector {BS / im_bs_block_, OC / im_oc_block_, OH, OW, - im_bs_block_ / dtype_block, im_oc_block_, dtype_block} - : std::vector { - BS / im_bs_block_, OC / im_oc_block_, OH, OW, im_bs_block_, im_oc_block_}; + im_bs_block_ / dtype_block, im_oc_block_, dtype_block} + : std::vector {BS / im_bs_block_, OC / im_oc_block_, OH, OW, + im_bs_block_, im_oc_block_}; _tensor_(temp_output_delta, dtype, temp_output_delta_shape_full); _for_(i_ic, 0, ic_block / im_ic_block_) { // shrinked_shape std::vector temp_output_delta_shape_shr = dtype_block > 1 ? std::vector {bs_block / im_bs_block_, oc_block / im_oc_block_, - oh_block, ow_block, im_bs_block_ / dtype_block, im_oc_block_, - dtype_block} + oh_block, ow_block, im_bs_block_ / dtype_block, im_oc_block_, + dtype_block} : std::vector {bs_block / im_bs_block_, oc_block / im_oc_block_, - oh_block, ow_block, im_bs_block_, im_oc_block_}; + oh_block, ow_block, im_bs_block_, im_oc_block_}; // f32 --> vectorized; bf16 --> vnni_reorder std::vector shrink_offset = dtype_block > 1 ? std::vector {obs_offset / im_bs_block_, oc_offset / im_oc_block_, - oh_offset, ow_offset, obs_offset % im_bs_block_ / dtype_block, - oc_offset % im_oc_block_, obs_offset % im_bs_block_ % dtype_block} + oh_offset, ow_offset, obs_offset % im_bs_block_ / dtype_block, + oc_offset % im_oc_block_, obs_offset % im_bs_block_ % dtype_block} : std::vector {obs_offset / im_bs_block_, oc_offset / im_oc_block_, - oh_offset, ow_offset, obs_offset % im_bs_block_, - oc_offset % im_oc_block_}; + oh_offset, ow_offset, obs_offset % im_bs_block_, + oc_offset % im_oc_block_}; _if_(i_ic == 0) { // reorder temp_output_delta @@ -319,12 +320,12 @@ void gen_nested_conv1x1_backprop_weight_t::inner_loop_call(context_ptr &ctx, shrink_offset, temp_output_delta_shape_shr, stmts()}; slice_range tmp_output_slice_range = dtype_block > 1 ? slice_range {{obs_offset, bs_block / im_bs_block_}, - {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block}, - {ow_offset, ow_block}, {0, im_bs_block_ / dtype_block}, - {0, im_oc_block_}, {0, dtype_block}} + {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block}, + {ow_offset, ow_block}, {0, im_bs_block_ / dtype_block}, + {0, im_oc_block_}, {0, dtype_block}} : slice_range {{obs_offset, bs_block / im_bs_block_}, - {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block}, - {ow_offset, ow_block}, {0, im_bs_block_}, {0, im_oc_block_}}; + {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block}, + {ow_offset, ow_block}, {0, im_bs_block_}, {0, im_oc_block_}}; ops::commit_op(ctx, "reorder", {tensor_slice(delta_output, {{obs_offset, bs_block}, {oh_offset, oh_block}, {ow_offset, ow_block}, @@ -342,20 +343,20 @@ void gen_nested_conv1x1_backprop_weight_t::inner_loop_call(context_ptr &ctx, _for_(i_oc, 0, oc_block / im_oc_block_) { auto real_weight_idx = is_partial ? std::vector {temp_weight_idx[0], temp_weight_idx[1] + i_ic, - temp_weight_idx[2] + i_oc, 0, 0, 0, 0} + temp_weight_idx[2] + i_oc, 0, 0, 0, 0} : std::vector { - temp_weight_idx[0] + i_ic, temp_weight_idx[1] + i_oc, 0, 0, 0, 0}; + temp_weight_idx[0] + i_ic, temp_weight_idx[1] + i_oc, 0, 0, 0, 0}; _for_(i_bs, 0, bs_block / im_bs_block_) { _for_(i_od, 0, od_block) { _for_(i_oh, 0, oh_block) { auto temp_output_delta_brgemm_index = dtype_block > 1 ? std::vector {shrink_offset[0] + i_bs, - shrink_offset[1] + i_oc, shrink_offset[2] + i_oh, - shrink_offset[3], shrink_offset[4], shrink_offset[5], - shrink_offset[6]} + shrink_offset[1] + i_oc, shrink_offset[2] + i_oh, + shrink_offset[3], shrink_offset[4], shrink_offset[5], + shrink_offset[6]} : std::vector {shrink_offset[0] + i_bs, - shrink_offset[1] + i_oc, shrink_offset[2] + i_oh, - shrink_offset[3], shrink_offset[4], shrink_offset[5]}; + shrink_offset[1] + i_oc, shrink_offset[2] + i_oh, + shrink_offset[3], shrink_offset[4], shrink_offset[5]}; _if_(o_bs == 0 && o_od == 0 && o_oh == 0 && o_ow == 0 && i_bs == 0 && i_od == 0 && i_oh == 0) { // ic x bs matmul bs x oc @@ -512,17 +513,18 @@ bool gen_nested_conv1x1_backprop_weight_t::generate(context_ptr ctx, = has_stride ? std::vector {0, 0, 0, 0, 0, 0} : std::vector {obs_offset / im_bs_block_, - ic_offset / im_ic_block_, oh_offset, ow_offset, - obs_offset % im_bs_block_, - ic_offset % im_ic_block_}; + ic_offset / im_ic_block_, oh_offset, ow_offset, + obs_offset % im_bs_block_, + ic_offset % im_ic_block_}; std::vector temp_weight_idx = is_partial ? std::vector {p_bs * oh_threads * od_threads - + p_od * oh_threads + p_oh, - ic_offset / im_ic_block_, oc_offset / im_oc_block_, - 0, 0, im_ic_block_, im_oc_block_} + + p_od * oh_threads + p_oh, + ic_offset / im_ic_block_, + oc_offset / im_oc_block_, 0, 0, im_ic_block_, + im_oc_block_} : std::vector {ic_offset / im_ic_block_, - oc_offset / im_oc_block_, 0, 0, im_ic_block_, - im_oc_block_}; + oc_offset / im_oc_block_, 0, 0, im_ic_block_, + im_oc_block_}; inner_loop_call(ctx, temp_forward_input, temp_forward_idx_non_block, in_tensors_[1], delta_output, real_delta_weight_buf, temp_weight_idx, diff --git a/legacy/core/src/ops/templates/nested_convNxN_backprop_data.cpp b/legacy/core/src/ops/templates/nested_convNxN_backprop_data.cpp index d55c1d90..d4159591 100644 --- a/legacy/core/src/ops/templates/nested_convNxN_backprop_data.cpp +++ b/legacy/core/src/ops/templates/nested_convNxN_backprop_data.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "nested_convNxN_backprop_data.hpp" #include @@ -267,9 +268,9 @@ void gen_nested_convNxN_backprop_data_t::inner_loop_call(const context_ptr &ctx, i_bs, oh_idx - oh_offset, ow_start, 0}; auto weight_index = dtype_block > 1 ? std::vector {ic_offset / im_ic_block_ + i_ic, 0, r, - s, 0, 0, 0} + s, 0, 0, 0} : std::vector { - ic_offset / im_ic_block_ + i_ic, 0, r, s, 0, 0}; + ic_offset / im_ic_block_ + i_ic, 0, r, s, 0, 0}; A_list[len] = tensor_ptr(temp_delta_output, tmp_delta_output_index); B_list[len] = tensor_ptr(weight, weight_index); diff --git a/legacy/core/src/ops/templates/nested_convNxN_backprop_weight.cpp b/legacy/core/src/ops/templates/nested_convNxN_backprop_weight.cpp index d9119623..edc1ce24 100644 --- a/legacy/core/src/ops/templates/nested_convNxN_backprop_weight.cpp +++ b/legacy/core/src/ops/templates/nested_convNxN_backprop_weight.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "nested_convNxN_backprop_weight.hpp" #include @@ -236,25 +237,25 @@ void gen_nested_convNXN_bwd_weight_t::inner_loop_call(const context_ptr &ctx, // full shape based on delta_output's reorder result std::vector temp_output_delta_shape_full = dtype_block > 1 ? std::vector {BS / im_bs_block_, OC / im_oc_block_, OH, OW, - im_bs_block_ / 2, im_oc_block_, 2} - : std::vector { - BS / im_bs_block_, OC / im_oc_block_, OH, OW, im_bs_block_, im_oc_block_}; + im_bs_block_ / 2, im_oc_block_, 2} + : std::vector {BS / im_bs_block_, OC / im_oc_block_, OH, OW, + im_bs_block_, im_oc_block_}; _tensor_(temp_output_delta, dtype, temp_output_delta_shape_full); _for_(i_ic, 0, ic_block / im_ic_block_) { // shrinked_shape std::vector temp_output_delta_shape_shr = dtype_block > 1 ? std::vector {bs_block / im_bs_block_, oc_block / im_oc_block_, - oh_block, ow_block, im_bs_block_ / 2, im_oc_block_, 2} + oh_block, ow_block, im_bs_block_ / 2, im_oc_block_, 2} : std::vector {bs_block / im_bs_block_, oc_block / im_oc_block_, - oh_block, ow_block, im_bs_block_, im_oc_block_}; + oh_block, ow_block, im_bs_block_, im_oc_block_}; // f32 --> vectorized; bf16 --> vnni_reorder std::vector shrink_offset = dtype_block > 1 ? std::vector {obs_offset / im_bs_block_, oc_offset / im_oc_block_, - oh_offset, ow_offset, obs_offset % im_bs_block_ / 2, - oc_offset % im_oc_block_, obs_offset % im_bs_block_ % 2} + oh_offset, ow_offset, obs_offset % im_bs_block_ / 2, + oc_offset % im_oc_block_, obs_offset % im_bs_block_ % 2} : std::vector {obs_offset / im_bs_block_, oc_offset / im_oc_block_, - oh_offset, ow_offset, obs_offset % im_bs_block_, - oc_offset % im_oc_block_}; + oh_offset, ow_offset, obs_offset % im_bs_block_, + oc_offset % im_oc_block_}; // reorder temp_output_delta _if_(i_ic == 0) { trace_guard_t trg(ctx, "output_delta_reorder"); @@ -267,12 +268,12 @@ void gen_nested_convNXN_bwd_weight_t::inner_loop_call(const context_ptr &ctx, shrink_offset, temp_output_delta_shape_shr, stmts()}; slice_range tmp_output_slice_range = dtype_block > 1 ? slice_range {{obs_offset, bs_block / im_bs_block_}, - {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block}, - {ow_offset, ow_block}, {0, im_bs_block_ / 2}, {0, im_oc_block_}, - {0, 2}} + {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block}, + {ow_offset, ow_block}, {0, im_bs_block_ / 2}, {0, im_oc_block_}, + {0, 2}} : slice_range {{obs_offset, bs_block / im_bs_block_}, - {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block}, - {ow_offset, ow_block}, {0, im_bs_block_}, {0, im_oc_block_}}; + {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block}, + {ow_offset, ow_block}, {0, im_bs_block_}, {0, im_oc_block_}}; // TODO(yifei): figure out why expand loop based on output doesn't work ops::commit_op(ctx, "reorder", {tensor_slice(delta_output, @@ -285,9 +286,9 @@ void gen_nested_convNXN_bwd_weight_t::inner_loop_call(const context_ptr &ctx, {{"out_format", dtype_block > 1 ? sc_data_format_t(sc_data_format_kind_t(0, 1, 2, 3, 0, 1, 0), - {im_bs_block_, im_oc_block_, 2}) + {im_bs_block_, im_oc_block_, 2}) : sc_data_format_t(sc_data_format_kind_t(0, 1, 2, 3, 0, 1), - {im_bs_block_, im_oc_block_})}}); + {im_bs_block_, im_oc_block_})}}); } _for_(i_oc, 0, oc_block / im_oc_block_) { _for_(i_bs, 0, bs_block / im_bs_block_) { @@ -298,21 +299,21 @@ void gen_nested_convNXN_bwd_weight_t::inner_loop_call(const context_ptr &ctx, trace_guard_t trg(ctx, "brgemm"); auto temp_output_delta_brgemm_index = dtype_block > 1 ? std::vector {shrink_offset[0] + i_bs, - shrink_offset[1] + i_oc, shrink_offset[2] + i_oh, - shrink_offset[3], shrink_offset[4], shrink_offset[5], - shrink_offset[6]} + shrink_offset[1] + i_oc, shrink_offset[2] + i_oh, + shrink_offset[3], shrink_offset[4], shrink_offset[5], + shrink_offset[6]} : std::vector {shrink_offset[0] + i_bs, - shrink_offset[1] + i_oc, shrink_offset[2] + i_oh, - shrink_offset[3], shrink_offset[4], shrink_offset[5]}; + shrink_offset[1] + i_oc, shrink_offset[2] + i_oh, + shrink_offset[3], shrink_offset[4], shrink_offset[5]}; COMPILE_ASSERT( temp_weight_idx.size() == 2 || temp_weight_idx.size() == 3, "temp_weight_idx shall have length 2 or 3"); auto real_delta_weight_buf_index = temp_weight_idx.size() == 4 ? std::vector {temp_weight_idx[0] + i_ic, - temp_weight_idx[1] + i_oc, lr, ls, 0, 0} + temp_weight_idx[1] + i_oc, lr, ls, 0, 0} : std::vector {temp_weight_idx[0], - temp_weight_idx[1] + i_ic, temp_weight_idx[2] + i_oc, lr, - ls, 0, 0}; + temp_weight_idx[1] + i_ic, temp_weight_idx[2] + i_oc, lr, + ls, 0, 0}; _if_(o_bs == 0 && o_od == 0 && o_oh == 0 && o_ow == 0 && i_bs == 0 && i_od == 0 && i_oh == 0) { // ic x bs matmul bs x oc @@ -486,12 +487,12 @@ bool gen_nested_convNXN_bwd_weight_t::generate(context_ptr ctx, // so extra division needed auto temp_weight_idx = use_temp_weight ? std::vector {p_bs * oh_threads - * od_threads - + p_od * oh_threads + p_oh, - ic_offset / im_ic_block_, - oc_offset / im_oc_block_} + * od_threads + + p_od * oh_threads + p_oh, + ic_offset / im_ic_block_, + oc_offset / im_oc_block_} : std::vector {ic_offset / im_ic_block_, - oc_offset / im_oc_block_}; + oc_offset / im_oc_block_}; inner_loop_call(ctx, temp_forward_input, temp_forward_idx_non_block, in_tensors_[1], delta_output, real_delta_weight_buf, diff --git a/legacy/core/src/ops/templates/nested_conv_fwd.cpp b/legacy/core/src/ops/templates/nested_conv_fwd.cpp index 9d4fd060..89af2ac5 100644 --- a/legacy/core/src/ops/templates/nested_conv_fwd.cpp +++ b/legacy/core/src/ops/templates/nested_conv_fwd.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2022-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include #include @@ -184,7 +185,7 @@ config_ptr gen_nested_conv_fwd_t::get_default_config(context_ptr ctx) const { cfg.bs_threads = mb_ > num_threads || (mb_ == num_threads && oc_ <= 128) ? num_threads : *(std::find_if(thread_split.rbegin(), thread_split.rend(), - [&](int split) { return split == 1 || split < mb_; })); + [&](int split) { return split == 1 || split < mb_; })); cfg.oc_threads = num_threads / cfg.bs_threads; cfg.h_threads = 1; cfg.w_threads = 1; @@ -306,12 +307,12 @@ config_ptr gen_nested_conv_fwd_t::get_default_config(context_ptr ctx) const { ? oh_ : (utils::divide_and_ceil( utils::divide_and_ceil(oh_, cfg.im_h_block), cfg.h_threads) - * cfg.im_h_block); + * cfg.im_h_block); cfg.w_block = cfg.w_threads == 1 ? ow_ : (utils::divide_and_ceil( utils::divide_and_ceil(ow_, cfg.im_w_block), cfg.w_threads) - * cfg.im_w_block); + * cfg.im_w_block); } } else { if (!is_1x1_conv_ && has_pad) { @@ -335,7 +336,7 @@ config_ptr gen_nested_conv_fwd_t::get_default_config(context_ptr ctx) const { ? oh_ : (utils::divide_and_ceil( utils::divide_and_ceil(oh_, cfg.im_h_block), cfg.h_threads) - * cfg.im_h_block); + * cfg.im_h_block); } } } @@ -403,7 +404,7 @@ config_ptr gen_nested_conv_fwd_t::get_default_config(context_ptr ctx) const { ? oh_ : (utils::divide_and_ceil( utils::divide_and_ceil(oh_, cfg.im_h_block), cfg.h_threads) - * cfg.im_h_block); + * cfg.im_h_block); } if (!is_1x1_conv_ && oc_ > 128 && cfg.im_oc_block % 32 != 0) { @@ -1235,15 +1236,15 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const { = blocking_input_ ? std::vector {n, ic, h, w, 0} : std::vector { - n, h, w, ic * im_ic_block}; + n, h, w, ic * im_ic_block}; A_list[i_c] = tensor_ptr(input1, input_pos); B_list[i_c] = tensor_ptr(weight, kpack > 1 ? std::vector {oc, ic, - 0, 0, 0, 0, 0} + 0, 0, 0, 0, 0} : std::vector { - oc, ic, 0, 0, 0, 0}); + oc, ic, 0, 0, 0, 0}); } } const auto hint_A_size @@ -1273,9 +1274,9 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const { std::vector output_pos = blocking_output_ ? std::vector {pic * mb_ + n, oc, h, - w, 0} + w, 0} : std::vector { - pic * mb_ + n, h, w, oc * im_oc_block}; + pic * mb_ + n, h, w, oc * im_oc_block}; if (ic_num_block_pt > 1) { _if_(o_ic == 0) { @@ -1327,11 +1328,11 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const { owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, - {h, im_h_block}, {w, im_w_block}, - {0, im_oc_block}} + {h, im_h_block}, {w, im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, - {h, im_h_block}, {w, im_w_block}, - {oc * im_oc_block, im_oc_block}}); + {h, im_h_block}, {w, im_w_block}, + {oc * im_oc_block, im_oc_block}}); } } } @@ -1346,11 +1347,11 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const { owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {h, im_h_block}, {w, im_w_block}, - {0, im_oc_block}} + {h, im_h_block}, {w, im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {h, im_h_block}, - {w, im_w_block}, - {anch_c * im_oc_block, oc_block}}); + {w, im_w_block}, + {anch_c * im_oc_block, oc_block}}); } } } @@ -1371,11 +1372,11 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const { owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {h, im_h_block}, {anch_w, w_block}, - {0, im_oc_block}} + {h, im_h_block}, {anch_w, w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {h, im_h_block}, - {anch_w, w_block}, - {anch_c * im_oc_block, oc_block}}); + {anch_w, w_block}, + {anch_c * im_oc_block, oc_block}}); } } } @@ -1401,11 +1402,11 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {anch_h, h_block}, {anch_w, w_block}, - {0, im_oc_block}} + {anch_h, h_block}, {anch_w, w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {anch_h, h_block}, - {anch_w, w_block}, - {anch_c * im_oc_block, oc_block}}); + {anch_w, w_block}, + {anch_c * im_oc_block, oc_block}}); } } } @@ -1421,16 +1422,16 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, - {0, oh_expr_}, {0, ow_}, {0, im_oc_block}} + {0, oh_expr_}, {0, ow_}, {0, im_oc_block}} : slice_range { - {pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); + {pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); } } if (oc_threads == 1 && h_threads == 1 && w_threads == 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, - {0, oh_expr_}, {0, ow_}, {0, im_oc_block}} + {0, oh_expr_}, {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); } } @@ -1438,7 +1439,7 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_}, - {0, ow_}, {0, im_oc_block}} + {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); } } @@ -1447,7 +1448,7 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_}, - {0, ow_}, {0, im_oc_block}} + {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); } } @@ -1455,7 +1456,7 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_}, - {0, ow_}, {0, im_oc_block}} + {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); } } @@ -1616,9 +1617,9 @@ void gen_nested_conv_fwd_t::dynamic_compute_1x1_pack_input_nested( = tensor_ptr(input1, input_pos); B_list[i_c] = tensor_ptr(weight, kpack > 1 ? std::vector {oc, ic, - 0, 0, 0, 0, 0} + 0, 0, 0, 0, 0} : std::vector { - oc, ic, 0, 0, 0, 0}); + oc, ic, 0, 0, 0, 0}); } } auto LDA = ic_; @@ -1628,7 +1629,7 @@ void gen_nested_conv_fwd_t::dynamic_compute_1x1_pack_input_nested( = blocking_output_ ? std::vector {n, oc, h, w, 0} : std::vector { - n, h, w, oc * im_oc_block}; + n, h, w, oc * im_oc_block}; auto im_w_tail_block = builder::make_cast( datatypes::s32, ow_expr_ - w); im_w_block = builder::make_select( @@ -1861,18 +1862,18 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested( std::vector input_pos = blocking_input_ ? std::vector {n, ic, - (h + im_h_i) * sh_, w * sw_, 0} + (h + im_h_i) * sh_, w * sw_, 0} : std::vector {n, - (h + im_h_i) * sh_, w * sw_, - ic * im_ic_block}; + (h + im_h_i) * sh_, w * sw_, + ic * im_ic_block}; A_list[i_c] = tensor_ptr(input, input_pos); B_list[i_c] = tensor_ptr(weight, kpack > 1 ? std::vector {oc, ic, - 0, 0, 0, 0, 0} + 0, 0, 0, 0, 0} : std::vector { - oc, ic, 0, 0, 0, 0}); + oc, ic, 0, 0, 0, 0}); } } const auto hint_A_size @@ -1904,9 +1905,9 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested( std::vector output_pos = blocking_output_ ? std::vector {pic * mb_ + n, oc, - h + im_h_i, w, 0} + h + im_h_i, w, 0} : std::vector {pic * mb_ + n, - h + im_h_i, w, oc * im_oc_block}; + h + im_h_i, w, oc * im_oc_block}; if (ic_num_block_pt > 1) { _if_(o_ic == 0) { @@ -1956,11 +1957,14 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, - {h + im_h_i, 1}, {w, im_w_block}, - {0, im_oc_block}} + {h + im_h_i, 1}, + {w, im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, - {h + im_h_i, 1}, {w, im_w_block}, - {oc * im_oc_block, im_oc_block}}); + {h + im_h_i, 1}, + {w, im_w_block}, + {oc * im_oc_block, + im_oc_block}}); } } } @@ -1972,11 +1976,11 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, - {h, im_h_block}, {w, im_w_block}, - {0, im_oc_block}} + {h, im_h_block}, {w, im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, - {h, im_h_block}, {w, im_w_block}, - {oc * im_oc_block, im_oc_block}}); + {h, im_h_block}, {w, im_w_block}, + {oc * im_oc_block, im_oc_block}}); } } } @@ -1991,11 +1995,11 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {h, im_h_block}, {w, im_w_block}, - {0, im_oc_block}} + {h, im_h_block}, {w, im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {h, im_h_block}, - {w, im_w_block}, - {anch_c * im_oc_block, oc_block}}); + {w, im_w_block}, + {anch_c * im_oc_block, oc_block}}); } } } @@ -2016,11 +2020,11 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {h, im_h_block}, {anch_w, w_block}, - {0, im_oc_block}} + {h, im_h_block}, {anch_w, w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {h, im_h_block}, - {anch_w, w_block}, - {anch_c * im_oc_block, oc_block}}); + {anch_w, w_block}, + {anch_c * im_oc_block, oc_block}}); } } } @@ -2044,11 +2048,11 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {anch_h, h_block}, {anch_w, w_block}, - {0, im_oc_block}} + {anch_h, h_block}, {anch_w, w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {anch_h, h_block}, - {anch_w, w_block}, - {anch_c * im_oc_block, oc_block}}); + {anch_w, w_block}, + {anch_c * im_oc_block, oc_block}}); } } } @@ -2064,16 +2068,16 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, - {0, oh_expr_}, {0, ow_}, {0, im_oc_block}} + {0, oh_expr_}, {0, ow_}, {0, im_oc_block}} : slice_range { - {pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); + {pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); } } if (oc_threads == 1 && h_threads == 1 && w_threads == 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, - {0, oh_expr_}, {0, ow_}, {0, im_oc_block}} + {0, oh_expr_}, {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); } } @@ -2082,7 +2086,7 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_}, - {0, ow_}, {0, im_oc_block}} + {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); } } @@ -2091,7 +2095,7 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_}, - {0, ow_}, {0, im_oc_block}} + {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); } } @@ -2099,7 +2103,7 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_}, - {0, ow_}, {0, im_oc_block}} + {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}}); } } @@ -2234,12 +2238,12 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_os_blocking_nested( auto out_tsr = tensor_ptr(output, blocking_output_ ? std::vector {n, oc, - (im_s_block_idx * im_s_block) / ow_, - im_s_block_idx * im_s_block % ow_, 0} + (im_s_block_idx * im_s_block) / ow_, + im_s_block_idx * im_s_block % ow_, 0} : std::vector {n, - (im_s_block_idx * im_s_block) / ow_, - (im_s_block_idx * im_s_block) % ow_, - oc * im_oc_block}); + (im_s_block_idx * im_s_block) / ow_, + (im_s_block_idx * im_s_block) % ow_, + oc * im_oc_block}); int adj_ow = ow_ + num_elems_skip_per_ow_; @@ -2248,15 +2252,15 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_os_blocking_nested( blocking_output_ ? std::vector {n, oc, 0, 0, 0} : std::vector { - n, 0, 0, oc * config.im_oc_block}); + n, 0, 0, oc * config.im_oc_block}); } else { auto acc_m = os_acc_size[{im_s_block_idx}]; out_tsr = tensor_ptr(output, blocking_output_ ? std::vector {n, oc, acc_m / ow_, - acc_m % ow_, 0} + acc_m % ow_, 0} : std::vector {n, acc_m / ow_, - acc_m % ow_, oc * im_oc_block}); + acc_m % ow_, oc * im_oc_block}); } _for_(i_c, 0, ic_block / im_ic_block) { @@ -2273,15 +2277,15 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_os_blocking_nested( = ((im_s_block_idx * im_s_block) % adj_ow); std::vector input_pos = blocking_input_ ? std::vector {n, ic, - h * sh_ + dh_ * r, w * sw_ + dw_ * s, 0} + h * sh_ + dh_ * r, w * sw_ + dw_ * s, 0} : std::vector {n, h * sh_ + dh_ * r, - w * sw_ + dw_ * s, ic * im_ic_block}; + w * sw_ + dw_ * s, ic * im_ic_block}; A_list[idx] = tensor_ptr(input, input_pos); B_list[idx] = tensor_ptr(weight, kpack > 1 ? std::vector {oc, ic, r, s, 0, 0, - 0} + 0} : std::vector {oc, ic, r, s, 0, 0}); } } @@ -2321,14 +2325,14 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_os_blocking_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, - {im_s_block_idx * (oh_ / os_num_block), - (oh_ / os_num_block)}, - {0, ow_}, {0, im_oc_block}} + {im_s_block_idx * (oh_ / os_num_block), + (oh_ / os_num_block)}, + {0, ow_}, {0, im_oc_block}} : slice_range {{n, 1UL}, - {im_s_block_idx * (oh_ / os_num_block), - (oh_ / os_num_block)}, - {0, ow_}, - {oc * im_oc_block, im_oc_block}}); + {im_s_block_idx * (oh_ / os_num_block), + (oh_ / os_num_block)}, + {0, ow_}, + {oc * im_oc_block, im_oc_block}}); } } } @@ -2342,43 +2346,44 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_os_blocking_nested( if (oc_threads == 1 && ic_threads == 1 && s_threads == 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], - blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_}, {0, ow_}, {0, im_oc_block}} - : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + blocking_output_ + ? slice_range {{pbs, 1UL}, + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_}, {0, ow_}, {0, im_oc_block}} + : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } if (oc_threads == 1 && s_threads == 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_}, {0, ow_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_}, {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } if (s_threads == 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_}, {0, ow_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_}, {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } if (mb_ > 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_}, {0, ow_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_}, {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } } @@ -2482,7 +2487,7 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( auto L2_cache_size = ctx->machine_.cpu_flags_.getDCacheSize(2); int oc_split = (oc_threads == 1 && oc_num_block_pt == 1) ? get_oc_split_factor( - -1, weight_size, L2_cache_size, oc_block / im_oc_block) + -1, weight_size, L2_cache_size, oc_block / im_oc_block) : 1; auto LDA = blocking_input_ ? sw_ * im_ic_block : sw_ * ic_; @@ -2574,14 +2579,14 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( std::vector input_pos = blocking_input_ ? std::vector {n, ic, - (h + im_h_i) * sh_ - + dh_ * r, - w * sw_ + dw_ * s, 0} + (h + im_h_i) * sh_ + + dh_ * r, + w * sw_ + dw_ * s, 0} : std::vector {n, - (h + im_h_i) * sh_ - + dh_ * r, - w * sw_ + dw_ * s, - ic * im_ic_block}; + (h + im_h_i) * sh_ + + dh_ * r, + w * sw_ + dw_ * s, + ic * im_ic_block}; A_list[idx] = tensor_ptr( input, input_pos); @@ -2589,9 +2594,9 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( = tensor_ptr(weight, kpack > 1 ? std::vector {oc, - ic, r, s, 0, 0, 0} - : std::vector { - oc, ic, r, s, 0, 0}); + ic, r, s, 0, 0, 0} + : std::vector {oc, + ic, r, s, 0, 0}); } } } @@ -2599,11 +2604,11 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( std::vector output_pos = blocking_output_ ? std::vector {pic * mb_expr_ - + n, - oc, h + im_h_i, w, 0} + + n, + oc, h + im_h_i, w, 0} : std::vector { - pic * mb_expr_ + n, h + im_h_i, w, - oc * im_oc_block}; + pic * mb_expr_ + n, h + im_h_i, + w, oc * im_oc_block}; generate_brgemm(real_im_w_block, im_ic_block, im_oc_block, ic_block, @@ -2619,14 +2624,14 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, - {oc, 1}, {h + im_h_i, 1}, - {w, real_im_w_block}, - {0, im_oc_block}} + {oc, 1}, {h + im_h_i, 1}, + {w, real_im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, - {h + im_h_i, 1}, - {w, real_im_w_block}, - {oc * im_oc_block, - im_oc_block}}); + {h + im_h_i, 1}, + {w, real_im_w_block}, + {oc * im_oc_block, + im_oc_block}}); } } // im_h_i } @@ -2638,14 +2643,14 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, - {h, real_im_h_block}, - {w, real_im_w_block}, - {0, im_oc_block}} + {h, real_im_h_block}, + {w, real_im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, - {h, real_im_h_block}, - {w, real_im_w_block}, - {oc * im_oc_block, - im_oc_block}}); + {h, real_im_h_block}, + {w, real_im_w_block}, + {oc * im_oc_block, + im_oc_block}}); } } } // i_oc @@ -2663,14 +2668,14 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {h, real_im_h_block}, - {w, real_im_w_block}, - {0, im_oc_block}} + {h, real_im_h_block}, + {w, real_im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, - {h, real_im_h_block}, - {w, real_im_w_block}, - {anch_c * im_oc_block, - im_oc_block}}); + {h, real_im_h_block}, + {w, real_im_w_block}, + {anch_c * im_oc_block, + im_oc_block}}); } } } // i_w @@ -2693,11 +2698,12 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {h, real_im_h_block}, {anch_w, w_block}, - {0, im_oc_block}} + {h, real_im_h_block}, + {anch_w, w_block}, {0, im_oc_block}} : slice_range {{n, 1UL}, - {h, real_im_h_block}, {anch_w, w_block}, - {anch_c * im_oc_block, oc_block}}); + {h, real_im_h_block}, + {anch_w, w_block}, + {anch_c * im_oc_block, oc_block}}); } } } // i_h @@ -2725,13 +2731,13 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {anch_h, oh_ / oh_used_threads}, - {anch_w, ow_ / ow_used_threads}, - {0, im_oc_block}} + {anch_h, oh_ / oh_used_threads}, + {anch_w, ow_ / ow_used_threads}, + {0, im_oc_block}} : slice_range {{n, 1UL}, - {anch_h, oh_ / oh_used_threads}, - {anch_w, ow_ / ow_used_threads}, - {anch_c * im_oc_block, oc_block}}); + {anch_h, oh_ / oh_used_threads}, + {anch_w, ow_ / ow_used_threads}, + {anch_c * im_oc_block, oc_block}}); } } } // o_ic @@ -2747,11 +2753,11 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_}, {0, ow_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_}, {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } @@ -2759,22 +2765,22 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } if (h_threads == 1 && w_threads == 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } @@ -2782,22 +2788,22 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } bind_output_loop_axis(lpbs, "N"); @@ -2961,21 +2967,21 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested( std::vector input_pos = blocking_input_ ? std::vector {n, ic, - (h + im_h_i) * sh_ + r, - w * sw_ + s, 0} + (h + im_h_i) * sh_ + r, + w * sw_ + s, 0} : std::vector {n, - (h + im_h_i) * sh_ + r, - w * sw_ + s, - ic * im_ic_block}; + (h + im_h_i) * sh_ + r, + w * sw_ + s, + ic * im_ic_block}; A_list[idx] = tensor_ptr(input, input_pos); B_list[idx] = tensor_ptr(weight, kpack > 1 ? std::vector {oc, ic, - r, s, 0, 0, 0} + r, s, 0, 0, 0} : std::vector { - oc, ic, r, s, 0, 0}); + oc, ic, r, s, 0, 0}); } } } @@ -3005,9 +3011,9 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested( std::vector output_pos = blocking_output_ ? std::vector {pic * mb_ + n, oc, - h + im_h_i, w, 0} + h + im_h_i, w, 0} : std::vector {pic * mb_ + n, - h + im_h_i, w, oc * im_oc_block}; + h + im_h_i, w, oc * im_oc_block}; if (ic_num_block_pt > 1) { _if_(o_ic == 0) { @@ -3059,14 +3065,14 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, - {h + im_h_i, 1}, - {w, im_w_block}, - {0, im_oc_block}} + {h + im_h_i, 1}, + {w, im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, - {h + im_h_i, 1}, - {w, im_w_block}, - {oc * im_oc_block, - im_oc_block}}); + {h + im_h_i, 1}, + {w, im_w_block}, + {oc * im_oc_block, + im_oc_block}}); } } } @@ -3078,11 +3084,11 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, - {h, im_h_block}, {w, im_w_block}, - {0, im_oc_block}} + {h, im_h_block}, {w, im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, - {h, im_h_block}, {w, im_w_block}, - {oc * im_oc_block, im_oc_block}}); + {h, im_h_block}, {w, im_w_block}, + {oc * im_oc_block, im_oc_block}}); } } } @@ -3099,11 +3105,11 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {h, im_h_block}, {w, im_w_block}, - {0, im_oc_block}} + {h, im_h_block}, {w, im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {h, im_h_block}, - {w, im_w_block}, - {anch_c * im_oc_block, oc_block}}); + {w, im_w_block}, + {anch_c * im_oc_block, oc_block}}); } } } @@ -3125,11 +3131,11 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {h, im_h_block}, {anch_w, w_block}, - {0, im_oc_block}} + {h, im_h_block}, {anch_w, w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {h, im_h_block}, - {anch_w, w_block}, - {anch_c * im_oc_block, oc_block}}); + {anch_w, w_block}, + {anch_c * im_oc_block, oc_block}}); } } } @@ -3155,11 +3161,11 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_c, 1}, - {anch_h, h_block}, {anch_w, w_block}, - {0, im_oc_block}} + {anch_h, h_block}, {anch_w, w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {anch_h, h_block}, - {anch_w, w_block}, - {anch_c * im_oc_block, oc_block}}); + {anch_w, w_block}, + {anch_c * im_oc_block, oc_block}}); } } } @@ -3175,53 +3181,54 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_}, {0, ow_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_}, {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } if (oc_threads == 1 && h_threads == 1 && w_threads == 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], - blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_}, {0, ow_}, {0, im_oc_block}} - : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + blocking_output_ + ? slice_range {{pbs, 1UL}, + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_}, {0, ow_}, {0, im_oc_block}} + : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } if (h_threads == 1 && w_threads == 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_}, {0, ow_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_}, {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } if (h_threads == 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_}, {0, ow_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_}, {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } if (mb_ > 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_}, {0, ow_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_}, {0, ow_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } } @@ -3308,9 +3315,9 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output, _if_(h + im_h_i < oh_) { std::vector output_pos = blocking_output_ ? std::vector {pic * mb_ + n, oc, h + im_h_i, - w, 0} + w, 0} : std::vector { - pic * mb_ + n, h + im_h_i, w, oc * im_oc_block}; + pic * mb_ + n, h + im_h_i, w, oc * im_oc_block}; if (ic_num_block_pt > 1) { _if_(o_ic == 0) { @@ -3410,18 +3417,18 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output, lanes)] = input[blocking_input_ ? span_t( - {n, ic, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, k}, - lanes) + {n, ic, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, k}, + lanes) : span_t( - {n, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, - ic * im_ic_block + k}, - lanes)]; + {n, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, + ic * im_ic_block + k}, + lanes)]; } } } @@ -3471,18 +3478,18 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output, lanes)] = input[blocking_input_ ? span_t( - {n, ic, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, k}, - lanes) + {n, ic, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, k}, + lanes) : span_t( - {n, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, - ic * im_ic_block + k}, - lanes)]; + {n, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, + ic * im_ic_block + k}, + lanes)]; } } @@ -3520,12 +3527,12 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output, A_list[idx] = tensor_ptr(input, blocking_input_ ? std::vector {n, ic, - (h + im_h_i) * sh_ + r - ph_b_, - w * sw_ + s - pw_b_, 0} + (h + im_h_i) * sh_ + r - ph_b_, + w * sw_ + s - pw_b_, 0} : std::vector {n, - (h + im_h_i) * sh_ + r - ph_b_, - w * sw_ + s - pw_b_, - ic * im_ic_block}); + (h + im_h_i) * sh_ + r - ph_b_, + w * sw_ + s - pw_b_, + ic * im_ic_block}); } } } @@ -3548,18 +3555,18 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output, lanes)] = input[blocking_input_ ? span_t( - {n, ic, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, k}, - lanes) + {n, ic, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, k}, + lanes) : span_t( - {n, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, - ic * im_ic_block + k}, - lanes)]; + {n, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, + ic * im_ic_block + k}, + lanes)]; } } builtin::brgemm_init( @@ -3600,7 +3607,7 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output, B_list[idx] = tensor_ptr(weight, kpack > 1 ? std::vector {oc, ic, r, s, 0, 0, - 0} + 0} : std::vector {oc, ic, r, s, 0, 0}); } } @@ -3635,11 +3642,11 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output, owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, - {h + im_h_i, 1}, {w, im_w_block}, - {0, im_oc_block}} + {h + im_h_i, 1}, {w, im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {h + im_h_i, 1}, - {w, im_w_block}, - {oc * im_oc_block, im_oc_block}}); + {w, im_w_block}, + {oc * im_oc_block, im_oc_block}}); } } } // im_h_i @@ -3649,10 +3656,10 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output, create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, {h, im_h_block}, - {w, im_w_block}, {0, im_oc_block}} + {w, im_w_block}, {0, im_oc_block}} : slice_range {{n, 1UL}, {h, im_h_block}, - {w, im_w_block}, - {oc * im_oc_block, im_oc_block}}); + {w, im_w_block}, + {oc * im_oc_block, im_oc_block}}); } } } // i_w @@ -3665,11 +3672,12 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output, + o_w * w_block / im_w_block) * im_w_block; create_fusion_anchor(fusion, owner_->get_outputs()[0], - blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, - {h, im_h_block}, {anch_w, w_block}, {0, im_oc_block}} - : slice_range {{n, 1UL}, - {h, im_h_block}, {anch_w, w_block}, - {oc * im_oc_block, im_oc_block}}); + blocking_output_ + ? slice_range {{n, 1UL}, {oc, 1}, {h, im_h_block}, + {anch_w, w_block}, {0, im_oc_block}} + : slice_range {{n, 1UL}, {h, im_h_block}, + {anch_w, w_block}, + {oc * im_oc_block, im_oc_block}}); } } } // i_h @@ -3687,9 +3695,10 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output, create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, {anch_h, h_block}, - {anch_w, w_block}, {0, im_oc_block}} + {anch_w, w_block}, {0, im_oc_block}} : slice_range {{n, 1UL}, {anch_h, h_block}, - {anch_w, w_block}, {oc * im_oc_block, im_oc_block}}); + {anch_w, w_block}, + {oc * im_oc_block, im_oc_block}}); } } } // ioc @@ -3711,9 +3720,10 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output, create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_oc, 1}, {anch_h, h_block}, - {anch_w, w_block}, {0, im_oc_block}} + {anch_w, w_block}, {0, im_oc_block}} : slice_range {{n, 1UL}, {anch_h, h_block}, - {anch_w, w_block}, {anch_oc * im_oc_block, im_oc_block}}); + {anch_w, w_block}, + {anch_oc * im_oc_block, im_oc_block}}); } } // o_ic } @@ -3810,9 +3820,9 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call( _var_(copy_width, datatypes::index); std::vector output_pos = blocking_output_ ? std::vector {pic * mb_ + n, oc, h + im_h_i, - w, 0} + w, 0} : std::vector { - pic * mb_ + n, h + im_h_i, w, oc * im_oc_block}; + pic * mb_ + n, h + im_h_i, w, oc * im_oc_block}; if (ic_num_block_pt > 1) { _if_(o_ic == 0) { @@ -3914,18 +3924,18 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call( lanes)] = input[blocking_input_ ? span_t( - {n, ic, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, k}, - lanes) + {n, ic, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, k}, + lanes) : span_t( - {n, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, - ic * im_ic_block + k}, - lanes)]; + {n, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, + ic * im_ic_block + k}, + lanes)]; } } } @@ -3975,18 +3985,18 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call( lanes)] = input[blocking_input_ ? span_t( - {n, ic, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, k}, - lanes) + {n, ic, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, k}, + lanes) : span_t( - {n, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, - ic * im_ic_block + k}, - lanes)]; + {n, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, + ic * im_ic_block + k}, + lanes)]; } } @@ -4025,12 +4035,12 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call( A_list[idx] = tensor_ptr(input, blocking_input_ ? std::vector {n, ic, - (h + im_h_i) * sh_ + r - ph_b_, - w * sw_ + s - pw_b_, 0} + (h + im_h_i) * sh_ + r - ph_b_, + w * sw_ + s - pw_b_, 0} : std::vector {n, - (h + im_h_i) * sh_ + r - ph_b_, - w * sw_ + s - pw_b_, - ic * im_ic_block}); + (h + im_h_i) * sh_ + r - ph_b_, + w * sw_ + s - pw_b_, + ic * im_ic_block}); } } } @@ -4054,18 +4064,18 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call( lanes)] = input[blocking_input_ ? span_t( - {n, ic, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, k}, - lanes) + {n, ic, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, k}, + lanes) : span_t( - {n, - (h + im_h_i) * sh_ + i - - ph_b_, - w * sw_ + j - pw_b_, - ic * im_ic_block + k}, - lanes)]; + {n, + (h + im_h_i) * sh_ + i + - ph_b_, + w * sw_ + j - pw_b_, + ic * im_ic_block + k}, + lanes)]; } } builtin::brgemm_init( @@ -4106,7 +4116,7 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call( B_list[idx] = tensor_ptr(weight, kpack > 1 ? std::vector {oc, ic, r, s, 0, 0, - 0} + 0} : std::vector {oc, ic, r, s, 0, 0}); } } @@ -4132,11 +4142,11 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call( owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1}, {oc, 1}, - {h + im_h_i, 1}, {w, real_im_w_block}, - {0, im_oc_block}} + {h + im_h_i, 1}, {w, real_im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {h + im_h_i, 1}, - {w, real_im_w_block}, - {oc * im_oc_block, im_oc_block}}); + {w, real_im_w_block}, + {oc * im_oc_block, im_oc_block}}); } } // im_h_i } @@ -4146,11 +4156,11 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, - {h, real_im_h_block}, {w, real_im_w_block}, - {0, im_oc_block}} + {h, real_im_h_block}, {w, real_im_w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {h, real_im_h_block}, - {w, real_im_w_block}, - {oc * im_oc_block, im_oc_block}}); + {w, real_im_w_block}, + {oc * im_oc_block, im_oc_block}}); } } // i_w } @@ -4167,11 +4177,11 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, - {h, real_im_h_block}, {anch_w, w_block}, - {0, im_oc_block}} + {h, real_im_h_block}, {anch_w, w_block}, + {0, im_oc_block}} : slice_range {{n, 1UL}, {h, real_im_h_block}, - {anch_w, w_block}, - {oc * im_oc_block, im_oc_block}}); + {anch_w, w_block}, + {oc * im_oc_block, im_oc_block}}); } } } // i_h @@ -4188,9 +4198,10 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {oc, 1}, {anch_h, h_block}, - {anch_w, w_block}, {0, im_oc_block}} + {anch_w, w_block}, {0, im_oc_block}} : slice_range {{n, 1UL}, {anch_h, h_block}, - {anch_w, w_block}, {oc * im_oc_block, im_oc_block}}); + {anch_w, w_block}, + {oc * im_oc_block, im_oc_block}}); } } // i_oc } @@ -4212,9 +4223,10 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{n, 1UL}, {anch_oc, 1}, {anch_h, h_block}, - {anch_w, w_block}, {0, im_oc_block}} + {anch_w, w_block}, {0, im_oc_block}} : slice_range {{n, 1UL}, {anch_h, h_block}, - {anch_w, w_block}, {anch_oc * im_oc_block, im_oc_block}}); + {anch_w, w_block}, + {anch_oc * im_oc_block, im_oc_block}}); } } } @@ -4358,7 +4370,7 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_padding_nested( auto L2_cache_size = ctx->machine_.cpu_flags_.getDCacheSize(2); int oc_split = (oc_threads == 1 && oc_num_block_pt == 1) ? get_oc_split_factor( - -1, weight_size, L2_cache_size, oc_block / im_oc_block) + -1, weight_size, L2_cache_size, oc_block / im_oc_block) : 1; // create a global shared zero-buffer referenced by padding @@ -4410,11 +4422,11 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_padding_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } @@ -4422,22 +4434,22 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_padding_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } if (h_threads == 1 && w_threads == 1) { create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } @@ -4445,21 +4457,21 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_padding_nested( create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } create_fusion_anchor(fusion, owner_->get_outputs()[0], blocking_output_ ? slice_range {{pbs, 1UL}, - {outer_k * oc_ / im_oc_block / oc_split, - oc_ / im_oc_block / oc_split}, - {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} + {outer_k * oc_ / im_oc_block / oc_split, + oc_ / im_oc_block / oc_split}, + {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}} : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_}, - {outer_k * oc_ / oc_split, oc_ / oc_split}}); + {outer_k * oc_ / oc_split, oc_ / oc_split}}); } } bind_output_loop_axis(lpbs, "N"); diff --git a/legacy/core/src/runtime/microkernel/cpu/brgemm_onednn.cpp b/legacy/core/src/runtime/microkernel/cpu/brgemm_onednn.cpp index edc8f909..49902ad5 100644 --- a/legacy/core/src/runtime/microkernel/cpu/brgemm_onednn.cpp +++ b/legacy/core/src/runtime/microkernel/cpu/brgemm_onednn.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2020-2024 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "brgemm_common.hpp" #include "brgemm_range_handle.hpp" @@ -880,7 +881,7 @@ SC_API void dnnl_brgemm_call(brgemm_kernel_info *brg_desc, const void *A, new brgemm_batch_element_t[num]); brgemm_batch_element_t *batch = batch_v.get(); #else - brgemm_batch_element_t batch[num]; // NOLINT + brgemm_batch_element_t batch[num]; // NOLINT #endif #endif if (top_pad) { @@ -937,7 +938,7 @@ SC_API void dnnl_brgemm_call_postops(brgemm_kernel_info *brg_desc, new brgemm_batch_element_t[num]); brgemm_batch_element_t *batch = batch_v.get(); #else - brgemm_batch_element_t batch[num]; // NOLINT + brgemm_batch_element_t batch[num]; // NOLINT #endif #endif if (top_pad) { @@ -1000,7 +1001,7 @@ SC_API void dnnl_brgemm_list_call(brgemm_kernel_info *brg_desc, new brgemm_batch_element_t[batch_num]); brgemm_batch_element_t *batch = batch_v.get(); #else - brgemm_batch_element_t batch[batch_num]; // NOLINT + brgemm_batch_element_t batch[batch_num]; // NOLINT #endif #endif @@ -1066,7 +1067,7 @@ SC_API void dnnl_brgemm_list_call_postops( new brgemm_batch_element_t[batch_num]); brgemm_batch_element_t *batch = batch_v.get(); #else - brgemm_batch_element_t batch[batch_num]; // NOLINT + brgemm_batch_element_t batch[batch_num]; // NOLINT #endif #endif @@ -1292,7 +1293,7 @@ static int dnnl_brgemm_list_update_func( new brgemm_batch_element_t[batch_num]); brgemm_batch_element_t *batch = batch_v.get(); #else - brgemm_batch_element_t batch[batch_num]; // NOLINT + brgemm_batch_element_t batch[batch_num]; // NOLINT #endif #endif int sizeofA = get_dtype_sizeof(dtypeA); diff --git a/legacy/core/src/util/reflection.cpp b/legacy/core/src/util/reflection.cpp index c588372f..6ad4b3eb 100644 --- a/legacy/core/src/util/reflection.cpp +++ b/legacy/core/src/util/reflection.cpp @@ -1,18 +1,19 @@ -/******************************************************************************* - * Copyright 2020-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ #include "reflection.hpp" #include #include @@ -411,7 +412,7 @@ bool visitor_t::dispatch(general_ref_t *v, general_ref_t *v2) { } else if (v->type_.base_ == reflection::basic_type::t_class) { return visit_class(v, v2); } else { -// clang-format off + // clang-format off #define PUT_VALUE(TYPE) \ case basic_type::t_##TYPE: return visit(reinterpret_cast(v->data_), v2 ? reinterpret_cast(v2->data_) : nullptr); break; // NOLINT // clang-format on diff --git a/legacy/core/src/util/variant.hpp b/legacy/core/src/util/variant.hpp index 752bcf0e..ee804b6a 100644 --- a/legacy/core/src/util/variant.hpp +++ b/legacy/core/src/util/variant.hpp @@ -1,18 +1,20 @@ -/******************************************************************************* - * Copyright 2022-2023 Intel Corporation +/* + * Copyright (C) 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ + * See the License for the specific language governing permissions + * and limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + */ + #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_UTIL_VARIANT_HPP #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_UTIL_VARIANT_HPP #include @@ -35,7 +37,9 @@ struct const_max { : const_max::value; }; -template struct const_max { static constexpr size_t value = v; }; +template struct const_max { + static constexpr size_t value = v; +}; template struct helper; diff --git a/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp b/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp index 9dc940fc..785a5bc0 100644 --- a/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp +++ b/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp @@ -612,8 +612,8 @@ LogicalResult BrgemmExecuteOp::verify() { // inputs for BRGEMM: kernel id, A memref, B memref, C memref, batch_size, // addr_len if (inputs.size() != 6) - return brgemmOp.emitOpError() << "expect 6" - << " inputs but got " << inputs.size(); + return brgemmOp.emitOpError() + << "expect 6" << " inputs but got " << inputs.size(); // Verify the dispatch to be an i64. Value dispatch = brgemmOp.getDispatch(); if (!dispatch.getType().isInteger(64)) diff --git a/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp b/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp index f9d0663c..2c48c214 100644 --- a/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp +++ b/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp @@ -128,10 +128,9 @@ struct Kernel { explicit Kernel(cl_program program, cl_kernel kernel, const size_t *gridSize, const size_t *blockSize, size_t argNum, const size_t *argSize) - : program(program), - kernel(kernel), globalSize{gridSize[0] * blockSize[0], - gridSize[1] * blockSize[1], - gridSize[2] * blockSize[2]}, + : program(program), kernel(kernel), + globalSize{gridSize[0] * blockSize[0], gridSize[1] * blockSize[1], + gridSize[2] * blockSize[2]}, localSize{blockSize[0], blockSize[1], blockSize[2]}, argSize(argSize, argSize + argNum) { #ifndef NDEBUG diff --git a/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp b/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp index 749ed807..0eabd6e1 100644 --- a/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp +++ b/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp @@ -175,8 +175,7 @@ static FailureOr inferBrgemmDims(linalg::LinalgOp linalgOp) { } } - LLVM_DEBUG(llvm::dbgs() << "[inferBrgemmDims] Candidate dims: " - << "\n"); + LLVM_DEBUG(llvm::dbgs() << "[inferBrgemmDims] Candidate dims: " << "\n"); LLVM_DEBUG(llvm::dbgs() << "[inferBrgemmDims] m pos in affine: " << mAffinePos << "\n"); LLVM_DEBUG(llvm::dbgs() << "[inferBrgemmDims] n pos in affine: " << nAffinePos