From 4f9b9277e61c920851ae085a6d7f1ac4d7b70275 Mon Sep 17 00:00:00 2001
From: Vadim Musin <wasd94@gmail.com>
Date: Mon, 20 Jan 2025 14:27:44 +0100
Subject: [PATCH 1/2] Bump clang-tidy to 16

---
 .github/workflows/clang-tidy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index 52b44590..43df7e9b 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -102,4 +102,4 @@ jobs:
       shell: bash
       run: |
         cd build
-        python3 ../llvm-project/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py -warnings-as-errors=* -p ./ -config-file ../llvm-project/mlir/.clang-tidy -clang-tidy-binary $(which clang-tidy-15) ${{ env.CHANGED_FILES }}
+        python3 ../llvm-project/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py -warnings-as-errors=* -p ./ -config-file ../llvm-project/mlir/.clang-tidy -clang-tidy-binary $(which clang-tidy-16) ${{ env.CHANGED_FILES }}

From 7370a71ac202c3ff050b39b0e2af0b763eaf3548 Mon Sep 17 00:00:00 2001
From: Artem Kroviakov <artem.kroviakov@intel.com>
Date: Mon, 20 Jan 2025 14:35:54 +0000
Subject: [PATCH 2/2] Fix style to satisfy clang-format 18.1.3 and update license headers

---
 .../core/src/compiler/codegen/codegen_c.cpp   |  22 +-
 .../compiler/codegen/llvm/intrinsic_impl.cpp  |  19 +-
 .../ir/graph/anchor_loop_generator.cpp        |  17 +-
 .../ir/graph/anchor_loop_generator.hpp        |  19 +-
 .../core/src/compiler/ir/graph/graph_map.hpp  |  21 +-
 .../core/src/compiler/ir/graph/graph_op.hpp   |  19 +-
 .../src/compiler/ir/graph/mixed_partition.cpp |  23 +-
 legacy/core/src/compiler/ir/intrinsics.hpp    |  19 +-
 .../transform/dynamic_parallel_transform.cpp  |  21 +-
 .../jit/xbyak/backend/stack_frame_model.cpp   |  44 +-
 .../xbyak/backend/xbyak_lowering_viewer.cpp   |  23 +-
 legacy/core/src/ops/fusible/transpose.cpp     |  33 +-
 .../core/src/ops/fusible/unary_elemwise.cpp   |  21 +-
 legacy/core/src/ops/managed_matmul_core.cpp   |  18 +-
 legacy/core/src/ops/matmul_core.cpp           |  18 +-
 .../ops/templates/conv1x1_backprop_data.cpp   |  55 +-
 .../ops/templates/conv1x1_backprop_weight.cpp |  27 +-
 .../ops/templates/convNxN_backprop_weight.cpp |  33 +-
 legacy/core/src/ops/templates/conv_dw_fwd.cpp |  37 +-
 legacy/core/src/ops/templates/conv_fwd.cpp    | 132 ++--
 legacy/core/src/ops/templates/conv_fwd.hpp    |  21 +-
 legacy/core/src/ops/templates/conv_rl.cpp     |  51 +-
 .../src/ops/templates/managed_matmul_core.cpp | 116 +--
 legacy/core/src/ops/templates/matmul_core.cpp |  45 +-
 .../nested_conv1x1_backprop_data.cpp          |  21 +-
 .../nested_conv1x1_backprop_weight.cpp        |  78 +-
 .../nested_convNxN_backprop_data.cpp          |  21 +-
 .../nested_convNxN_backprop_weight.cpp        |  75 +-
 .../src/ops/templates/nested_conv_fwd.cpp     | 746 +++++++++---------
 .../runtime/microkernel/cpu/brgemm_onednn.cpp |  27 +-
 legacy/core/src/util/reflection.cpp           |  19 +-
 legacy/core/src/util/variant.hpp              |  22 +-
 lib/gc/Dialect/Microkernel/MicrokernelOps.cpp |   4 +-
 .../GPURuntime/ocl/GpuOclRuntime.cpp          |   7 +-
 .../ConvertLinalgToMicrokernel.cpp            |   3 +-
 35 files changed, 963 insertions(+), 914 deletions(-)

diff --git a/legacy/core/src/compiler/codegen/codegen_c.cpp b/legacy/core/src/compiler/codegen/codegen_c.cpp
index 8d39d9f7..2b124e12 100644
--- a/legacy/core/src/compiler/codegen/codegen_c.cpp
+++ b/legacy/core/src/compiler/codegen/codegen_c.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2020-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "codegen_c.hpp"
 #include "../ir/viewer.hpp"
@@ -1219,9 +1220,8 @@ void c_generator_pass_t::operator()(func_t f) {
 c_generator_pass_t::c_generator_pass_t(std::ostream &source,
                                        const context_ptr &ctx, bool gen_wrapper,
                                        c_generator_optional_out_t *optional_out)
-    : source_(source), context_(ctx),
-      gen_wrapper_(gen_wrapper), pre_passes_{get_default_precodegen_passes(
-                                     ctx, gen_wrapper)},
+    : source_(source), context_(ctx), gen_wrapper_(gen_wrapper),
+      pre_passes_{get_default_precodegen_passes(ctx, gen_wrapper)},
       optional_out_(optional_out) {
   prepare_include(&source_);
   if (optional_out_) {
diff --git a/legacy/core/src/compiler/codegen/llvm/intrinsic_impl.cpp b/legacy/core/src/compiler/codegen/llvm/intrinsic_impl.cpp
index d92bdc61..5e4d0e3b 100644
--- a/legacy/core/src/compiler/codegen/llvm/intrinsic_impl.cpp
+++ b/legacy/core/src/compiler/codegen/llvm/intrinsic_impl.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2023-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 #include <algorithm>
 #include <fstream>
 #include <memory>
@@ -74,7 +75,7 @@ Value *codegen_llvm_vis_t::make_int_min_max(const intrin_call_c &v, bool ismin,
 Value *codegen_llvm_vis_t::make_int_min_max(Value *v1, Value *v2, bool ismin,
                                             type_category cate) {
   // fix-me: use smax/smin for newer LLVM
-  llvm::Value *(llvm::IRBuilder<>::*ptr)(llvm::Value * LHS, llvm::Value * RHS,
+  llvm::Value *(llvm::IRBuilder<>::*ptr)(llvm::Value *LHS, llvm::Value *RHS,
                                          const llvm::Twine &Name);
   if (ismin) {
     if (cate == CATE_INT) {
diff --git a/legacy/core/src/compiler/ir/graph/anchor_loop_generator.cpp b/legacy/core/src/compiler/ir/graph/anchor_loop_generator.cpp
index 948a6a80..09fb4447 100644
--- a/legacy/core/src/compiler/ir/graph/anchor_loop_generator.cpp
+++ b/legacy/core/src/compiler/ir/graph/anchor_loop_generator.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "anchor_loop_generator.hpp"
 #include "fusible_op_utils.hpp"
diff --git a/legacy/core/src/compiler/ir/graph/anchor_loop_generator.hpp b/legacy/core/src/compiler/ir/graph/anchor_loop_generator.hpp
index 891c1f00..8f2357b4 100644
--- a/legacy/core/src/compiler/ir/graph/anchor_loop_generator.hpp
+++ b/legacy/core/src/compiler/ir/graph/anchor_loop_generator.hpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_ANCHOR_LOOP_GENERATOR_HPP
 #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_ANCHOR_LOOP_GENERATOR_HPP
@@ -54,7 +55,7 @@ class anchor_loop_generator_t : public body_generator_base_t {
       const std::shared_ptr<fusion_anchor_t> &parent_fanchor) const;
 
   void schedule_loops(context_ptr ctx, const void *config, stmt body,
-                      std::vector<for_loop> &fors) const override{};
+                      std::vector<for_loop> &fors) const override {};
   float get_gflop() const override { return 0; }
 };
 
diff --git a/legacy/core/src/compiler/ir/graph/graph_map.hpp b/legacy/core/src/compiler/ir/graph/graph_map.hpp
index 3de7a4ae..a42807c8 100644
--- a/legacy/core/src/compiler/ir/graph/graph_map.hpp
+++ b/legacy/core/src/compiler/ir/graph/graph_map.hpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_GRAPH_MAP_HPP
 #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_GRAPH_MAP_HPP
@@ -26,7 +27,9 @@ namespace impl {
 namespace graph {
 namespace gc {
 
-template <typename T> struct is_vector { static constexpr bool value = false; };
+template <typename T> struct is_vector {
+  static constexpr bool value = false;
+};
 
 template <typename T, typename Alloc> struct is_vector<std::vector<T, Alloc>> {
   static constexpr bool value = true;
diff --git a/legacy/core/src/compiler/ir/graph/graph_op.hpp b/legacy/core/src/compiler/ir/graph/graph_op.hpp
index 56cac186..f2f18401 100644
--- a/legacy/core/src/compiler/ir/graph/graph_op.hpp
+++ b/legacy/core/src/compiler/ir/graph/graph_op.hpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2020-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_GRAPH_OP_HPP
 #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_GRAPH_GRAPH_OP_HPP
 
@@ -48,7 +49,7 @@ class graph_op_t : public sc_op {
   void query_format(
       context_ptr ctx,
       std::vector<std::vector<format_stride_pair>> &supported_ins,
-      std::vector<std::vector<format_stride_pair>> &supported_outs) override{};
+      std::vector<std::vector<format_stride_pair>> &supported_outs) override {};
 
   // the param graph is created by upper function and passed to this function.
   // It should be an empty graph and already synced with external graph.
diff --git a/legacy/core/src/compiler/ir/graph/mixed_partition.cpp b/legacy/core/src/compiler/ir/graph/mixed_partition.cpp
index 06cee83a..6e75af34 100644
--- a/legacy/core/src/compiler/ir/graph/mixed_partition.cpp
+++ b/legacy/core/src/compiler/ir/graph/mixed_partition.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "mixed_partition.hpp"
 #include "binding_axis.hpp"
@@ -1849,8 +1850,8 @@ static bool try_merge_mixed_parti_parallel(mixed_parti_t *A, mixed_parti_t *B) {
 
   auto append_parti = (dep == parti_dep::l_dep_r) ? A : B,
        target_parti = (dep == parti_dep::l_dep_r) ? B : A;
-  SC_MODULE_INFO << "Start try_merge_mixed_parti_parallel: "
-                 << "Target: " << target_parti->func_->name_
+  SC_MODULE_INFO << "Start try_merge_mixed_parti_parallel: " << "Target: "
+                 << target_parti->func_->name_
                  << ", Append: " << append_parti->func_->name_;
 
   auto outer_loops_target = target_parti->get_outer_loops(),
@@ -4028,7 +4029,7 @@ static void
 crossover_dispatcher(const std::vector<mixed_parti_t::ptr> &parti_vec,
                      parti_merge_kind merge_kind) {
   // select merger by merge kind
-  bool (*merger)(mixed_parti_t * A, mixed_parti_t * B);
+  bool (*merger)(mixed_parti_t *A, mixed_parti_t *B);
   switch (merge_kind) {
   case parti_merge_kind::vertical: {
     merger = try_merge_mixed_parti_vertically;
diff --git a/legacy/core/src/compiler/ir/intrinsics.hpp b/legacy/core/src/compiler/ir/intrinsics.hpp
index 7d402070..58a1be8a 100644
--- a/legacy/core/src/compiler/ir/intrinsics.hpp
+++ b/legacy/core/src/compiler/ir/intrinsics.hpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2020-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_INTRINSICS_HPP
 #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_COMPILER_IR_INTRINSICS_HPP
@@ -38,7 +39,7 @@ struct intrinsic_handler_t {
 };
 
 struct x86_intrinsic_handler_t : public intrinsic_handler_t {
-  virtual void on_initialize(intrin_call_node &node){};
+  virtual void on_initialize(intrin_call_node &node) {};
   virtual void on_initialize(low_level_intrin_node &node) = 0;
   x86_intrinsic_handler_t(const std::string &name);
   virtual ~x86_intrinsic_handler_t() = default;
diff --git a/legacy/core/src/compiler/ir/transform/dynamic_parallel_transform.cpp b/legacy/core/src/compiler/ir/transform/dynamic_parallel_transform.cpp
index 7095ec36..ccd4ef59 100644
--- a/legacy/core/src/compiler/ir/transform/dynamic_parallel_transform.cpp
+++ b/legacy/core/src/compiler/ir/transform/dynamic_parallel_transform.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2023-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "dynamic_parallel_transform.hpp"
 #include <algorithm>
@@ -123,8 +124,8 @@ struct parallel_for_scope_t {
                        bool is_start, uint64_t tid_step,
                        const std::vector<expr_c> *parent_iters,
                        const expr_c &cur_iter)
-      : loop_{loop},
-        nested_level_{nested_level}, is_start_{is_start}, tid_step_{tid_step} {
+      : loop_{loop}, nested_level_{nested_level}, is_start_{is_start},
+        tid_step_{tid_step} {
     if (parent_iters) {
       iters_ = *parent_iters;
       if (cur_iter.defined()) {
diff --git a/legacy/core/src/compiler/jit/xbyak/backend/stack_frame_model.cpp b/legacy/core/src/compiler/jit/xbyak/backend/stack_frame_model.cpp
index 3b69380c..9db937ea 100644
--- a/legacy/core/src/compiler/jit/xbyak/backend/stack_frame_model.cpp
+++ b/legacy/core/src/compiler/jit/xbyak/backend/stack_frame_model.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2021-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include <algorithm>
 #include <cassert>
@@ -210,8 +211,8 @@ std::string stack_frame_model::one_line_summary() const {
 
 #define LOG_LINE(S1, ...)                                                      \
   if (logging_enabled_) {                                                      \
-    cout << "[" << utils::brief_lineloc(__FILE__, __LINE__) << "]"             \
-         << " " << S1 __VA_ARGS__ << endl;                                     \
+    cout << "[" << utils::brief_lineloc(__FILE__, __LINE__) << "]" << " "      \
+         << S1 __VA_ARGS__ << endl;                                            \
   }
 
 #define LOG_FUNC_ENTRY                                                         \
@@ -309,10 +310,9 @@ void stack_frame_model::push_named_object(const std::string &name,
                                           x86_64::cpu_data_type val_type,
                                           size_t num_bytes,
                                           const std::string &debug_comment) {
-  LOG_FUNC_ENTRY_WITH_TEXT("name=\"" << name << "\""
-                                     << " num_bytes=" << num_bytes
-                                     << " debug_comment=\"" << debug_comment
-                                     << "\"")
+  LOG_FUNC_ENTRY_WITH_TEXT("name=\""
+                           << name << "\"" << " num_bytes=" << num_bytes
+                           << " debug_comment=\"" << debug_comment << "\"")
   assert_unused_name(name);
   COMPILE_ASSERT(!name.empty(), "named objects cannot have blank name");
   COMPILE_ASSERT(num_bytes > 0,
@@ -328,11 +328,10 @@ void stack_frame_model::push_named_object(const std::string &name,
 void stack_frame_model::push_named_tensor_buffer_object(
     const std::string &name, x86_64::cpu_data_type val_type,
     size_t num_elements, size_t num_bytes, const std::string &debug_comment) {
-  LOG_FUNC_ENTRY_WITH_TEXT("name=\"" << name << "\""
-                                     << " num_elements=" << num_elements
-                                     << " num_bytes=" << num_bytes
-                                     << " debug_comment=\"" << debug_comment
-                                     << "\"")
+  LOG_FUNC_ENTRY_WITH_TEXT("name=\""
+                           << name << "\"" << " num_elements=" << num_elements
+                           << " num_bytes=" << num_bytes << " debug_comment=\""
+                           << debug_comment << "\"")
   assert_unused_name(name);
   COMPILE_ASSERT(!name.empty(), "named objects cannot have blank name");
   COMPILE_ASSERT(num_bytes > 0,
@@ -455,10 +454,9 @@ void stack_frame_model::assert_unused_name(const std::string &name) {
 }
 
 void stack_frame_model::add_caller_param_slot(const caller_param_slot &s) {
-  LOG_FUNC_ENTRY_WITH_TEXT("name=\"" << s.name_ << "\""
-                                     << " slot_size=" << s.slot_size_
-                                     << " debug_comment=\"" << s.debug_comment_
-                                     << "\"")
+  LOG_FUNC_ENTRY_WITH_TEXT("name=\""
+                           << s.name_ << "\"" << " slot_size=" << s.slot_size_
+                           << " debug_comment=\"" << s.debug_comment_ << "\"")
   COMPILE_ASSERT(!s.name_.empty(), "named objects cannot have blank name");
   COMPILE_ASSERT(s.slot_size_ > 0,
                  "stack_frame_model items must have positive sizes");
diff --git a/legacy/core/src/compiler/jit/xbyak/backend/xbyak_lowering_viewer.cpp b/legacy/core/src/compiler/jit/xbyak/backend/xbyak_lowering_viewer.cpp
index 2434bf1d..07cc7748 100644
--- a/legacy/core/src/compiler/jit/xbyak/backend/xbyak_lowering_viewer.cpp
+++ b/legacy/core/src/compiler/jit/xbyak/backend/xbyak_lowering_viewer.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2020-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include <fstream>
 #include <iomanip>
@@ -650,8 +651,7 @@ void xbyak_lowering_viewer::handle_x86_intrisic(
     XBYAK_GEN(pext, X86_R64_R64_R64, op_dst, op_lhs, op_rhs);
   } break;
   default: {
-    COMPILE_ASSERT(false, FUNC_INFO << "Invalid intrisic: "
-                                    << "intrin");
+    COMPILE_ASSERT(false, FUNC_INFO << "Invalid intrisic: " << "intrin");
   } break;
   }
 }
@@ -925,8 +925,7 @@ void xbyak_lowering_viewer::handle_avx_intrisic(
     handle_avx_mov_mask(op_dst, op_src, src_dtype);
   } break;
   default: {
-    COMPILE_ASSERT(false, FUNC_INFO << "Invalid intrisic: "
-                                    << "intrin");
+    COMPILE_ASSERT(false, FUNC_INFO << "Invalid intrisic: " << "intrin");
   } break;
   }
 }
diff --git a/legacy/core/src/ops/fusible/transpose.cpp b/legacy/core/src/ops/fusible/transpose.cpp
index 5238cf33..299e0b79 100644
--- a/legacy/core/src/ops/fusible/transpose.cpp
+++ b/legacy/core/src/ops/fusible/transpose.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2023-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 #include "compiler/ir/graph/fusible_op_utils.hpp"
 #include "reorder.hpp"
 #include "util/math_utils.hpp"
@@ -348,23 +349,25 @@ bool can_be_fast_transpose(
 
 #define TRANS2D_ASSIGN(dst, src)                                               \
   cur_list.emplace_back(                                                       \
-      builder::make_assign_unattached(rows[((dst)-1)], rows[((src)-1)]));
+      builder::make_assign_unattached(rows[((dst) - 1)], rows[((src) - 1)]));
 // unpack and interleave
 #define TRANS2D_UNPACK_ASSIGN(option, dst, src1, src2, elem_bits)              \
   cur_list.emplace_back(builder::make_assign_unattached(                       \
-      rows[((dst)-1)], builder::make_unpack_##option(                          \
-                           rows[((src1)-1)], rows[((src2)-1)], elem_bits)));
+      rows[((dst) - 1)],                                                       \
+      builder::make_unpack_##option(rows[((src1) - 1)], rows[((src2) - 1)],    \
+                                    elem_bits)));
 #define TRANS2D_SHUFFLE_PERMUTE_ASSIGN_F32(command, dst, src1, src2, imm,      \
                                            elem_bits)                          \
   cur_list.emplace_back(builder::make_assign_unattached(                       \
-      rows[((dst)-1)],                                                         \
-      builder::make_##command(rows[((src1)-1)], rows[((src2)-1)], imm,         \
+      rows[((dst) - 1)],                                                       \
+      builder::make_##command(rows[((src1) - 1)], rows[((src2) - 1)], imm,     \
                               elem_bits)));
 
 #define PERMUTEX_ASSIGN_F32(dst, src1, src2, imm, mask)                        \
   cur_list.emplace_back(builder::make_assign_unattached(                       \
-      rows[((dst)-1)],                                                         \
-      builder::make_permute(rows[((src1)-1)], rows[((src2)-1)], imm, mask)));
+      rows[((dst) - 1)],                                                       \
+      builder::make_permute(rows[((src1) - 1)], rows[((src2) - 1)], imm,       \
+                            mask)));
 
 #define TRANS2D_REG_CALCULATION_F32(type_bits)                                 \
   TRANS2D_UNPACK_ASSIGN(low, 9, 1, 2, 32)                                      \
diff --git a/legacy/core/src/ops/fusible/unary_elemwise.cpp b/legacy/core/src/ops/fusible/unary_elemwise.cpp
index bef110c5..da4d8366 100644
--- a/legacy/core/src/ops/fusible/unary_elemwise.cpp
+++ b/legacy/core/src/ops/fusible/unary_elemwise.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2020-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include <assert.h>
 
@@ -282,7 +283,7 @@ expr tanh_op_t::compute_element(expr in) {
 #define DECL_VEC_CONSTANT(name, dtype, value)                                  \
   expr name = make_expr<constant_node>(value, sc_data_type_t::dtype(lanes));
 
-// clang-format off
+  // clang-format off
 // NOLINTNEXTLINE
 #define DECL_VEC_VAR(name, dtype) auto name = builder::make_var( \
             sc_data_type_t::dtype(lanes), #name + fusion_create_var_idx()); \
@@ -290,7 +291,7 @@ expr tanh_op_t::compute_element(expr in) {
 // clang-format on
 #define DECL_CONSTANT(name, dtype, value)                                      \
   expr name = make_expr<constant_node>(value, datatypes::dtype);
-// clang-format off
+  // clang-format off
 // NOLINTNEXTLINE
 #define DECL_VAR(name, dtype) auto name = builder::make_var( \
             datatypes::dtype, #name + fusion_create_var_idx()); \
diff --git a/legacy/core/src/ops/managed_matmul_core.cpp b/legacy/core/src/ops/managed_matmul_core.cpp
index acf691c4..1fa13b75 100644
--- a/legacy/core/src/ops/managed_matmul_core.cpp
+++ b/legacy/core/src/ops/managed_matmul_core.cpp
@@ -1,18 +1,20 @@
-/*******************************************************************************
- * Copyright 2022-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
 #include "managed_matmul_core.hpp"
 #include "matmul_core.hpp"
 #include "templates/managed_matmul_core.hpp"
diff --git a/legacy/core/src/ops/matmul_core.cpp b/legacy/core/src/ops/matmul_core.cpp
index 9e2187b5..97750214 100644
--- a/legacy/core/src/ops/matmul_core.cpp
+++ b/legacy/core/src/ops/matmul_core.cpp
@@ -1,18 +1,20 @@
-/*******************************************************************************
- * Copyright 2022-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
 #include "matmul_core.hpp"
 #include "templates/matmul_core.hpp"
 #include "templates/utils.hpp"
diff --git a/legacy/core/src/ops/templates/conv1x1_backprop_data.cpp b/legacy/core/src/ops/templates/conv1x1_backprop_data.cpp
index 8fe1dfee..7b0563c9 100644
--- a/legacy/core/src/ops/templates/conv1x1_backprop_data.cpp
+++ b/legacy/core/src/ops/templates/conv1x1_backprop_data.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "conv1x1_backprop_data.hpp"
 #include <memory>
@@ -198,18 +199,18 @@ bool gen_conv1x1_backprop_data_t::generate(context_ptr ctx,
       assert(tile_d == 1 && tile_p == 1 && tile_q == 1);
       int C_shift_d = padding_d > 0
         ? (padding_d > stride_d
-            ? (stride_d == 1 ? 0 : stride_d - padding_d % stride_d)
-            : stride_d - padding_d)
+              ? (stride_d == 1 ? 0 : stride_d - padding_d % stride_d)
+              : stride_d - padding_d)
         : 0;
       int C_shift_h = padding_h > 0
         ? (padding_h > stride_h
-            ? (stride_h == 1 ? 0 : stride_h - padding_h % stride_h)
-            : stride_h - padding_h)
+              ? (stride_h == 1 ? 0 : stride_h - padding_h % stride_h)
+              : stride_h - padding_h)
         : 0;
       int C_shift_w = padding_w > 0
         ? (padding_w > stride_w
-            ? (stride_w == 1 ? 0 : stride_w - padding_w % stride_w)
-            : stride_w - padding_w)
+              ? (stride_w == 1 ? 0 : stride_w - padding_w % stride_w)
+              : stride_w - padding_w)
         : 0;
       C_shift_d = C_shift_d < 0 ? 0 : C_shift_d;
       C_shift_h = C_shift_h < 0 ? 0 : C_shift_h;
@@ -246,17 +247,17 @@ bool gen_conv1x1_backprop_data_t::generate(context_ptr ctx,
                   stride_a = is_3d ? O * P * Q * K_block : P * Q * K_block;
                   a_idx = is_3d
                     ? std::vector<expr> {n, 0, d_o * tile_d + A_shift_d,
-                      p_o * tile_p + A_shift_h, q_o * tile_q + A_shift_w, 0}
+                        p_o * tile_p + A_shift_h, q_o * tile_q + A_shift_w, 0}
                     : std::vector<expr> {n, 0, p_o * tile_p + A_shift_h,
-                      q_o * tile_q + A_shift_w, 0};
+                        q_o * tile_q + A_shift_w, 0};
                 } else {
                   LDA = K;
                   stride_a = K_block;
                   a_idx = is_3d
                     ? std::vector<expr> {n, d_o * tile_d + A_shift_d,
-                      p_o * tile_p + A_shift_h, q_o * tile_q + A_shift_w, 0}
-                    : std::vector<expr> {
-                      n, p_o * tile_p + A_shift_h, q_o * tile_q + A_shift_w, 0};
+                        p_o * tile_p + A_shift_h, q_o * tile_q + A_shift_w, 0}
+                    : std::vector<expr> {n, p_o * tile_p + A_shift_h,
+                        q_o * tile_q + A_shift_w, 0};
                 }
                 b_idx = std::vector<expr> {c_o, 0, 0, 0, 0, 0};
                 if (is_3d) b_idx.emplace_back(0);
@@ -264,22 +265,22 @@ bool gen_conv1x1_backprop_data_t::generate(context_ptr ctx,
                 if (is_out_blocking) {
                   LDC = C_block * stride_w;
                   c_idx = is_3d ? std::vector<expr> {n, c_o,
-                            d_o * tile_d * stride_d + C_shift_d,
-                            p_o * tile_p * stride_h + C_shift_h,
-                            q_o * tile_q * stride_w + C_shift_w, 0}
+                                    d_o * tile_d * stride_d + C_shift_d,
+                                    p_o * tile_p * stride_h + C_shift_h,
+                                    q_o * tile_q * stride_w + C_shift_w, 0}
                                 : std::vector<expr> {n, c_o,
-                                  p_o * tile_p * stride_h + C_shift_h,
-                                  q_o * tile_q * stride_w + C_shift_w, 0};
+                                    p_o * tile_p * stride_h + C_shift_h,
+                                    q_o * tile_q * stride_w + C_shift_w, 0};
                 } else {
                   LDC = C * stride_w;
                   LDC->attr().set("N_axis",
                     is_3d ? std::vector<size_t> {4} : std::vector<size_t> {3});
                   c_idx = is_3d
                     ? std::vector<expr> {n, d_o * tile_d * stride_d + C_shift_d,
-                      p_o * tile_p * stride_h + C_shift_h,
-                      q_o * tile_q * stride_w + C_shift_w, c_o * C_block}
+                        p_o * tile_p * stride_h + C_shift_h,
+                        q_o * tile_q * stride_w + C_shift_w, c_o * C_block}
                     : std::vector<expr> {n, p_o * tile_p * stride_h + C_shift_h,
-                      q_o * tile_q * stride_w + C_shift_w, c_o * C_block};
+                        q_o * tile_q * stride_w + C_shift_w, c_o * C_block};
                 }
                 LDC->attr().set("stride_w", stride_w);
                 builtin::brgemm_init_update(tensor_ptr(output, a_idx),
diff --git a/legacy/core/src/ops/templates/conv1x1_backprop_weight.cpp b/legacy/core/src/ops/templates/conv1x1_backprop_weight.cpp
index 8b0fdf58..2150c429 100644
--- a/legacy/core/src/ops/templates/conv1x1_backprop_weight.cpp
+++ b/legacy/core/src/ops/templates/conv1x1_backprop_weight.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "conv1x1_backprop_weight.hpp"
 #include <algorithm>
@@ -498,7 +499,7 @@ bool gen_conv1x1_backprop_weight_t::generate_reduce_ALL(const context_ptr &ctx,
             _tensor_(output_tmp, dtype,
               dtype_block > 1
                 ? std::vector<expr> {NPQ_tile, NPQ_block_pad / dtype_block,
-                  K_block, dtype_block}
+                    K_block, dtype_block}
                 : std::vector<expr> {NPQ_tile, NPQ_block, K_block});
             _named_for_(lnt, nt_i, 0, NPQ_tile) {
               _named_for_(lnpq, npq_i, 0, NPQ_block_pad) {
@@ -572,9 +573,9 @@ bool gen_conv1x1_backprop_weight_t::generate_reduce_ALL(const context_ptr &ctx,
                                   : std::vector<expr> {0, 0, 0}),
                 tensor_ptr(del_weight_tmp_buf,
                   is_3d ? std::vector<expr> {n_o * K_num_block + k_o, c_o, 0, 0,
-                    0, 0, 0}
+                            0, 0, 0}
                         : std::vector<expr> {n_o * K_num_block + k_o, c_o, 0, 0,
-                          0, 0}),
+                            0, 0}),
                 NPQ_tile, C_block, K_block, NPQ_block_pad, NPQ_block_pad,
                 K_block, K_block, C_block * NPQ_block_pad,
                 K_block
@@ -589,9 +590,9 @@ bool gen_conv1x1_backprop_weight_t::generate_reduce_ALL(const context_ptr &ctx,
                                   : std::vector<expr> {0, 0, 0}),
                 tensor_ptr(del_weight_tmp_buf,
                   is_3d ? std::vector<expr> {n_o * K_num_block + k_o, c_o, 0, 0,
-                    0, 0, 0}
+                            0, 0, 0}
                         : std::vector<expr> {n_o * K_num_block + k_o, c_o, 0, 0,
-                          0, 0}),
+                            0, 0}),
                 NPQ_tile, C_block, K_block, NPQ_block_pad, NPQ_block_pad,
                 K_block, K_block, C_block * NPQ_block_pad,
                 K_block
diff --git a/legacy/core/src/ops/templates/convNxN_backprop_weight.cpp b/legacy/core/src/ops/templates/convNxN_backprop_weight.cpp
index 69b446de..9c943493 100644
--- a/legacy/core/src/ops/templates/convNxN_backprop_weight.cpp
+++ b/legacy/core/src/ops/templates/convNxN_backprop_weight.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "convNxN_backprop_weight.hpp"
 #include <memory>
@@ -287,11 +288,11 @@ bool gen_convNxN_backprop_weight::generate_reduce_N(const context_ptr &ctx,
                             output, {n_o, k_o, p_o, output_q_start, 0, 0}),
                           tensor_ptr(data,
                             dtype_block > 1 ? std::vector<expr> {n_o, c_o,
-                              p_o * stride_h + r - padding_h, q_start_valid, 0,
-                              0, 0}
+                                                p_o * stride_h + r - padding_h,
+                                                q_start_valid, 0, 0, 0}
                                             : std::vector<expr> {n_o, c_o,
-                                              p_o * stride_h + r - padding_h,
-                                              q_start_valid, 0, 0}),
+                                                p_o * stride_h + r - padding_h,
+                                                q_start_valid, 0, 0}),
                           tensor_ptr(
                             N_num_block > 1 ? del_weight_tmp_buf : del_weight,
                             {n_o * K_num_block + k_o, c_o, r, s, 0, 0}),
@@ -309,11 +310,11 @@ bool gen_convNxN_backprop_weight::generate_reduce_N(const context_ptr &ctx,
                             output, {n_o, k_o, p_o, output_q_start, 0, 0}),
                           tensor_ptr(data,
                             dtype_block > 1 ? std::vector<expr> {n_o, c_o,
-                              p_o * stride_h + r - padding_h, q_start_valid, 0,
-                              0, 0}
+                                                p_o * stride_h + r - padding_h,
+                                                q_start_valid, 0, 0, 0}
                                             : std::vector<expr> {n_o, c_o,
-                                              p_o * stride_h + r - padding_h,
-                                              q_start_valid, 0, 0}),
+                                                p_o * stride_h + r - padding_h,
+                                                q_start_valid, 0, 0}),
                           tensor_ptr(
                             N_num_block > 1 ? del_weight_tmp_buf : del_weight,
                             {n_o * K_num_block + k_o, c_o, r, s, 0, 0}),
diff --git a/legacy/core/src/ops/templates/conv_dw_fwd.cpp b/legacy/core/src/ops/templates/conv_dw_fwd.cpp
index 8d7cad29..e9215ea1 100644
--- a/legacy/core/src/ops/templates/conv_dw_fwd.cpp
+++ b/legacy/core/src/ops/templates/conv_dw_fwd.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2023-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "conv_dw_fwd.hpp"
 #include <utility>
@@ -71,7 +72,7 @@ config_ptr gen_conv_dw_fwd_t::get_default_config(context_ptr ctx) const {
   cfg.bs_threads = mb_ > num_threads
     ? num_threads
     : *(std::find_if(thread_split.rbegin(), thread_split.rend(),
-      [&](int split) { return split == 1 || split < mb_; }));
+        [&](int split) { return split == 1 || split < mb_; }));
   cfg.h_threads = num_threads / cfg.bs_threads;
   cfg.w_threads = 1;
   cfg.g_threads = 1;
@@ -608,7 +609,7 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const {
                       builtin::brgemm_init(
                         tensor_ptr(global_aux_buffer,
                           is_3d_ ? std::vector<expr> {tid, aux_buf_d, aux_buf_h,
-                            0, 0}
+                                     0, 0}
                                  : std::vector<expr> {tid, aux_buf_h, 0, 0}),
                         builder::make_cast(datatypes::s32, left_pad), g_block,
                         LDA, dtype_input, padding_value);
@@ -640,9 +641,9 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const {
                       builtin::brgemm_init(
                         tensor_ptr(global_aux_buffer,
                           is_3d_ ? std::vector<expr> {tid, aux_buf_d, aux_buf_h,
-                            w_block_size_without_pad, 0}
+                                     w_block_size_without_pad, 0}
                                  : std::vector<expr> {tid, aux_buf_h,
-                                   w_block_size_without_pad, 0}),
+                                     w_block_size_without_pad, 0}),
                         builder::make_cast(datatypes::s32,
                           aux_w_block_size - w_block_size_without_pad),
                         g_block, LDA, dtype_input, padding_value);
@@ -654,8 +655,8 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const {
                         auto valid_kh
                           = (h_nopad_end_idx - h_nopad_begin_idx - 1) / dh_ + 1;
                         idx = builder::make_cast(datatypes::u32,
-                          use_var_bs ? (
-                            aux_buf_d * valid_kh * kw_ + aux_buf_h * kw_ + kw)
+                          use_var_bs ? (aux_buf_d * valid_kh * kw_
+                                         + aux_buf_h * kw_ + kw)
                                      : (kd * kh_ * kw_ + kh * kw_ + kw));
                       } else {
                         idx = builder::make_cast(datatypes::u32,
@@ -668,7 +669,7 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const {
                       A_list[idx] = tensor_ptr(global_aux_buffer,
                         is_3d_
                           ? std::vector<expr> {tid, aux_buf_d, aux_buf_h,
-                            kw * dw_, 0}
+                              kw * dw_, 0}
                           : std::vector<expr> {tid, aux_buf_h, kw * dw_, 0});
                     }
                   }
@@ -848,7 +849,7 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const {
               if (is_3d_) {
                 auto cond = large_pad
                   ? (((cur_iw + aux_w_block_size <= 0) || (cur_iw > iw_))
-                    || (num_d_pad >= kd_ || num_h_pad >= kh_))
+                      || (num_d_pad >= kd_ || num_h_pad >= kh_))
                   : (num_d_pad >= kd_ || num_h_pad >= kh_);
                 _if_(cond && padding_value == 0) {
                   zero_out_aux_buffer();
@@ -891,7 +892,7 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const {
                           expr idx = builder::make_cast(datatypes::u32,
                             use_var_bs
                               ? ((kd - d_nopad_begin_idx) * valid_kh * kw_
-                                + (kh - h_nopad_begin_idx) * kw_ + kw)
+                                  + (kh - h_nopad_begin_idx) * kw_ + kw)
                               : (kd * kh_ * kw_ + kh * kw_ + kw));
                           A_list[idx] = tensor_ptr(input,
                             std::vector<expr> {n, cur_id + kd * dd_,
@@ -962,7 +963,7 @@ void gen_conv_dw_fwd_t::compute_conv_physical_padding(CONV_ARG_LIST) const {
               } else {
                 auto cond = large_pad
                   ? (((cur_iw + aux_w_block_size <= 0) || (cur_iw > iw_))
-                    || (num_h_pad >= kh_))
+                      || (num_h_pad >= kh_))
                   : (num_h_pad >= kh_);
                 _if_(cond && padding_value == 0) {
                   zero_out_aux_buffer();
diff --git a/legacy/core/src/ops/templates/conv_fwd.cpp b/legacy/core/src/ops/templates/conv_fwd.cpp
index d95f3653..e6dc5fc8 100644
--- a/legacy/core/src/ops/templates/conv_fwd.cpp
+++ b/legacy/core/src/ops/templates/conv_fwd.cpp
@@ -1,18 +1,20 @@
-/*******************************************************************************
- * Copyright 2020-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
 #include "conv_fwd.hpp"
 #include <algorithm>
 #include <functional>
@@ -509,18 +511,18 @@ std::vector<expr> gen_conv_fwd_t::data_offset(const expr &N, const expr &G,
     !(is_3d_ && force_3d), "Force_3d is only capable for 2d inputs");
   return is_group_conv_
     ? ((is_3d_ || force_3d)
-        ? (!blocking_input_
-            ? std::vector<expr> {N, D, H, W, G, C * C_block + c_idx}
-            : std::vector<expr> {N, G, C, D, H, W, c_idx})
-        : (!blocking_input_
-            ? std::vector<expr> {N, H, W, G, C * C_block + c_idx}
-            : std::vector<expr> {N, G, C, H, W, c_idx}))
+          ? (!blocking_input_
+                ? std::vector<expr> {N, D, H, W, G, C * C_block + c_idx}
+                : std::vector<expr> {N, G, C, D, H, W, c_idx})
+          : (!blocking_input_
+                ? std::vector<expr> {N, H, W, G, C * C_block + c_idx}
+                : std::vector<expr> {N, G, C, H, W, c_idx}))
     : ((is_3d_ || force_3d)
-        ? (!blocking_input_
-            ? std::vector<expr> {N, D, H, W, C * C_block + c_idx}
-            : std::vector<expr> {N, C, D, H, W, c_idx})
-        : (!blocking_input_ ? std::vector<expr> {N, H, W, C * C_block + c_idx}
-                            : std::vector<expr> {N, C, H, W, c_idx}));
+          ? (!blocking_input_
+                ? std::vector<expr> {N, D, H, W, C * C_block + c_idx}
+                : std::vector<expr> {N, C, D, H, W, c_idx})
+          : (!blocking_input_ ? std::vector<expr> {N, H, W, C * C_block + c_idx}
+                              : std::vector<expr> {N, C, H, W, c_idx}));
 }
 
 std::vector<expr> gen_conv_fwd_t::output_offset(const expr &N, const expr &G,
@@ -528,17 +530,17 @@ std::vector<expr> gen_conv_fwd_t::output_offset(const expr &N, const expr &G,
   const expr &C_block, const expr &c_idx) const {
   return is_group_conv_
     ? (is_3d_ ? (!blocking_output_
-           ? std::vector<expr> {N, D, H, W, G, C * C_block + c_idx}
-           : std::vector<expr> {N, G, C, D, H, W, c_idx})
+                    ? std::vector<expr> {N, D, H, W, G, C * C_block + c_idx}
+                    : std::vector<expr> {N, G, C, D, H, W, c_idx})
+              : (!blocking_output_
+                    ? std::vector<expr> {N, H, W, G, C * C_block + c_idx}
+                    : std::vector<expr> {N, G, C, H, W, c_idx}))
+    : (is_3d_ ? (!blocking_output_
+                    ? std::vector<expr> {N, D, H, W, C * C_block + c_idx}
+                    : std::vector<expr> {N, C, D, H, W, c_idx})
               : (!blocking_output_
-                  ? std::vector<expr> {N, H, W, G, C * C_block + c_idx}
-                  : std::vector<expr> {N, G, C, H, W, c_idx}))
-    : (is_3d_
-        ? (!blocking_output_
-            ? std::vector<expr> {N, D, H, W, C * C_block + c_idx}
-            : std::vector<expr> {N, C, D, H, W, c_idx})
-        : (!blocking_output_ ? std::vector<expr> {N, H, W, C * C_block + c_idx}
-                             : std::vector<expr> {N, C, H, W, c_idx}));
+                    ? std::vector<expr> {N, H, W, C * C_block + c_idx}
+                    : std::vector<expr> {N, C, H, W, c_idx}));
 }
 
 void gen_conv_fwd_t::bind_output_loop_axis(const for_loop &loop,
@@ -643,31 +645,31 @@ void gen_conv_fwd_t::create_anchor(fusion_anchor_mgr_t *fusion,
         fusion->create_fusion_anchor(slice_map {{output_gt.get(),
           blocking_output_
             ? slice_range_list {{{n, n_len}, {g, g_len}, {k, k_len}, {d, d_len},
-              {p, p_len}, {q, q_len}, {0, K_block}}}
+                {p, p_len}, {q, q_len}, {0, K_block}}}
             : slice_range_list {{{n, n_len}, {d, d_len}, {p, p_len}, {q, q_len},
-              {g, g_len}, {k * K_block, k_len * K_block}}}}});
+                {g, g_len}, {k * K_block, k_len * K_block}}}}});
       } else {
         fusion->create_fusion_anchor(slice_map {{output_gt.get(),
           blocking_output_
             ? slice_range_list {{{n, n_len}, {g, g_len}, {k, k_len}, {p, p_len},
-              {q, q_len}, {0, K_block}}}
+                {q, q_len}, {0, K_block}}}
             : slice_range_list {{{n, n_len}, {p, p_len}, {q, q_len}, {g, g_len},
-              {k * K_block, k_len * K_block}}}}});
+                {k * K_block, k_len * K_block}}}}});
       }
     } else {
       if (is_3d_) {
         fusion->create_fusion_anchor(slice_map {{output_gt.get(),
           blocking_output_
             ? slice_range_list {{{n, n_len}, {k, k_len}, {d, d_len}, {p, p_len},
-              {q, q_len}, {0, K_block}}}
+                {q, q_len}, {0, K_block}}}
             : slice_range_list {{{n, n_len}, {d, d_len}, {p, p_len}, {q, q_len},
-              {k * K_block, k_len * K_block}}}}});
+                {k * K_block, k_len * K_block}}}}});
       } else {
         fusion->create_fusion_anchor(slice_map {{output_gt.get(),
           blocking_output_ ? slice_range_list {{{n, n_len}, {k, k_len},
-            {p, p_len}, {q, q_len}, {0, K_block}}}
+                               {p, p_len}, {q, q_len}, {0, K_block}}}
                            : slice_range_list {{{n, n_len}, {p, p_len},
-                             {q, q_len}, {k * K_block, k_len * K_block}}}}});
+                               {q, q_len}, {k * K_block, k_len * K_block}}}}});
       }
     }
   }
@@ -804,9 +806,9 @@ void gen_conv_fwd_t::compute_1x1_pack_input(CONV_ARG_LIST) const {
     if (blocking_input_) {
       _tensor_(input_tmp, get_input_dtype(),
         is_group_conv_ ? std::vector<expr> {mb_expr_, groups_, C_num_block,
-          oh_expr_, ow_, config.C_block}
-                       : std::vector<expr> {
-                         mb_expr_, C_num_block, oh_expr_, ow_, config.C_block});
+                           oh_expr_, ow_, config.C_block}
+                       : std::vector<expr> {mb_expr_, C_num_block, oh_expr_,
+                           ow_, config.C_block});
       _named_for_(ln, n, 0, mb_expr_, 1, for_type::PARALLEL) {
         _named_for_(lg, g, 0, groups_) {
           _named_for_(lk, c_o, 0, C_num_block) {
@@ -1042,9 +1044,9 @@ void gen_conv_fwd_t::compute_conv_no_padding(CONV_ARG_LIST) const {
     if (blocking_input_) {
       _tensor_(input_tmp, get_input_dtype(),
         is_group_conv_ ? std::vector<expr> {mb_expr_, groups_, C_num_block, sh_,
-          pack_ih, iw_, config.C_block}
+                           pack_ih, iw_, config.C_block}
                        : std::vector<expr> {mb_expr_, C_num_block, sh_, pack_ih,
-                         iw_, config.C_block});
+                           iw_, config.C_block});
       for_loop ls;
       _named_for_(ln, n, 0, mb_expr_, 1, for_type::PARALLEL) {
         _named_for_(lg, g, 0, groups_) {
@@ -1136,13 +1138,13 @@ void gen_conv_fwd_t::compute_conv_no_padding(CONV_ARG_LIST) const {
                     auto idx = c_o * kh_ * kw_ + r * kw_ + s;
                     std::vector<expr> input_pos = need_pack_strided_input
                       ? data_offset(n, g, c_o, dh_ * r % sh_,
-                        ((o_o * config.tile_os) / adj_ow) + dh_ * r / sh_,
-                        ((o_o * config.tile_os) % adj_ow) * sw_ + dw_ * s,
-                        config.C_block, 0, need_pack_strided_input)
+                          ((o_o * config.tile_os) / adj_ow) + dh_ * r / sh_,
+                          ((o_o * config.tile_os) % adj_ow) * sw_ + dw_ * s,
+                          config.C_block, 0, need_pack_strided_input)
                       : data_offset(n, g, c_o, 0,
-                        ((o_o * config.tile_os) / adj_ow) * sh_ + dh_ * r,
-                        ((o_o * config.tile_os) % adj_ow) * sw_ + dw_ * s,
-                        config.C_block, 0, need_pack_strided_input);
+                          ((o_o * config.tile_os) / adj_ow) * sh_ + dh_ * r,
+                          ((o_o * config.tile_os) % adj_ow) * sw_ + dw_ * s,
+                          config.C_block, 0, need_pack_strided_input);
                     A_list[idx] = tensor_ptr(real_input, input_pos);
                     B_list[idx]
                       = tensor_ptr(weight, weight_offset(g, k_o, c_o, 0, r, s));
@@ -1921,9 +1923,9 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const {
                                 builtin::brgemm_init(
                                   tensor_ptr(g_sub_tensor,
                                     is_3d_ ? std::vector<expr> {tid, sub_tsr_d,
-                                      sub_tsr_h, 0, 0}
+                                               sub_tsr_h, 0, 0}
                                            : std::vector<expr> {tid, sub_tsr_h,
-                                             0, 0}),
+                                               0, 0}),
                                   builder::make_cast(datatypes::s32, left_pad),
                                   config.C_block, LDA, dtype_input,
                                   padding_value);
@@ -1936,9 +1938,9 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const {
                                 _for_(k, 0, config.C_block, (int)lanes) {
                                   g_sub_tensor[span_t(is_3d_
                                       ? std::vector<expr> {tid, sub_tsr_d,
-                                        sub_tsr_h, j, k}
+                                          sub_tsr_h, j, k}
                                       : std::vector<expr> {tid, sub_tsr_h, j,
-                                        k},
+                                          k},
                                     lanes)]
                                     = input[span_t(
                                       data_offset(n, g, c_o,
@@ -1953,9 +1955,10 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const {
                                 builtin::brgemm_init(
                                   tensor_ptr(g_sub_tensor,
                                     is_3d_ ? std::vector<expr> {tid, sub_tsr_d,
-                                      sub_tsr_h, tile_size_exclude_right_pad, 0}
+                                               sub_tsr_h,
+                                               tile_size_exclude_right_pad, 0}
                                            : std::vector<expr> {tid, sub_tsr_h,
-                                             tile_size_exclude_right_pad, 0}),
+                                               tile_size_exclude_right_pad, 0}),
                                   builder::make_cast(datatypes::s32,
                                     src_row_tile_size
                                       - tile_size_exclude_right_pad),
@@ -1973,7 +1976,7 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const {
                                   idx = builder::make_cast(datatypes::u32,
                                     use_var_bs
                                       ? (sub_tsr_d * valid_kh * num_kw
-                                        + sub_tsr_h * num_kw + wi)
+                                          + sub_tsr_h * num_kw + wi)
                                       : (di * kh_ * num_kw + hi * num_kw + wi));
                                 } else {
                                   idx = builder::make_cast(datatypes::u32,
@@ -1984,9 +1987,9 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const {
                                 // conv
                                 A_list[idx] = tensor_ptr(g_sub_tensor,
                                   is_3d_ ? std::vector<expr> {tid, sub_tsr_d,
-                                    sub_tsr_h, wi * dw_ * kw_step, 0}
+                                             sub_tsr_h, wi * dw_ * kw_step, 0}
                                          : std::vector<expr> {tid, sub_tsr_h,
-                                           wi * dw_ * kw_step, 0});
+                                             wi * dw_ * kw_step, 0});
                               }
                             }
                           }
@@ -2194,7 +2197,7 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const {
                         auto cond = large_pad
                           ? (((cur_iw + src_row_tile_size <= pw_b_)
                                || (cur_iw > iw_ + pw_b_))
-                            || (num_d_pad >= kd_ || num_h_pad >= kh_))
+                              || (num_d_pad >= kd_ || num_h_pad >= kh_))
                           : (num_d_pad >= kd_ || num_h_pad >= kh_);
                         _if_(cond && padding_value == 0) {
                           zero_out_sub_tensor();
@@ -2240,11 +2243,12 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const {
                                   auto valid_kh
                                     = h_unpad_end_idx - h_unpad_begin_idx;
                                   idx = builder::make_cast(datatypes::u32,
-                                    use_var_bs ? ((di - d_unpad_begin_idx)
-                                        * valid_kh * num_kw
-                                      + (hi - h_unpad_begin_idx) * num_kw + wi)
-                                               : (di * kh_ * num_kw
-                                                 + hi * num_kw + wi));
+                                    use_var_bs
+                                      ? ((di - d_unpad_begin_idx) * valid_kh
+                                            * num_kw
+                                          + (hi - h_unpad_begin_idx) * num_kw
+                                          + wi)
+                                      : (di * kh_ * num_kw + hi * num_kw + wi));
                                   A_list[idx] = tensor_ptr(input,
                                     data_offset(n, g, c_o,
                                       cur_id + di * dd_ - pd_b_,
@@ -2337,7 +2341,7 @@ void gen_conv_fwd_t::compute_conv_padding_v2(CONV_ARG_LIST) const {
                         auto cond = large_pad
                           ? (((cur_iw + src_row_tile_size <= pw_b_)
                                || (cur_iw > iw_ + pw_b_))
-                            || (num_h_pad >= kh_))
+                              || (num_h_pad >= kh_))
                           : (num_h_pad >= kh_);
                         _if_(cond && padding_value == 0) {
                           zero_out_sub_tensor();
diff --git a/legacy/core/src/ops/templates/conv_fwd.hpp b/legacy/core/src/ops/templates/conv_fwd.hpp
index 6c8f0299..7c0f0494 100644
--- a/legacy/core/src/ops/templates/conv_fwd.hpp
+++ b/legacy/core/src/ops/templates/conv_fwd.hpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2020-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_OPS_TEMPLATES_CONV_FWD_HPP
 #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_OPS_TEMPLATES_CONV_FWD_HPP
@@ -73,13 +74,13 @@ class gen_conv_fwd_t : public body_generator_t<conv_fwd_config_t> {
   gen_conv_fwd_t(sc_op *owner, const sc_dims &stride, const sc_dims &pads_begin,
     std::vector<logical_tensor_t> &&ins, std::vector<logical_tensor_t> &&outs)
     : gen_conv_fwd_t(owner, stride, sc_dims {1}, pads_begin, pads_begin,
-      std::move(ins), std::move(outs)) {}
+        std::move(ins), std::move(outs)) {}
 
   gen_conv_fwd_t(sc_op *owner, const sc_dims &stride, const sc_dims &pads_begin,
     const sc_dims &pads_end, std::vector<logical_tensor_t> &&ins,
     std::vector<logical_tensor_t> &&outs)
     : gen_conv_fwd_t(owner, stride, sc_dims {1}, pads_begin, pads_end,
-      std::move(ins), std::move(outs)) {}
+        std::move(ins), std::move(outs)) {}
 
   gen_conv_fwd_t(sc_op *owner, const sc_dims &stride, const sc_dims &dilation,
     const sc_dims &pads_begin, const sc_dims &pads_end,
diff --git a/legacy/core/src/ops/templates/conv_rl.cpp b/legacy/core/src/ops/templates/conv_rl.cpp
index 025caf98..0aa19856 100644
--- a/legacy/core/src/ops/templates/conv_rl.cpp
+++ b/legacy/core/src/ops/templates/conv_rl.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2023-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 #include "conv_rl.hpp"
 #include <algorithm>
 #include <utility>
@@ -96,16 +97,16 @@ void gen_conv_fwd_rl_t::create_anchor(fusion_anchor_mgr_t *fusion,
       fusion->create_fusion_anchor(slice_map {{output_gt.get(),
         blocking_output_
           ? slice_range_list {{{n, n_len}, {g, g_len}, {k, k_len}, {p, p_len},
-            {q, q_len}, {0, K_block}}}
+              {q, q_len}, {0, K_block}}}
           : slice_range_list {{{n, n_len}, {p, p_len}, {q, q_len}, {g, g_len},
-            {k * K_block, k_len * K_block}}}}});
+              {k * K_block, k_len * K_block}}}}});
 
     } else {
       fusion->create_fusion_anchor(slice_map {{output_gt.get(),
         blocking_output_ ? slice_range_list {{{n, n_len}, {k, k_len},
-          {p, p_len}, {q, q_len}, {0, K_block}}}
+                             {p, p_len}, {q, q_len}, {0, K_block}}}
                          : slice_range_list {{{n, n_len}, {p, p_len},
-                           {q, q_len}, {k * K_block, k_len * K_block}}}}});
+                             {q, q_len}, {k * K_block, k_len * K_block}}}}});
     }
   }
 }
@@ -192,8 +193,8 @@ gen_conv_fwd_rl_t::gen_conv_fwd_rl_t(sc_op *owner, const sc_dims &stride,
   parallel_axis_ = (mb_ >= num_threads)
     ? parallel_kind::BATCH
     : ((int)utils::divide_and_ceil(oh_, num_threads) > height_threshold
-        ? parallel_kind::HEIGHT
-        : parallel_kind::BATCH);
+          ? parallel_kind::HEIGHT
+          : parallel_kind::BATCH);
 
   num_brgemm_k_ = attrs_.get<int>("num_brgemm_k");
   brgemm_k_ = attrs_.get<int>("brgemm_k");
@@ -457,19 +458,19 @@ bool gen_conv_fwd_rl_t::generate(context_ptr ctx,
         create_fusion_anchor(fusion, owner_->get_outputs()[0],
           is_group_conv_
             ? (blocking_output_
-                ? slice_range {{n_o, 1}, {g, 1}, {0, 1}, {p, 1},
-                  {q * config.brgemm_m, config.brgemm_m},
-                  {k_o * config.brgemm_n, config.brgemm_n}}
-                : slice_range {{n_o, 1}, {p, 1},
-                  {q * config.brgemm_m, config.brgemm_m}, {g, 1},
-                  {k_o * config.brgemm_n, config.brgemm_n}})
+                  ? slice_range {{n_o, 1}, {g, 1}, {0, 1}, {p, 1},
+                      {q * config.brgemm_m, config.brgemm_m},
+                      {k_o * config.brgemm_n, config.brgemm_n}}
+                  : slice_range {{n_o, 1}, {p, 1},
+                      {q * config.brgemm_m, config.brgemm_m}, {g, 1},
+                      {k_o * config.brgemm_n, config.brgemm_n}})
             : (blocking_output_ ? slice_range {{n_o, 1}, {g, 1}, {p, 1},
-                 {q * config.brgemm_m, config.brgemm_m},
-                 {k_o * config.brgemm_n, config.brgemm_n}}
+                                    {q * config.brgemm_m, config.brgemm_m},
+                                    {k_o * config.brgemm_n, config.brgemm_n}}
                                 : slice_range {{n_o, 1}, {p, 1},
-                                  {q * config.brgemm_m, config.brgemm_m},
-                                  {(g * K_num_block + k_o) * config.brgemm_n,
-                                    config.brgemm_n}}));
+                                    {q * config.brgemm_m, config.brgemm_m},
+                                    {(g * K_num_block + k_o) * config.brgemm_n,
+                                      config.brgemm_n}}));
       }
       // brgemm_m * oc_
       create_anchor(fusion, owner_->get_outputs()[0], n_o, 1, g, 1, 0, 1, 0, 1,
diff --git a/legacy/core/src/ops/templates/managed_matmul_core.cpp b/legacy/core/src/ops/templates/managed_matmul_core.cpp
index 72399ff9..26a69023 100644
--- a/legacy/core/src/ops/templates/managed_matmul_core.cpp
+++ b/legacy/core/src/ops/templates/managed_matmul_core.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "managed_matmul_core.hpp"
 #include <algorithm>
@@ -954,28 +955,28 @@ void gen_managed_matmul_core_t::single_thread_matmul_call(
             std::vector<expr> aidx = ta.get_format() == sc_data_format_t::MK()
               ? std::vector<expr> {m_start_idx, k_start_idx}
               : std::vector<expr> {
-                m_start_idx / iim_block_, k_start_idx / iik_block_, 0, 0};
+                  m_start_idx / iim_block_, k_start_idx / iik_block_, 0, 0};
             std::vector<expr> bidx = dtype_block > 1
               ? std::vector<expr> {n_start_idx / iin_block_,
-                k_start_idx / iik_block_, 0, 0, 0}
+                  k_start_idx / iik_block_, 0, 0, 0}
               : (!tb.get_format().is_blocking()
-                  ? std::vector<expr> {k_start_idx, n_start_idx}
-                  : std::vector<expr> {
-                    n_start_idx / iin_block_, k_start_idx / iik_block_, 0, 0});
+                    ? std::vector<expr> {k_start_idx, n_start_idx}
+                    : std::vector<expr> {n_start_idx / iin_block_,
+                        k_start_idx / iik_block_, 0, 0});
             std::vector<expr> cidx;
             if (is_partial) {
               cidx = !tc.get_format().is_blocking()
                 ? std::vector<expr> {m_b_idx * iim_block_
-                    + (doroll(m_o, m_o_end)) * iim_block_,
-                  n_b_idx * iin_block_ + (doroll(n_o, n_o_end)) * iin_block_}
+                      + (doroll(m_o, m_o_end)) * iim_block_,
+                    n_b_idx * iin_block_ + (doroll(n_o, n_o_end)) * iin_block_}
                 : std::vector<expr> {m_b_idx + doroll(m_o, m_o_end),
-                  n_b_idx + doroll(n_o, n_o_end), 0, 0};
+                    n_b_idx + doroll(n_o, n_o_end), 0, 0};
               cidx.insert(cidx.begin(), k_s);
             } else {
               cidx = !tc.get_format().is_blocking()
                 ? std::vector<expr> {m_start_idx, n_start_idx}
                 : std::vector<expr> {
-                  m_start_idx / iim_block_, n_start_idx / iin_block_, 0, 0};
+                    m_start_idx / iim_block_, n_start_idx / iin_block_, 0, 0};
             }
             expr LDA = ta.get_format() == sc_data_format_t::MK()
               ? graph.dim_to_expr(ori_K)
@@ -1046,11 +1047,11 @@ void gen_managed_matmul_core_t::single_thread_matmul_call(
                   !tc.get_format().is_blocking()
                     ? std::vector<std::pair<expr, expr>> {{m_start_idx,
                                                             expr(iim_block_)},
-                      {n_start_idx, expr(iin_block_)}}
+                        {n_start_idx, expr(iin_block_)}}
                     : std::vector<std::pair<expr, expr>> {
-                      {m_start_idx / iim_block_, 1},
-                      {n_start_idx / iin_block_, 1}, {0, expr(iim_block_)},
-                      {0, expr(iin_block_)}});
+                        {m_start_idx / iim_block_, 1},
+                        {n_start_idx / iin_block_, 1}, {0, expr(iim_block_)},
+                        {0, expr(iin_block_)}});
               }
             }
           }
@@ -1066,14 +1067,14 @@ void gen_managed_matmul_core_t::single_thread_matmul_call(
                                                                 m_o, m_o_end)
                                                               * iim_block_,
                                                           expr(iim_block_)},
-                    {0, utils::rnd_up(ori_N, iin_block_)}}
+                      {0, utils::rnd_up(ori_N, iin_block_)}}
                   : std::vector<std::pair<expr, expr>> {
-                    {(m_idx + m_b_idx * iim_block_
-                       + (doroll(m_o, m_o_end)) * iim_block_)
-                        / iim_block_,
-                      1},
-                    {0, utils::divide_and_ceil(ori_N, iin_block_)},
-                    {0, expr(iim_block_)}, {0, expr(iin_block_)}});
+                      {(m_idx + m_b_idx * iim_block_
+                         + (doroll(m_o, m_o_end)) * iim_block_)
+                          / iim_block_,
+                        1},
+                      {0, utils::divide_and_ceil(ori_N, iin_block_)},
+                      {0, expr(iim_block_)}, {0, expr(iin_block_)}});
             }
           }
         }
@@ -1092,14 +1093,14 @@ void gen_managed_matmul_core_t::single_thread_matmul_call(
                                                         + m_b_idx * iim_block_,
                                                       M_anchor_info[1]
                                                         / config.M_sub_block},
-                {n_idx + n_b_idx * iin_block_,
-                  N_anchor_info[1] / config.N_sub_block}}
+                  {n_idx + n_b_idx * iin_block_,
+                    N_anchor_info[1] / config.N_sub_block}}
               : std::vector<std::pair<expr, expr>> {
-                {(m_idx + m_b_idx * iim_block_) / expr(iim_block_),
-                  M_anchor_info[1] / iim_block_ / config.M_sub_block},
-                {(n_idx + n_b_idx * iin_block_) / expr(iin_block_),
-                  N_anchor_info[1] / iin_block_ / config.N_sub_block},
-                {0, expr(iim_block_)}, {0, expr(iin_block_)}});
+                  {(m_idx + m_b_idx * iim_block_) / expr(iim_block_),
+                    M_anchor_info[1] / iim_block_ / config.M_sub_block},
+                  {(n_idx + n_b_idx * iin_block_) / expr(iin_block_),
+                    N_anchor_info[1] / iin_block_ / config.N_sub_block},
+                  {0, expr(iim_block_)}, {0, expr(iin_block_)}});
         } else {
           slice_range_list mm_multi_slice;
           // order:X_anchor_info[1] -> X_anchor_info[2]
@@ -1407,26 +1408,27 @@ func_t gen_managed_matmul_core_t::get_single_core_func(context_ptr ctx,
                 std::vector<expr> aidx = input_plain
                   ? std::vector<expr> {m_start_idx, k_start_idx}
                   : std::vector<expr> {
-                    m_start_idx / iim_block_, k_start_idx / iik_block_, 0, 0};
+                      m_start_idx / iim_block_, k_start_idx / iik_block_, 0, 0};
                 std::vector<expr> bidx = dtype_block > 1
                   ? std::vector<expr> {n_start_idx / iin_block_,
-                    k_start_idx / iik_block_, 0, 0, 0}
+                      k_start_idx / iik_block_, 0, 0, 0}
                   : (!tb.get_format().is_blocking()
-                      ? std::vector<expr> {k_start_idx, n_start_idx}
-                      : std::vector<expr> {n_start_idx / iin_block_,
-                        k_start_idx / iik_block_, 0, 0});
+                        ? std::vector<expr> {k_start_idx, n_start_idx}
+                        : std::vector<expr> {n_start_idx / iin_block_,
+                            k_start_idx / iik_block_, 0, 0});
                 std::vector<expr> partial_cidx, full_cidx;
                 partial_cidx = !tc.get_format().is_blocking()
                   ? std::vector<expr> {m_b_idx * iim_block_
-                      + (doroll(m_o, m_o_end)) * iim_block_,
-                    n_b_idx * iin_block_ + (doroll(n_o, n_o_end)) * iin_block_}
+                        + (doroll(m_o, m_o_end)) * iim_block_,
+                      n_b_idx * iin_block_
+                        + (doroll(n_o, n_o_end)) * iin_block_}
                   : std::vector<expr> {m_b_idx + doroll(m_o, m_o_end),
-                    n_b_idx + doroll(n_o, n_o_end), 0, 0};
+                      n_b_idx + doroll(n_o, n_o_end), 0, 0};
                 partial_cidx.insert(partial_cidx.begin(), k_s);
                 full_cidx = !tc.get_format().is_blocking()
                   ? std::vector<expr> {m_start_idx, n_start_idx}
                   : std::vector<expr> {
-                    m_start_idx / iim_block_, n_start_idx / iin_block_, 0, 0};
+                      m_start_idx / iim_block_, n_start_idx / iin_block_, 0, 0};
                 auto partial_C_ptr = tensor_ptr(C_tptr, partial_cidx);
                 auto full_C_ptr = tensor_ptr(C_tptr, full_cidx);
                 expr LDA = input_plain ? ori_K_expr : expr(iik_block_);
@@ -1434,7 +1436,7 @@ func_t gen_managed_matmul_core_t::get_single_core_func(context_ptr ctx,
                                                           : expr(iin_block_);
                 expr partial_LDC = !tc.get_format().is_blocking()
                   ? do_cast_and_fold(
-                    divide_and_ceil(N / iin_block_, N_split_num) * iin_block_)
+                      divide_and_ceil(N / iin_block_, N_split_num) * iin_block_)
                   : iin_block_;
                 expr full_LDC
                   = !tc.get_format().is_blocking() ? ori_N_expr : iin_block_;
@@ -1517,11 +1519,11 @@ func_t gen_managed_matmul_core_t::get_single_core_func(context_ptr ctx,
                   std::vector<expr> tail_aidx = {m_start_idx, k_tail_idx};
                   std::vector<expr> tail_bidx = dtype_block > 1
                     ? std::vector<expr> {n_start_idx / iin_block_,
-                      k_tail_idx / iik_block_, 0, 0, 0}
+                        k_tail_idx / iik_block_, 0, 0, 0}
                     : (!tb.get_format().is_blocking()
-                        ? std::vector<expr> {k_tail_idx, n_start_idx}
-                        : std::vector<expr> {n_start_idx / iin_block_,
-                          k_tail_idx / iik_block_, 0, 0});
+                          ? std::vector<expr> {k_tail_idx, n_start_idx}
+                          : std::vector<expr> {n_start_idx / iin_block_,
+                              k_tail_idx / iik_block_, 0, 0});
                   _if_(K_tail_cond) {
                     _if_(k_b == 0 && bs == 0) {
                       call_init_update_brgemm(1, K_tail, tail_aidx, tail_bidx);
@@ -1538,11 +1540,11 @@ func_t gen_managed_matmul_core_t::get_single_core_func(context_ptr ctx,
                       !tc.get_format().is_blocking()
                         ? std::vector<std::pair<expr, expr>> {{m_start_idx,
                                                                 expr(m_block)},
-                          {n_start_idx, expr(n_block)}}
+                            {n_start_idx, expr(n_block)}}
                         : std::vector<std::pair<expr, expr>> {
-                          {m_start_idx / iim_block_, 1},
-                          {n_start_idx / iin_block_, 1}, {0, expr(iim_block_)},
-                          {0, expr(iin_block_)}});
+                            {m_start_idx / iim_block_, 1},
+                            {n_start_idx / iin_block_, 1},
+                            {0, expr(iim_block_)}, {0, expr(iin_block_)}});
                   }
                 }
               }
@@ -1674,15 +1676,15 @@ bool gen_managed_matmul_core_t::generate(context_ptr ctx,
   expr M_real_split = is_dynamic
     ? M_split_num
     : do_cast_and_fold(
-      builder::make_min(divide_and_ceil(M_expr, iim_block_), M_split_num));
+        builder::make_min(divide_and_ceil(M_expr, iim_block_), M_split_num));
   expr N_real_split = is_dynamic
     ? N_split_num
     : do_cast_and_fold(
-      builder::make_min(divide_and_ceil(N_expr, iin_block_), N_split_num));
+        builder::make_min(divide_and_ceil(N_expr, iin_block_), N_split_num));
   expr K_real_split = is_dynamic
     ? K_split_num
     : do_cast_and_fold(
-      builder::make_min(divide_and_ceil(K_expr, iik_block_), K_split_num));
+        builder::make_min(divide_and_ceil(K_expr, iik_block_), K_split_num));
 
   if (K_split_num == 1) {
     expr m_idx, n_idx, M_single_thr_size, N_single_thr_size, X_bigger_num;
@@ -1958,7 +1960,7 @@ bool gen_managed_matmul_core_t::generate(context_ptr ctx,
     std::vector<expr> out_tmp_buf_shape_expr
       = out_tensors_[0].get_format().is_blocking()
       ? std::vector<expr> {K_real_split, M_block_size_expr / iim_block_,
-        N_block_size_expr / iin_block_, iim_block_, iin_block_}
+          N_block_size_expr / iin_block_, iim_block_, iin_block_}
       : std::vector<expr> {K_real_split, M_block_size_expr, N_block_size_expr};
     if (is_dynamic) {
       out_tmp_buf_shape_expr = std::vector<expr> {K_real_split,
diff --git a/legacy/core/src/ops/templates/matmul_core.cpp b/legacy/core/src/ops/templates/matmul_core.cpp
index 1fc34104..4df7107f 100644
--- a/legacy/core/src/ops/templates/matmul_core.cpp
+++ b/legacy/core/src/ops/templates/matmul_core.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "matmul_core.hpp"
 #include <algorithm>
@@ -389,22 +390,22 @@ void gen_matmul_core_t::get_and_check_blocks(sc_graph_t &graph,
   // divide and ceil(x, 1) to convert x to index datatype.
   M_num_blocks = blocking_axis_.A_m.size() == 1
     ? divide_and_ceil(
-      A_dims[blocking_axis_.A_m.at(0)], graph.dim_to_expr(M_block))
+        A_dims[blocking_axis_.A_m.at(0)], graph.dim_to_expr(M_block))
     : A_dims[blocking_axis_.A_m.at(0)];
 
   K_num_blocks = blocking_axis_.A_k.size() == 1
     ? divide_and_ceil(
-      A_dims[blocking_axis_.A_k.at(0)], graph.dim_to_expr(K_block))
+        A_dims[blocking_axis_.A_k.at(0)], graph.dim_to_expr(K_block))
     : A_dims[blocking_axis_.A_k.at(0)];
 
   B_K_num_blocks = blocking_axis_.B_k.size() == 1
     ? divide_and_ceil(
-      B_dims[blocking_axis_.B_k.at(0)], graph.dim_to_expr(K_block))
+        B_dims[blocking_axis_.B_k.at(0)], graph.dim_to_expr(K_block))
     : B_dims[blocking_axis_.B_k.at(0)];
 
   N_num_blocks = blocking_axis_.B_n.size() == 1
     ? divide_and_ceil(
-      B_dims[blocking_axis_.B_n.at(0)], graph.dim_to_expr(N_block))
+        B_dims[blocking_axis_.B_n.at(0)], graph.dim_to_expr(N_block))
     : B_dims[blocking_axis_.B_n.at(0)];
 
   COMPILE_ASSERT(
@@ -734,9 +735,9 @@ bool gen_matmul_core_t::generate(context_ptr ctx,
       std::vector<std::pair<expr, expr>> fidx3
         = blocking_axis_.C_m.size() == 1 && blocking_axis_.C_n.size() == 1
         ? concat_vec(batch_tensor_slice_ranges,
-          {{0, M_num_blocks * M_block}, {0, N_num_blocks * N_block}})
+            {{0, M_num_blocks * M_block}, {0, N_num_blocks * N_block}})
         : concat_vec(batch_tensor_slice_ranges,
-          {{0, M_num_blocks}, {0, N_num_blocks}, {0, M_block}, {0, N_block}});
+            {{0, M_num_blocks}, {0, N_num_blocks}, {0, M_block}, {0, N_block}});
 
       _named_for_(lm_c, m_o, 0, M_num_blocks) {
         _named_for_(ln_c, n_o, 0, N_num_blocks) {
@@ -757,15 +758,15 @@ bool gen_matmul_core_t::generate(context_ptr ctx,
           fidx1
             = blocking_axis_.C_m.size() == 1 && blocking_axis_.C_n.size() == 1
             ? concat_vec(batch_tensor_slice_ranges,
-              {{m_o * M_block, M_block}, {n_o * N_block, N_block}})
+                {{m_o * M_block, M_block}, {n_o * N_block, N_block}})
             : concat_vec(batch_tensor_slice_ranges,
-              {{m_o, 1}, {n_o, 1}, {0, M_block}, {0, N_block}});
+                {{m_o, 1}, {n_o, 1}, {0, M_block}, {0, N_block}});
           fidx2
             = blocking_axis_.C_m.size() == 1 && blocking_axis_.C_n.size() == 1
             ? concat_vec(batch_tensor_slice_ranges,
-              {{m_o * M_block, M_block}, {0, N_num_blocks * N_block}})
+                {{m_o * M_block, M_block}, {0, N_num_blocks * N_block}})
             : concat_vec(batch_tensor_slice_ranges,
-              {{m_o, 1}, {0, N_num_blocks}, {0, M_block}, {0, N_block}});
+                {{m_o, 1}, {0, N_num_blocks}, {0, M_block}, {0, N_block}});
 
           if (dtype_block > 1) bidx.emplace_back(0);
           expr LDA = K_block, LDB = N_block, LDC = N_block,
@@ -820,8 +821,8 @@ bool gen_matmul_core_t::generate(context_ptr ctx,
           tensor_ptr(B,
             dtype_block > 1 ? std::vector<expr> {n_o, 0, 0, 0, 0}
                             : (!in_tensors_[1].get_format().is_blocking()
-                                ? std::vector<expr> {0, n_o * N_block}
-                                : std::vector<expr> {n_o, 0, 0, 0})),
+                                  ? std::vector<expr> {0, n_o * N_block}
+                                  : std::vector<expr> {n_o, 0, 0, 0})),
           tensor_ptr(C,
             !out_tensors_[0].get_format().is_blocking()
               ? std::vector<expr> {m_o * M_block, n_o * N_block}
@@ -842,9 +843,9 @@ bool gen_matmul_core_t::generate(context_ptr ctx,
         create_fusion_anchor(fusion, owner_->get_outputs()[0],
           !out_tensors_[0].get_format().is_blocking()
             ? slice_range {{m_o * M_block, M_block},
-              {0, N_num_blocks * N_block}}
+                {0, N_num_blocks * N_block}}
             : slice_range {
-              {m_o, 1}, {0, N_num_blocks}, {0, M_block}, {0, N_block}});
+                {m_o, 1}, {0, N_num_blocks}, {0, M_block}, {0, N_block}});
       }
     }
   }
diff --git a/legacy/core/src/ops/templates/nested_conv1x1_backprop_data.cpp b/legacy/core/src/ops/templates/nested_conv1x1_backprop_data.cpp
index 50f40cb1..74ab7035 100644
--- a/legacy/core/src/ops/templates/nested_conv1x1_backprop_data.cpp
+++ b/legacy/core/src/ops/templates/nested_conv1x1_backprop_data.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "nested_conv1x1_backprop_data.hpp"
 #include <algorithm>
@@ -355,11 +356,11 @@ void gen_nested_conv1x1_backprop_data_t::
                   m_start_idx / ori_W, m_start_idx % ori_W, k_start_idx};
                 std::vector<expr> bidx = dtype_block > 1
                   ? std::vector<expr> {n_start_idx / im_ic_block_,
-                    k_start_idx / im_oc_block_ / 2, 0, 0, 0, 0, 0}
+                      k_start_idx / im_oc_block_ / 2, 0, 0, 0, 0, 0}
                   : !tb.get_format().is_blocking()
                   ? std::vector<expr> {k_start_idx, n_start_idx, 0, 0}
                   : std::vector<expr> {n_start_idx / im_ic_block_,
-                    k_start_idx / im_oc_block_, 0, 0, 0, 0};
+                      k_start_idx / im_oc_block_, 0, 0, 0, 0};
                 std::vector<expr> cidx
                   = {bs_start_idx, m_start_idx / ori_W * stride_h,
                     m_start_idx % ori_W * stride_w * ori_W, n_start_idx};
diff --git a/legacy/core/src/ops/templates/nested_conv1x1_backprop_weight.cpp b/legacy/core/src/ops/templates/nested_conv1x1_backprop_weight.cpp
index 1521c609..f564a78c 100644
--- a/legacy/core/src/ops/templates/nested_conv1x1_backprop_weight.cpp
+++ b/legacy/core/src/ops/templates/nested_conv1x1_backprop_weight.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "nested_conv1x1_backprop_weight.hpp"
 #include <algorithm>
@@ -287,26 +288,26 @@ void gen_nested_conv1x1_backprop_weight_t::inner_loop_call(context_ptr &ctx,
   // full shape based on delta_output's reorder result
   std::vector<expr> temp_output_delta_shape_full = dtype_block > 1
     ? std::vector<expr> {BS / im_bs_block_, OC / im_oc_block_, OH, OW,
-      im_bs_block_ / dtype_block, im_oc_block_, dtype_block}
-    : std::vector<expr> {
-      BS / im_bs_block_, OC / im_oc_block_, OH, OW, im_bs_block_, im_oc_block_};
+        im_bs_block_ / dtype_block, im_oc_block_, dtype_block}
+    : std::vector<expr> {BS / im_bs_block_, OC / im_oc_block_, OH, OW,
+        im_bs_block_, im_oc_block_};
   _tensor_(temp_output_delta, dtype, temp_output_delta_shape_full);
   _for_(i_ic, 0, ic_block / im_ic_block_) {
     // shrinked_shape
     std::vector<expr> temp_output_delta_shape_shr = dtype_block > 1
       ? std::vector<expr> {bs_block / im_bs_block_, oc_block / im_oc_block_,
-        oh_block, ow_block, im_bs_block_ / dtype_block, im_oc_block_,
-        dtype_block}
+          oh_block, ow_block, im_bs_block_ / dtype_block, im_oc_block_,
+          dtype_block}
       : std::vector<expr> {bs_block / im_bs_block_, oc_block / im_oc_block_,
-        oh_block, ow_block, im_bs_block_, im_oc_block_};
+          oh_block, ow_block, im_bs_block_, im_oc_block_};
     // f32 --> vectorized; bf16 --> vnni_reorder
     std::vector<expr> shrink_offset = dtype_block > 1
       ? std::vector<expr> {obs_offset / im_bs_block_, oc_offset / im_oc_block_,
-        oh_offset, ow_offset, obs_offset % im_bs_block_ / dtype_block,
-        oc_offset % im_oc_block_, obs_offset % im_bs_block_ % dtype_block}
+          oh_offset, ow_offset, obs_offset % im_bs_block_ / dtype_block,
+          oc_offset % im_oc_block_, obs_offset % im_bs_block_ % dtype_block}
       : std::vector<expr> {obs_offset / im_bs_block_, oc_offset / im_oc_block_,
-        oh_offset, ow_offset, obs_offset % im_bs_block_,
-        oc_offset % im_oc_block_};
+          oh_offset, ow_offset, obs_offset % im_bs_block_,
+          oc_offset % im_oc_block_};
 
     _if_(i_ic == 0) {
       // reorder temp_output_delta
@@ -319,12 +320,12 @@ void gen_nested_conv1x1_backprop_weight_t::inner_loop_call(context_ptr &ctx,
           shrink_offset, temp_output_delta_shape_shr, stmts()};
       slice_range tmp_output_slice_range = dtype_block > 1
         ? slice_range {{obs_offset, bs_block / im_bs_block_},
-          {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block},
-          {ow_offset, ow_block}, {0, im_bs_block_ / dtype_block},
-          {0, im_oc_block_}, {0, dtype_block}}
+            {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block},
+            {ow_offset, ow_block}, {0, im_bs_block_ / dtype_block},
+            {0, im_oc_block_}, {0, dtype_block}}
         : slice_range {{obs_offset, bs_block / im_bs_block_},
-          {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block},
-          {ow_offset, ow_block}, {0, im_bs_block_}, {0, im_oc_block_}};
+            {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block},
+            {ow_offset, ow_block}, {0, im_bs_block_}, {0, im_oc_block_}};
       ops::commit_op(ctx, "reorder",
         {tensor_slice(delta_output,
           {{obs_offset, bs_block}, {oh_offset, oh_block}, {ow_offset, ow_block},
@@ -342,20 +343,20 @@ void gen_nested_conv1x1_backprop_weight_t::inner_loop_call(context_ptr &ctx,
     _for_(i_oc, 0, oc_block / im_oc_block_) {
       auto real_weight_idx = is_partial
         ? std::vector<expr> {temp_weight_idx[0], temp_weight_idx[1] + i_ic,
-          temp_weight_idx[2] + i_oc, 0, 0, 0, 0}
+            temp_weight_idx[2] + i_oc, 0, 0, 0, 0}
         : std::vector<expr> {
-          temp_weight_idx[0] + i_ic, temp_weight_idx[1] + i_oc, 0, 0, 0, 0};
+            temp_weight_idx[0] + i_ic, temp_weight_idx[1] + i_oc, 0, 0, 0, 0};
       _for_(i_bs, 0, bs_block / im_bs_block_) {
         _for_(i_od, 0, od_block) {
           _for_(i_oh, 0, oh_block) {
             auto temp_output_delta_brgemm_index = dtype_block > 1
               ? std::vector<expr> {shrink_offset[0] + i_bs,
-                shrink_offset[1] + i_oc, shrink_offset[2] + i_oh,
-                shrink_offset[3], shrink_offset[4], shrink_offset[5],
-                shrink_offset[6]}
+                  shrink_offset[1] + i_oc, shrink_offset[2] + i_oh,
+                  shrink_offset[3], shrink_offset[4], shrink_offset[5],
+                  shrink_offset[6]}
               : std::vector<expr> {shrink_offset[0] + i_bs,
-                shrink_offset[1] + i_oc, shrink_offset[2] + i_oh,
-                shrink_offset[3], shrink_offset[4], shrink_offset[5]};
+                  shrink_offset[1] + i_oc, shrink_offset[2] + i_oh,
+                  shrink_offset[3], shrink_offset[4], shrink_offset[5]};
             _if_(o_bs == 0 && o_od == 0 && o_oh == 0 && o_ow == 0 && i_bs == 0
               && i_od == 0 && i_oh == 0) {
               // ic x bs matmul bs x oc
@@ -512,17 +513,18 @@ bool gen_nested_conv1x1_backprop_weight_t::generate(context_ptr ctx,
                           = has_stride
                           ? std::vector<expr> {0, 0, 0, 0, 0, 0}
                           : std::vector<expr> {obs_offset / im_bs_block_,
-                            ic_offset / im_ic_block_, oh_offset, ow_offset,
-                            obs_offset % im_bs_block_,
-                            ic_offset % im_ic_block_};
+                              ic_offset / im_ic_block_, oh_offset, ow_offset,
+                              obs_offset % im_bs_block_,
+                              ic_offset % im_ic_block_};
                         std::vector<expr> temp_weight_idx = is_partial
                           ? std::vector<expr> {p_bs * oh_threads * od_threads
-                              + p_od * oh_threads + p_oh,
-                            ic_offset / im_ic_block_, oc_offset / im_oc_block_,
-                            0, 0, im_ic_block_, im_oc_block_}
+                                + p_od * oh_threads + p_oh,
+                              ic_offset / im_ic_block_,
+                              oc_offset / im_oc_block_, 0, 0, im_ic_block_,
+                              im_oc_block_}
                           : std::vector<expr> {ic_offset / im_ic_block_,
-                            oc_offset / im_oc_block_, 0, 0, im_ic_block_,
-                            im_oc_block_};
+                              oc_offset / im_oc_block_, 0, 0, im_ic_block_,
+                              im_oc_block_};
                         inner_loop_call(ctx, temp_forward_input,
                           temp_forward_idx_non_block, in_tensors_[1],
                           delta_output, real_delta_weight_buf, temp_weight_idx,
diff --git a/legacy/core/src/ops/templates/nested_convNxN_backprop_data.cpp b/legacy/core/src/ops/templates/nested_convNxN_backprop_data.cpp
index d55c1d90..d4159591 100644
--- a/legacy/core/src/ops/templates/nested_convNxN_backprop_data.cpp
+++ b/legacy/core/src/ops/templates/nested_convNxN_backprop_data.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "nested_convNxN_backprop_data.hpp"
 #include <algorithm>
@@ -267,9 +268,9 @@ void gen_nested_convNxN_backprop_data_t::inner_loop_call(const context_ptr &ctx,
                     i_bs, oh_idx - oh_offset, ow_start, 0};
                   auto weight_index = dtype_block > 1
                     ? std::vector<expr> {ic_offset / im_ic_block_ + i_ic, 0, r,
-                      s, 0, 0, 0}
+                        s, 0, 0, 0}
                     : std::vector<expr> {
-                      ic_offset / im_ic_block_ + i_ic, 0, r, s, 0, 0};
+                        ic_offset / im_ic_block_ + i_ic, 0, r, s, 0, 0};
                   A_list[len]
                     = tensor_ptr(temp_delta_output, tmp_delta_output_index);
                   B_list[len] = tensor_ptr(weight, weight_index);
diff --git a/legacy/core/src/ops/templates/nested_convNxN_backprop_weight.cpp b/legacy/core/src/ops/templates/nested_convNxN_backprop_weight.cpp
index d9119623..edc1ce24 100644
--- a/legacy/core/src/ops/templates/nested_convNxN_backprop_weight.cpp
+++ b/legacy/core/src/ops/templates/nested_convNxN_backprop_weight.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "nested_convNxN_backprop_weight.hpp"
 #include <algorithm>
@@ -236,25 +237,25 @@ void gen_nested_convNXN_bwd_weight_t::inner_loop_call(const context_ptr &ctx,
   // full shape based on delta_output's reorder result
   std::vector<expr> temp_output_delta_shape_full = dtype_block > 1
     ? std::vector<expr> {BS / im_bs_block_, OC / im_oc_block_, OH, OW,
-      im_bs_block_ / 2, im_oc_block_, 2}
-    : std::vector<expr> {
-      BS / im_bs_block_, OC / im_oc_block_, OH, OW, im_bs_block_, im_oc_block_};
+        im_bs_block_ / 2, im_oc_block_, 2}
+    : std::vector<expr> {BS / im_bs_block_, OC / im_oc_block_, OH, OW,
+        im_bs_block_, im_oc_block_};
   _tensor_(temp_output_delta, dtype, temp_output_delta_shape_full);
   _for_(i_ic, 0, ic_block / im_ic_block_) {
     // shrinked_shape
     std::vector<expr> temp_output_delta_shape_shr = dtype_block > 1
       ? std::vector<expr> {bs_block / im_bs_block_, oc_block / im_oc_block_,
-        oh_block, ow_block, im_bs_block_ / 2, im_oc_block_, 2}
+          oh_block, ow_block, im_bs_block_ / 2, im_oc_block_, 2}
       : std::vector<expr> {bs_block / im_bs_block_, oc_block / im_oc_block_,
-        oh_block, ow_block, im_bs_block_, im_oc_block_};
+          oh_block, ow_block, im_bs_block_, im_oc_block_};
     // f32 --> vectorized; bf16 --> vnni_reorder
     std::vector<expr> shrink_offset = dtype_block > 1
       ? std::vector<expr> {obs_offset / im_bs_block_, oc_offset / im_oc_block_,
-        oh_offset, ow_offset, obs_offset % im_bs_block_ / 2,
-        oc_offset % im_oc_block_, obs_offset % im_bs_block_ % 2}
+          oh_offset, ow_offset, obs_offset % im_bs_block_ / 2,
+          oc_offset % im_oc_block_, obs_offset % im_bs_block_ % 2}
       : std::vector<expr> {obs_offset / im_bs_block_, oc_offset / im_oc_block_,
-        oh_offset, ow_offset, obs_offset % im_bs_block_,
-        oc_offset % im_oc_block_};
+          oh_offset, ow_offset, obs_offset % im_bs_block_,
+          oc_offset % im_oc_block_};
     // reorder temp_output_delta
     _if_(i_ic == 0) {
       trace_guard_t trg(ctx, "output_delta_reorder");
@@ -267,12 +268,12 @@ void gen_nested_convNXN_bwd_weight_t::inner_loop_call(const context_ptr &ctx,
           shrink_offset, temp_output_delta_shape_shr, stmts()};
       slice_range tmp_output_slice_range = dtype_block > 1
         ? slice_range {{obs_offset, bs_block / im_bs_block_},
-          {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block},
-          {ow_offset, ow_block}, {0, im_bs_block_ / 2}, {0, im_oc_block_},
-          {0, 2}}
+            {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block},
+            {ow_offset, ow_block}, {0, im_bs_block_ / 2}, {0, im_oc_block_},
+            {0, 2}}
         : slice_range {{obs_offset, bs_block / im_bs_block_},
-          {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block},
-          {ow_offset, ow_block}, {0, im_bs_block_}, {0, im_oc_block_}};
+            {oc_offset, oc_block / im_oc_block_}, {oh_offset, oh_block},
+            {ow_offset, ow_block}, {0, im_bs_block_}, {0, im_oc_block_}};
       // TODO(yifei): figure out why expand loop based on output doesn't work
       ops::commit_op(ctx, "reorder",
         {tensor_slice(delta_output,
@@ -285,9 +286,9 @@ void gen_nested_convNXN_bwd_weight_t::inner_loop_call(const context_ptr &ctx,
         {{"out_format",
           dtype_block > 1
             ? sc_data_format_t(sc_data_format_kind_t(0, 1, 2, 3, 0, 1, 0),
-              {im_bs_block_, im_oc_block_, 2})
+                {im_bs_block_, im_oc_block_, 2})
             : sc_data_format_t(sc_data_format_kind_t(0, 1, 2, 3, 0, 1),
-              {im_bs_block_, im_oc_block_})}});
+                {im_bs_block_, im_oc_block_})}});
     }
     _for_(i_oc, 0, oc_block / im_oc_block_) {
       _for_(i_bs, 0, bs_block / im_bs_block_) {
@@ -298,21 +299,21 @@ void gen_nested_convNXN_bwd_weight_t::inner_loop_call(const context_ptr &ctx,
                 trace_guard_t trg(ctx, "brgemm");
                 auto temp_output_delta_brgemm_index = dtype_block > 1
                   ? std::vector<expr> {shrink_offset[0] + i_bs,
-                    shrink_offset[1] + i_oc, shrink_offset[2] + i_oh,
-                    shrink_offset[3], shrink_offset[4], shrink_offset[5],
-                    shrink_offset[6]}
+                      shrink_offset[1] + i_oc, shrink_offset[2] + i_oh,
+                      shrink_offset[3], shrink_offset[4], shrink_offset[5],
+                      shrink_offset[6]}
                   : std::vector<expr> {shrink_offset[0] + i_bs,
-                    shrink_offset[1] + i_oc, shrink_offset[2] + i_oh,
-                    shrink_offset[3], shrink_offset[4], shrink_offset[5]};
+                      shrink_offset[1] + i_oc, shrink_offset[2] + i_oh,
+                      shrink_offset[3], shrink_offset[4], shrink_offset[5]};
                 COMPILE_ASSERT(
                   temp_weight_idx.size() == 2 || temp_weight_idx.size() == 3,
                   "temp_weight_idx shall have length 2 or 3");
                 auto real_delta_weight_buf_index = temp_weight_idx.size() == 4
                   ? std::vector<expr> {temp_weight_idx[0] + i_ic,
-                    temp_weight_idx[1] + i_oc, lr, ls, 0, 0}
+                      temp_weight_idx[1] + i_oc, lr, ls, 0, 0}
                   : std::vector<expr> {temp_weight_idx[0],
-                    temp_weight_idx[1] + i_ic, temp_weight_idx[2] + i_oc, lr,
-                    ls, 0, 0};
+                      temp_weight_idx[1] + i_ic, temp_weight_idx[2] + i_oc, lr,
+                      ls, 0, 0};
                 _if_(o_bs == 0 && o_od == 0 && o_oh == 0 && o_ow == 0
                   && i_bs == 0 && i_od == 0 && i_oh == 0) {
                   // ic x bs matmul bs x oc
@@ -486,12 +487,12 @@ bool gen_nested_convNXN_bwd_weight_t::generate(context_ptr ctx,
                               // so extra division needed
                               auto temp_weight_idx = use_temp_weight
                                 ? std::vector<expr> {p_bs * oh_threads
-                                      * od_threads
-                                    + p_od * oh_threads + p_oh,
-                                  ic_offset / im_ic_block_,
-                                  oc_offset / im_oc_block_}
+                                        * od_threads
+                                      + p_od * oh_threads + p_oh,
+                                    ic_offset / im_ic_block_,
+                                    oc_offset / im_oc_block_}
                                 : std::vector<expr> {ic_offset / im_ic_block_,
-                                  oc_offset / im_oc_block_};
+                                    oc_offset / im_oc_block_};
                               inner_loop_call(ctx, temp_forward_input,
                                 temp_forward_idx_non_block, in_tensors_[1],
                                 delta_output, real_delta_weight_buf,
diff --git a/legacy/core/src/ops/templates/nested_conv_fwd.cpp b/legacy/core/src/ops/templates/nested_conv_fwd.cpp
index 9d4fd060..89af2ac5 100644
--- a/legacy/core/src/ops/templates/nested_conv_fwd.cpp
+++ b/legacy/core/src/ops/templates/nested_conv_fwd.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2022-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include <algorithm>
 #include <functional>
@@ -184,7 +185,7 @@ config_ptr gen_nested_conv_fwd_t::get_default_config(context_ptr ctx) const {
     cfg.bs_threads = mb_ > num_threads || (mb_ == num_threads && oc_ <= 128)
       ? num_threads
       : *(std::find_if(thread_split.rbegin(), thread_split.rend(),
-        [&](int split) { return split == 1 || split < mb_; }));
+          [&](int split) { return split == 1 || split < mb_; }));
     cfg.oc_threads = num_threads / cfg.bs_threads;
     cfg.h_threads = 1;
     cfg.w_threads = 1;
@@ -306,12 +307,12 @@ config_ptr gen_nested_conv_fwd_t::get_default_config(context_ptr ctx) const {
           ? oh_
           : (utils::divide_and_ceil(
                utils::divide_and_ceil(oh_, cfg.im_h_block), cfg.h_threads)
-            * cfg.im_h_block);
+              * cfg.im_h_block);
         cfg.w_block = cfg.w_threads == 1
           ? ow_
           : (utils::divide_and_ceil(
                utils::divide_and_ceil(ow_, cfg.im_w_block), cfg.w_threads)
-            * cfg.im_w_block);
+              * cfg.im_w_block);
       }
     } else {
       if (!is_1x1_conv_ && has_pad) {
@@ -335,7 +336,7 @@ config_ptr gen_nested_conv_fwd_t::get_default_config(context_ptr ctx) const {
               ? oh_
               : (utils::divide_and_ceil(
                    utils::divide_and_ceil(oh_, cfg.im_h_block), cfg.h_threads)
-                * cfg.im_h_block);
+                  * cfg.im_h_block);
           }
         }
       }
@@ -403,7 +404,7 @@ config_ptr gen_nested_conv_fwd_t::get_default_config(context_ptr ctx) const {
         ? oh_
         : (utils::divide_and_ceil(
              utils::divide_and_ceil(oh_, cfg.im_h_block), cfg.h_threads)
-          * cfg.im_h_block);
+            * cfg.im_h_block);
     }
 
     if (!is_1x1_conv_ && oc_ > 128 && cfg.im_oc_block % 32 != 0) {
@@ -1235,15 +1236,15 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const {
                                         = blocking_input_
                                         ? std::vector<expr> {n, ic, h, w, 0}
                                         : std::vector<expr> {
-                                          n, h, w, ic * im_ic_block};
+                                            n, h, w, ic * im_ic_block};
 
                                       A_list[i_c]
                                         = tensor_ptr(input1, input_pos);
                                       B_list[i_c] = tensor_ptr(weight,
                                         kpack > 1 ? std::vector<expr> {oc, ic,
-                                          0, 0, 0, 0, 0}
+                                                      0, 0, 0, 0, 0}
                                                   : std::vector<expr> {
-                                                    oc, ic, 0, 0, 0, 0});
+                                                      oc, ic, 0, 0, 0, 0});
                                     }
                                   }
                                   const auto hint_A_size
@@ -1273,9 +1274,9 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const {
                                   std::vector<expr> output_pos
                                     = blocking_output_
                                     ? std::vector<expr> {pic * mb_ + n, oc, h,
-                                      w, 0}
+                                        w, 0}
                                     : std::vector<expr> {
-                                      pic * mb_ + n, h, w, oc * im_oc_block};
+                                        pic * mb_ + n, h, w, oc * im_oc_block};
 
                                   if (ic_num_block_pt > 1) {
                                     _if_(o_ic == 0) {
@@ -1327,11 +1328,11 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const {
                                         owner_->get_outputs()[0],
                                         blocking_output_
                                           ? slice_range {{n, 1UL}, {oc, 1},
-                                            {h, im_h_block}, {w, im_w_block},
-                                            {0, im_oc_block}}
+                                              {h, im_h_block}, {w, im_w_block},
+                                              {0, im_oc_block}}
                                           : slice_range {{n, 1UL},
-                                            {h, im_h_block}, {w, im_w_block},
-                                            {oc * im_oc_block, im_oc_block}});
+                                              {h, im_h_block}, {w, im_w_block},
+                                              {oc * im_oc_block, im_oc_block}});
                                     }
                                   }
                                 }
@@ -1346,11 +1347,11 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const {
                                     owner_->get_outputs()[0],
                                     blocking_output_
                                       ? slice_range {{n, 1UL}, {anch_c, 1},
-                                        {h, im_h_block}, {w, im_w_block},
-                                        {0, im_oc_block}}
+                                          {h, im_h_block}, {w, im_w_block},
+                                          {0, im_oc_block}}
                                       : slice_range {{n, 1UL}, {h, im_h_block},
-                                        {w, im_w_block},
-                                        {anch_c * im_oc_block, oc_block}});
+                                          {w, im_w_block},
+                                          {anch_c * im_oc_block, oc_block}});
                                 }
                               }
                             }
@@ -1371,11 +1372,11 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const {
                                 owner_->get_outputs()[0],
                                 blocking_output_
                                   ? slice_range {{n, 1UL}, {anch_c, 1},
-                                    {h, im_h_block}, {anch_w, w_block},
-                                    {0, im_oc_block}}
+                                      {h, im_h_block}, {anch_w, w_block},
+                                      {0, im_oc_block}}
                                   : slice_range {{n, 1UL}, {h, im_h_block},
-                                    {anch_w, w_block},
-                                    {anch_c * im_oc_block, oc_block}});
+                                      {anch_w, w_block},
+                                      {anch_c * im_oc_block, oc_block}});
                             }
                           }
                         }
@@ -1401,11 +1402,11 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const {
                           create_fusion_anchor(fusion, owner_->get_outputs()[0],
                             blocking_output_
                               ? slice_range {{n, 1UL}, {anch_c, 1},
-                                {anch_h, h_block}, {anch_w, w_block},
-                                {0, im_oc_block}}
+                                  {anch_h, h_block}, {anch_w, w_block},
+                                  {0, im_oc_block}}
                               : slice_range {{n, 1UL}, {anch_h, h_block},
-                                {anch_w, w_block},
-                                {anch_c * im_oc_block, oc_block}});
+                                  {anch_w, w_block},
+                                  {anch_c * im_oc_block, oc_block}});
                         }
                       }
                     }
@@ -1421,16 +1422,16 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const {
               create_fusion_anchor(fusion, owner_->get_outputs()[0],
                 blocking_output_
                   ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block},
-                    {0, oh_expr_}, {0, ow_}, {0, im_oc_block}}
+                      {0, oh_expr_}, {0, ow_}, {0, im_oc_block}}
                   : slice_range {
-                    {pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
+                      {pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
             }
           }
           if (oc_threads == 1 && h_threads == 1 && w_threads == 1) {
             create_fusion_anchor(fusion, owner_->get_outputs()[0],
               blocking_output_
                 ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block},
-                  {0, oh_expr_}, {0, ow_}, {0, im_oc_block}}
+                    {0, oh_expr_}, {0, ow_}, {0, im_oc_block}}
                 : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
           }
         }
@@ -1438,7 +1439,7 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const {
           create_fusion_anchor(fusion, owner_->get_outputs()[0],
             blocking_output_
               ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_},
-                {0, ow_}, {0, im_oc_block}}
+                  {0, ow_}, {0, im_oc_block}}
               : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
         }
       }
@@ -1447,7 +1448,7 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const {
         create_fusion_anchor(fusion, owner_->get_outputs()[0],
           blocking_output_
             ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_},
-              {0, ow_}, {0, im_oc_block}}
+                {0, ow_}, {0, im_oc_block}}
             : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
       }
     }
@@ -1455,7 +1456,7 @@ void gen_nested_conv_fwd_t::compute_1x1_pack_input_nested(CONV_ARG_LIST) const {
       create_fusion_anchor(fusion, owner_->get_outputs()[0],
         blocking_output_
           ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_},
-            {0, ow_}, {0, im_oc_block}}
+              {0, ow_}, {0, im_oc_block}}
           : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
     }
   }
@@ -1616,9 +1617,9 @@ void gen_nested_conv_fwd_t::dynamic_compute_1x1_pack_input_nested(
                                         = tensor_ptr(input1, input_pos);
                                       B_list[i_c] = tensor_ptr(weight,
                                         kpack > 1 ? std::vector<expr> {oc, ic,
-                                          0, 0, 0, 0, 0}
+                                                      0, 0, 0, 0, 0}
                                                   : std::vector<expr> {
-                                                    oc, ic, 0, 0, 0, 0});
+                                                      oc, ic, 0, 0, 0, 0});
                                     }
                                   }
                                   auto LDA = ic_;
@@ -1628,7 +1629,7 @@ void gen_nested_conv_fwd_t::dynamic_compute_1x1_pack_input_nested(
                                     = blocking_output_
                                     ? std::vector<expr> {n, oc, h, w, 0}
                                     : std::vector<expr> {
-                                      n, h, w, oc * im_oc_block};
+                                        n, h, w, oc * im_oc_block};
                                   auto im_w_tail_block = builder::make_cast(
                                     datatypes::s32, ow_expr_ - w);
                                   im_w_block = builder::make_select(
@@ -1861,18 +1862,18 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested(
                                         std::vector<expr> input_pos
                                           = blocking_input_
                                           ? std::vector<expr> {n, ic,
-                                            (h + im_h_i) * sh_, w * sw_, 0}
+                                              (h + im_h_i) * sh_, w * sw_, 0}
                                           : std::vector<expr> {n,
-                                            (h + im_h_i) * sh_, w * sw_,
-                                            ic * im_ic_block};
+                                              (h + im_h_i) * sh_, w * sw_,
+                                              ic * im_ic_block};
 
                                         A_list[i_c]
                                           = tensor_ptr(input, input_pos);
                                         B_list[i_c] = tensor_ptr(weight,
                                           kpack > 1 ? std::vector<expr> {oc, ic,
-                                            0, 0, 0, 0, 0}
+                                                        0, 0, 0, 0, 0}
                                                     : std::vector<expr> {
-                                                      oc, ic, 0, 0, 0, 0});
+                                                        oc, ic, 0, 0, 0, 0});
                                       }
                                     }
                                     const auto hint_A_size
@@ -1904,9 +1905,9 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested(
                                     std::vector<expr> output_pos
                                       = blocking_output_
                                       ? std::vector<expr> {pic * mb_ + n, oc,
-                                        h + im_h_i, w, 0}
+                                          h + im_h_i, w, 0}
                                       : std::vector<expr> {pic * mb_ + n,
-                                        h + im_h_i, w, oc * im_oc_block};
+                                          h + im_h_i, w, oc * im_oc_block};
 
                                     if (ic_num_block_pt > 1) {
                                       _if_(o_ic == 0) {
@@ -1956,11 +1957,14 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested(
                                           owner_->get_outputs()[0],
                                           blocking_output_
                                             ? slice_range {{n, 1UL}, {oc, 1},
-                                              {h + im_h_i, 1}, {w, im_w_block},
-                                              {0, im_oc_block}}
+                                                {h + im_h_i, 1},
+                                                {w, im_w_block},
+                                                {0, im_oc_block}}
                                             : slice_range {{n, 1UL},
-                                              {h + im_h_i, 1}, {w, im_w_block},
-                                              {oc * im_oc_block, im_oc_block}});
+                                                {h + im_h_i, 1},
+                                                {w, im_w_block},
+                                                {oc * im_oc_block,
+                                                  im_oc_block}});
                                       }
                                     }
                                   }
@@ -1972,11 +1976,11 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested(
                                       owner_->get_outputs()[0],
                                       blocking_output_
                                         ? slice_range {{n, 1UL}, {oc, 1},
-                                          {h, im_h_block}, {w, im_w_block},
-                                          {0, im_oc_block}}
+                                            {h, im_h_block}, {w, im_w_block},
+                                            {0, im_oc_block}}
                                         : slice_range {{n, 1UL},
-                                          {h, im_h_block}, {w, im_w_block},
-                                          {oc * im_oc_block, im_oc_block}});
+                                            {h, im_h_block}, {w, im_w_block},
+                                            {oc * im_oc_block, im_oc_block}});
                                   }
                                 }
                               }
@@ -1991,11 +1995,11 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested(
                                   owner_->get_outputs()[0],
                                   blocking_output_
                                     ? slice_range {{n, 1UL}, {anch_c, 1},
-                                      {h, im_h_block}, {w, im_w_block},
-                                      {0, im_oc_block}}
+                                        {h, im_h_block}, {w, im_w_block},
+                                        {0, im_oc_block}}
                                     : slice_range {{n, 1UL}, {h, im_h_block},
-                                      {w, im_w_block},
-                                      {anch_c * im_oc_block, oc_block}});
+                                        {w, im_w_block},
+                                        {anch_c * im_oc_block, oc_block}});
                               }
                             }
                           }
@@ -2016,11 +2020,11 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested(
                               owner_->get_outputs()[0],
                               blocking_output_
                                 ? slice_range {{n, 1UL}, {anch_c, 1},
-                                  {h, im_h_block}, {anch_w, w_block},
-                                  {0, im_oc_block}}
+                                    {h, im_h_block}, {anch_w, w_block},
+                                    {0, im_oc_block}}
                                 : slice_range {{n, 1UL}, {h, im_h_block},
-                                  {anch_w, w_block},
-                                  {anch_c * im_oc_block, oc_block}});
+                                    {anch_w, w_block},
+                                    {anch_c * im_oc_block, oc_block}});
                           }
                         }
                       }
@@ -2044,11 +2048,11 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested(
                           create_fusion_anchor(fusion, owner_->get_outputs()[0],
                             blocking_output_
                               ? slice_range {{n, 1UL}, {anch_c, 1},
-                                {anch_h, h_block}, {anch_w, w_block},
-                                {0, im_oc_block}}
+                                  {anch_h, h_block}, {anch_w, w_block},
+                                  {0, im_oc_block}}
                               : slice_range {{n, 1UL}, {anch_h, h_block},
-                                {anch_w, w_block},
-                                {anch_c * im_oc_block, oc_block}});
+                                  {anch_w, w_block},
+                                  {anch_c * im_oc_block, oc_block}});
                         }
                       }
                     }
@@ -2064,16 +2068,16 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested(
               create_fusion_anchor(fusion, owner_->get_outputs()[0],
                 blocking_output_
                   ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block},
-                    {0, oh_expr_}, {0, ow_}, {0, im_oc_block}}
+                      {0, oh_expr_}, {0, ow_}, {0, im_oc_block}}
                   : slice_range {
-                    {pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
+                      {pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
             }
           }
           if (oc_threads == 1 && h_threads == 1 && w_threads == 1) {
             create_fusion_anchor(fusion, owner_->get_outputs()[0],
               blocking_output_
                 ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block},
-                  {0, oh_expr_}, {0, ow_}, {0, im_oc_block}}
+                    {0, oh_expr_}, {0, ow_}, {0, im_oc_block}}
                 : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
           }
         }
@@ -2082,7 +2086,7 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested(
           create_fusion_anchor(fusion, owner_->get_outputs()[0],
             blocking_output_
               ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_},
-                {0, ow_}, {0, im_oc_block}}
+                  {0, ow_}, {0, im_oc_block}}
               : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
         }
       }
@@ -2091,7 +2095,7 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested(
         create_fusion_anchor(fusion, owner_->get_outputs()[0],
           blocking_output_
             ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_},
-              {0, ow_}, {0, im_oc_block}}
+                {0, ow_}, {0, im_oc_block}}
             : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
       }
     }
@@ -2099,7 +2103,7 @@ void gen_nested_conv_fwd_t::compute_1x1_no_pack_input_nested(
       create_fusion_anchor(fusion, owner_->get_outputs()[0],
         blocking_output_
           ? slice_range {{pbs, 1UL}, {0, oc_ / im_oc_block}, {0, oh_expr_},
-            {0, ow_}, {0, im_oc_block}}
+              {0, ow_}, {0, im_oc_block}}
           : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_}, {0, oc_}});
     }
   }
@@ -2234,12 +2238,12 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_os_blocking_nested(
                           auto out_tsr = tensor_ptr(output,
                             blocking_output_
                               ? std::vector<expr> {n, oc,
-                                (im_s_block_idx * im_s_block) / ow_,
-                                im_s_block_idx * im_s_block % ow_, 0}
+                                  (im_s_block_idx * im_s_block) / ow_,
+                                  im_s_block_idx * im_s_block % ow_, 0}
                               : std::vector<expr> {n,
-                                (im_s_block_idx * im_s_block) / ow_,
-                                (im_s_block_idx * im_s_block) % ow_,
-                                oc * im_oc_block});
+                                  (im_s_block_idx * im_s_block) / ow_,
+                                  (im_s_block_idx * im_s_block) % ow_,
+                                  oc * im_oc_block});
 
                           int adj_ow = ow_ + num_elems_skip_per_ow_;
 
@@ -2248,15 +2252,15 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_os_blocking_nested(
                               blocking_output_
                                 ? std::vector<expr> {n, oc, 0, 0, 0}
                                 : std::vector<expr> {
-                                  n, 0, 0, oc * config.im_oc_block});
+                                    n, 0, 0, oc * config.im_oc_block});
                           } else {
                             auto acc_m = os_acc_size[{im_s_block_idx}];
                             out_tsr = tensor_ptr(output,
                               blocking_output_
                                 ? std::vector<expr> {n, oc, acc_m / ow_,
-                                  acc_m % ow_, 0}
+                                    acc_m % ow_, 0}
                                 : std::vector<expr> {n, acc_m / ow_,
-                                  acc_m % ow_, oc * im_oc_block});
+                                    acc_m % ow_, oc * im_oc_block});
                           }
 
                           _for_(i_c, 0, ic_block / im_ic_block) {
@@ -2273,15 +2277,15 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_os_blocking_nested(
                                     = ((im_s_block_idx * im_s_block) % adj_ow);
                                   std::vector<expr> input_pos = blocking_input_
                                     ? std::vector<expr> {n, ic,
-                                      h * sh_ + dh_ * r, w * sw_ + dw_ * s, 0}
+                                        h * sh_ + dh_ * r, w * sw_ + dw_ * s, 0}
                                     : std::vector<expr> {n, h * sh_ + dh_ * r,
-                                      w * sw_ + dw_ * s, ic * im_ic_block};
+                                        w * sw_ + dw_ * s, ic * im_ic_block};
 
                                   A_list[idx] = tensor_ptr(input, input_pos);
                                   B_list[idx] = tensor_ptr(weight,
                                     kpack > 1
                                       ? std::vector<expr> {oc, ic, r, s, 0, 0,
-                                        0}
+                                          0}
                                       : std::vector<expr> {oc, ic, r, s, 0, 0});
                                 }
                               }
@@ -2321,14 +2325,14 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_os_blocking_nested(
                                   owner_->get_outputs()[0],
                                   blocking_output_
                                     ? slice_range {{n, 1UL}, {oc, 1},
-                                      {im_s_block_idx * (oh_ / os_num_block),
-                                        (oh_ / os_num_block)},
-                                      {0, ow_}, {0, im_oc_block}}
+                                        {im_s_block_idx * (oh_ / os_num_block),
+                                          (oh_ / os_num_block)},
+                                        {0, ow_}, {0, im_oc_block}}
                                     : slice_range {{n, 1UL},
-                                      {im_s_block_idx * (oh_ / os_num_block),
-                                        (oh_ / os_num_block)},
-                                      {0, ow_},
-                                      {oc * im_oc_block, im_oc_block}});
+                                        {im_s_block_idx * (oh_ / os_num_block),
+                                          (oh_ / os_num_block)},
+                                        {0, ow_},
+                                        {oc * im_oc_block, im_oc_block}});
                               }
                             }
                           }
@@ -2342,43 +2346,44 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_os_blocking_nested(
 
             if (oc_threads == 1 && ic_threads == 1 && s_threads == 1) {
               create_fusion_anchor(fusion, owner_->get_outputs()[0],
-                blocking_output_ ? slice_range {{pbs, 1UL},
-                  {outer_k * oc_ / im_oc_block / oc_split,
-                    oc_ / im_oc_block / oc_split},
-                  {0, oh_}, {0, ow_}, {0, im_oc_block}}
-                                 : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
-                                   {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                blocking_output_
+                  ? slice_range {{pbs, 1UL},
+                      {outer_k * oc_ / im_oc_block / oc_split,
+                        oc_ / im_oc_block / oc_split},
+                      {0, oh_}, {0, ow_}, {0, im_oc_block}}
+                  : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
+                      {outer_k * oc_ / oc_split, oc_ / oc_split}});
             }
           }
 
           if (oc_threads == 1 && s_threads == 1) {
             create_fusion_anchor(fusion, owner_->get_outputs()[0],
               blocking_output_ ? slice_range {{pbs, 1UL},
-                {outer_k * oc_ / im_oc_block / oc_split,
-                  oc_ / im_oc_block / oc_split},
-                {0, oh_}, {0, ow_}, {0, im_oc_block}}
+                                   {outer_k * oc_ / im_oc_block / oc_split,
+                                     oc_ / im_oc_block / oc_split},
+                                   {0, oh_}, {0, ow_}, {0, im_oc_block}}
                                : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
-                                 {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                                   {outer_k * oc_ / oc_split, oc_ / oc_split}});
           }
         }
         if (s_threads == 1) {
           create_fusion_anchor(fusion, owner_->get_outputs()[0],
             blocking_output_ ? slice_range {{pbs, 1UL},
-              {outer_k * oc_ / im_oc_block / oc_split,
-                oc_ / im_oc_block / oc_split},
-              {0, oh_}, {0, ow_}, {0, im_oc_block}}
+                                 {outer_k * oc_ / im_oc_block / oc_split,
+                                   oc_ / im_oc_block / oc_split},
+                                 {0, oh_}, {0, ow_}, {0, im_oc_block}}
                              : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
-                               {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                                 {outer_k * oc_ / oc_split, oc_ / oc_split}});
         }
       }
       if (mb_ > 1) {
         create_fusion_anchor(fusion, owner_->get_outputs()[0],
           blocking_output_ ? slice_range {{pbs, 1UL},
-            {outer_k * oc_ / im_oc_block / oc_split,
-              oc_ / im_oc_block / oc_split},
-            {0, oh_}, {0, ow_}, {0, im_oc_block}}
+                               {outer_k * oc_ / im_oc_block / oc_split,
+                                 oc_ / im_oc_block / oc_split},
+                               {0, oh_}, {0, ow_}, {0, im_oc_block}}
                            : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
-                             {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                               {outer_k * oc_ / oc_split, oc_ / oc_split}});
       }
     }
   }
@@ -2482,7 +2487,7 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
   auto L2_cache_size = ctx->machine_.cpu_flags_.getDCacheSize(2);
   int oc_split = (oc_threads == 1 && oc_num_block_pt == 1)
     ? get_oc_split_factor(
-      -1, weight_size, L2_cache_size, oc_block / im_oc_block)
+        -1, weight_size, L2_cache_size, oc_block / im_oc_block)
     : 1;
 
   auto LDA = blocking_input_ ? sw_ * im_ic_block : sw_ * ic_;
@@ -2574,14 +2579,14 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
                                                   std::vector<expr> input_pos
                                                     = blocking_input_
                                                     ? std::vector<expr> {n, ic,
-                                                      (h + im_h_i) * sh_
-                                                        + dh_ * r,
-                                                      w * sw_ + dw_ * s, 0}
+                                                        (h + im_h_i) * sh_
+                                                          + dh_ * r,
+                                                        w * sw_ + dw_ * s, 0}
                                                     : std::vector<expr> {n,
-                                                      (h + im_h_i) * sh_
-                                                        + dh_ * r,
-                                                      w * sw_ + dw_ * s,
-                                                      ic * im_ic_block};
+                                                        (h + im_h_i) * sh_
+                                                          + dh_ * r,
+                                                        w * sw_ + dw_ * s,
+                                                        ic * im_ic_block};
 
                                                   A_list[idx] = tensor_ptr(
                                                     input, input_pos);
@@ -2589,9 +2594,9 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
                                                     = tensor_ptr(weight,
                                                       kpack > 1
                                                         ? std::vector<expr> {oc,
-                                                          ic, r, s, 0, 0, 0}
-                                                        : std::vector<expr> {
-                                                          oc, ic, r, s, 0, 0});
+                                                            ic, r, s, 0, 0, 0}
+                                                        : std::vector<expr> {oc,
+                                                            ic, r, s, 0, 0});
                                                 }
                                               }
                                             }
@@ -2599,11 +2604,11 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
                                           std::vector<expr> output_pos
                                             = blocking_output_
                                             ? std::vector<expr> {pic * mb_expr_
-                                                + n,
-                                              oc, h + im_h_i, w, 0}
+                                                  + n,
+                                                oc, h + im_h_i, w, 0}
                                             : std::vector<expr> {
-                                              pic * mb_expr_ + n, h + im_h_i, w,
-                                              oc * im_oc_block};
+                                                pic * mb_expr_ + n, h + im_h_i,
+                                                w, oc * im_oc_block};
 
                                           generate_brgemm(real_im_w_block,
                                             im_ic_block, im_oc_block, ic_block,
@@ -2619,14 +2624,14 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
                                                 owner_->get_outputs()[0],
                                                 blocking_output_
                                                   ? slice_range {{n, 1UL},
-                                                    {oc, 1}, {h + im_h_i, 1},
-                                                    {w, real_im_w_block},
-                                                    {0, im_oc_block}}
+                                                      {oc, 1}, {h + im_h_i, 1},
+                                                      {w, real_im_w_block},
+                                                      {0, im_oc_block}}
                                                   : slice_range {{n, 1UL},
-                                                    {h + im_h_i, 1},
-                                                    {w, real_im_w_block},
-                                                    {oc * im_oc_block,
-                                                      im_oc_block}});
+                                                      {h + im_h_i, 1},
+                                                      {w, real_im_w_block},
+                                                      {oc * im_oc_block,
+                                                        im_oc_block}});
                                             }
                                           } // im_h_i
                                         }
@@ -2638,14 +2643,14 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
                                             owner_->get_outputs()[0],
                                             blocking_output_
                                               ? slice_range {{n, 1UL}, {oc, 1},
-                                                {h, real_im_h_block},
-                                                {w, real_im_w_block},
-                                                {0, im_oc_block}}
+                                                  {h, real_im_h_block},
+                                                  {w, real_im_w_block},
+                                                  {0, im_oc_block}}
                                               : slice_range {{n, 1UL},
-                                                {h, real_im_h_block},
-                                                {w, real_im_w_block},
-                                                {oc * im_oc_block,
-                                                  im_oc_block}});
+                                                  {h, real_im_h_block},
+                                                  {w, real_im_w_block},
+                                                  {oc * im_oc_block,
+                                                    im_oc_block}});
                                         }
                                       }
                                     } // i_oc
@@ -2663,14 +2668,14 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
                                         owner_->get_outputs()[0],
                                         blocking_output_
                                           ? slice_range {{n, 1UL}, {anch_c, 1},
-                                            {h, real_im_h_block},
-                                            {w, real_im_w_block},
-                                            {0, im_oc_block}}
+                                              {h, real_im_h_block},
+                                              {w, real_im_w_block},
+                                              {0, im_oc_block}}
                                           : slice_range {{n, 1UL},
-                                            {h, real_im_h_block},
-                                            {w, real_im_w_block},
-                                            {anch_c * im_oc_block,
-                                              im_oc_block}});
+                                              {h, real_im_h_block},
+                                              {w, real_im_w_block},
+                                              {anch_c * im_oc_block,
+                                                im_oc_block}});
                                     }
                                   }
                                 } // i_w
@@ -2693,11 +2698,12 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
                                     owner_->get_outputs()[0],
                                     blocking_output_
                                       ? slice_range {{n, 1UL}, {anch_c, 1},
-                                        {h, real_im_h_block}, {anch_w, w_block},
-                                        {0, im_oc_block}}
+                                          {h, real_im_h_block},
+                                          {anch_w, w_block}, {0, im_oc_block}}
                                       : slice_range {{n, 1UL},
-                                        {h, real_im_h_block}, {anch_w, w_block},
-                                        {anch_c * im_oc_block, oc_block}});
+                                          {h, real_im_h_block},
+                                          {anch_w, w_block},
+                                          {anch_c * im_oc_block, oc_block}});
                                 }
                               }
                             } // i_h
@@ -2725,13 +2731,13 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
                                 owner_->get_outputs()[0],
                                 blocking_output_
                                   ? slice_range {{n, 1UL}, {anch_c, 1},
-                                    {anch_h, oh_ / oh_used_threads},
-                                    {anch_w, ow_ / ow_used_threads},
-                                    {0, im_oc_block}}
+                                      {anch_h, oh_ / oh_used_threads},
+                                      {anch_w, ow_ / ow_used_threads},
+                                      {0, im_oc_block}}
                                   : slice_range {{n, 1UL},
-                                    {anch_h, oh_ / oh_used_threads},
-                                    {anch_w, ow_ / ow_used_threads},
-                                    {anch_c * im_oc_block, oc_block}});
+                                      {anch_h, oh_ / oh_used_threads},
+                                      {anch_w, ow_ / ow_used_threads},
+                                      {anch_c * im_oc_block, oc_block}});
                             }
                           }
                         } // o_ic
@@ -2747,11 +2753,11 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
                 create_fusion_anchor(fusion, owner_->get_outputs()[0],
                   blocking_output_
                     ? slice_range {{pbs, 1UL},
-                      {outer_k * oc_ / im_oc_block / oc_split,
-                        oc_ / im_oc_block / oc_split},
-                      {0, oh_}, {0, ow_}, {0, im_oc_block}}
+                        {outer_k * oc_ / im_oc_block / oc_split,
+                          oc_ / im_oc_block / oc_split},
+                        {0, oh_}, {0, ow_}, {0, im_oc_block}}
                     : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
-                      {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                        {outer_k * oc_ / oc_split, oc_ / oc_split}});
               }
             }
 
@@ -2759,22 +2765,22 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
               create_fusion_anchor(fusion, owner_->get_outputs()[0],
                 blocking_output_
                   ? slice_range {{pbs, 1UL},
-                    {outer_k * oc_ / im_oc_block / oc_split,
-                      oc_ / im_oc_block / oc_split},
-                    {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
+                      {outer_k * oc_ / im_oc_block / oc_split,
+                        oc_ / im_oc_block / oc_split},
+                      {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
                   : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_},
-                    {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                      {outer_k * oc_ / oc_split, oc_ / oc_split}});
             }
           }
           if (h_threads == 1 && w_threads == 1) {
             create_fusion_anchor(fusion, owner_->get_outputs()[0],
               blocking_output_
                 ? slice_range {{pbs, 1UL},
-                  {outer_k * oc_ / im_oc_block / oc_split,
-                    oc_ / im_oc_block / oc_split},
-                  {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
+                    {outer_k * oc_ / im_oc_block / oc_split,
+                      oc_ / im_oc_block / oc_split},
+                    {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
                 : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_},
-                  {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                    {outer_k * oc_ / oc_split, oc_ / oc_split}});
           }
         }
 
@@ -2782,22 +2788,22 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_no_padding_nested(
           create_fusion_anchor(fusion, owner_->get_outputs()[0],
             blocking_output_
               ? slice_range {{pbs, 1UL},
-                {outer_k * oc_ / im_oc_block / oc_split,
-                  oc_ / im_oc_block / oc_split},
-                {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
+                  {outer_k * oc_ / im_oc_block / oc_split,
+                    oc_ / im_oc_block / oc_split},
+                  {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
               : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_},
-                {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                  {outer_k * oc_ / oc_split, oc_ / oc_split}});
         }
       }
 
       create_fusion_anchor(fusion, owner_->get_outputs()[0],
         blocking_output_
           ? slice_range {{pbs, 1UL},
-            {outer_k * oc_ / im_oc_block / oc_split,
-              oc_ / im_oc_block / oc_split},
-            {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
+              {outer_k * oc_ / im_oc_block / oc_split,
+                oc_ / im_oc_block / oc_split},
+              {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
           : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_},
-            {outer_k * oc_ / oc_split, oc_ / oc_split}});
+              {outer_k * oc_ / oc_split, oc_ / oc_split}});
     }
   }
   bind_output_loop_axis(lpbs, "N");
@@ -2961,21 +2967,21 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested(
                                               std::vector<expr> input_pos
                                                 = blocking_input_
                                                 ? std::vector<expr> {n, ic,
-                                                  (h + im_h_i) * sh_ + r,
-                                                  w * sw_ + s, 0}
+                                                    (h + im_h_i) * sh_ + r,
+                                                    w * sw_ + s, 0}
                                                 : std::vector<expr> {n,
-                                                  (h + im_h_i) * sh_ + r,
-                                                  w * sw_ + s,
-                                                  ic * im_ic_block};
+                                                    (h + im_h_i) * sh_ + r,
+                                                    w * sw_ + s,
+                                                    ic * im_ic_block};
 
                                               A_list[idx]
                                                 = tensor_ptr(input, input_pos);
                                               B_list[idx] = tensor_ptr(weight,
                                                 kpack > 1
                                                   ? std::vector<expr> {oc, ic,
-                                                    r, s, 0, 0, 0}
+                                                      r, s, 0, 0, 0}
                                                   : std::vector<expr> {
-                                                    oc, ic, r, s, 0, 0});
+                                                      oc, ic, r, s, 0, 0});
                                             }
                                           }
                                         }
@@ -3005,9 +3011,9 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested(
                                       std::vector<expr> output_pos
                                         = blocking_output_
                                         ? std::vector<expr> {pic * mb_ + n, oc,
-                                          h + im_h_i, w, 0}
+                                            h + im_h_i, w, 0}
                                         : std::vector<expr> {pic * mb_ + n,
-                                          h + im_h_i, w, oc * im_oc_block};
+                                            h + im_h_i, w, oc * im_oc_block};
 
                                       if (ic_num_block_pt > 1) {
                                         _if_(o_ic == 0) {
@@ -3059,14 +3065,14 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested(
                                             owner_->get_outputs()[0],
                                             blocking_output_
                                               ? slice_range {{n, 1UL}, {oc, 1},
-                                                {h + im_h_i, 1},
-                                                {w, im_w_block},
-                                                {0, im_oc_block}}
+                                                  {h + im_h_i, 1},
+                                                  {w, im_w_block},
+                                                  {0, im_oc_block}}
                                               : slice_range {{n, 1UL},
-                                                {h + im_h_i, 1},
-                                                {w, im_w_block},
-                                                {oc * im_oc_block,
-                                                  im_oc_block}});
+                                                  {h + im_h_i, 1},
+                                                  {w, im_w_block},
+                                                  {oc * im_oc_block,
+                                                    im_oc_block}});
                                         }
                                       }
                                     }
@@ -3078,11 +3084,11 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested(
                                         owner_->get_outputs()[0],
                                         blocking_output_
                                           ? slice_range {{n, 1UL}, {oc, 1},
-                                            {h, im_h_block}, {w, im_w_block},
-                                            {0, im_oc_block}}
+                                              {h, im_h_block}, {w, im_w_block},
+                                              {0, im_oc_block}}
                                           : slice_range {{n, 1UL},
-                                            {h, im_h_block}, {w, im_w_block},
-                                            {oc * im_oc_block, im_oc_block}});
+                                              {h, im_h_block}, {w, im_w_block},
+                                              {oc * im_oc_block, im_oc_block}});
                                     }
                                   }
                                 }
@@ -3099,11 +3105,11 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested(
                                     owner_->get_outputs()[0],
                                     blocking_output_
                                       ? slice_range {{n, 1UL}, {anch_c, 1},
-                                        {h, im_h_block}, {w, im_w_block},
-                                        {0, im_oc_block}}
+                                          {h, im_h_block}, {w, im_w_block},
+                                          {0, im_oc_block}}
                                       : slice_range {{n, 1UL}, {h, im_h_block},
-                                        {w, im_w_block},
-                                        {anch_c * im_oc_block, oc_block}});
+                                          {w, im_w_block},
+                                          {anch_c * im_oc_block, oc_block}});
                                 }
                               }
                             }
@@ -3125,11 +3131,11 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested(
                                 owner_->get_outputs()[0],
                                 blocking_output_
                                   ? slice_range {{n, 1UL}, {anch_c, 1},
-                                    {h, im_h_block}, {anch_w, w_block},
-                                    {0, im_oc_block}}
+                                      {h, im_h_block}, {anch_w, w_block},
+                                      {0, im_oc_block}}
                                   : slice_range {{n, 1UL}, {h, im_h_block},
-                                    {anch_w, w_block},
-                                    {anch_c * im_oc_block, oc_block}});
+                                      {anch_w, w_block},
+                                      {anch_c * im_oc_block, oc_block}});
                             }
                           }
                         }
@@ -3155,11 +3161,11 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested(
                               owner_->get_outputs()[0],
                               blocking_output_
                                 ? slice_range {{n, 1UL}, {anch_c, 1},
-                                  {anch_h, h_block}, {anch_w, w_block},
-                                  {0, im_oc_block}}
+                                    {anch_h, h_block}, {anch_w, w_block},
+                                    {0, im_oc_block}}
                                 : slice_range {{n, 1UL}, {anch_h, h_block},
-                                  {anch_w, w_block},
-                                  {anch_c * im_oc_block, oc_block}});
+                                    {anch_w, w_block},
+                                    {anch_c * im_oc_block, oc_block}});
                           }
                         }
                       }
@@ -3175,53 +3181,54 @@ void gen_nested_conv_fwd_t::compute_conv_no_padding_nested(
                 create_fusion_anchor(fusion, owner_->get_outputs()[0],
                   blocking_output_
                     ? slice_range {{pbs, 1UL},
-                      {outer_k * oc_ / im_oc_block / oc_split,
-                        oc_ / im_oc_block / oc_split},
-                      {0, oh_}, {0, ow_}, {0, im_oc_block}}
+                        {outer_k * oc_ / im_oc_block / oc_split,
+                          oc_ / im_oc_block / oc_split},
+                        {0, oh_}, {0, ow_}, {0, im_oc_block}}
                     : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
-                      {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                        {outer_k * oc_ / oc_split, oc_ / oc_split}});
               }
             }
 
             if (oc_threads == 1 && h_threads == 1 && w_threads == 1) {
               create_fusion_anchor(fusion, owner_->get_outputs()[0],
-                blocking_output_ ? slice_range {{pbs, 1UL},
-                  {outer_k * oc_ / im_oc_block / oc_split,
-                    oc_ / im_oc_block / oc_split},
-                  {0, oh_}, {0, ow_}, {0, im_oc_block}}
-                                 : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
-                                   {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                blocking_output_
+                  ? slice_range {{pbs, 1UL},
+                      {outer_k * oc_ / im_oc_block / oc_split,
+                        oc_ / im_oc_block / oc_split},
+                      {0, oh_}, {0, ow_}, {0, im_oc_block}}
+                  : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
+                      {outer_k * oc_ / oc_split, oc_ / oc_split}});
             }
           }
           if (h_threads == 1 && w_threads == 1) {
             create_fusion_anchor(fusion, owner_->get_outputs()[0],
               blocking_output_ ? slice_range {{pbs, 1UL},
-                {outer_k * oc_ / im_oc_block / oc_split,
-                  oc_ / im_oc_block / oc_split},
-                {0, oh_}, {0, ow_}, {0, im_oc_block}}
+                                   {outer_k * oc_ / im_oc_block / oc_split,
+                                     oc_ / im_oc_block / oc_split},
+                                   {0, oh_}, {0, ow_}, {0, im_oc_block}}
                                : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
-                                 {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                                   {outer_k * oc_ / oc_split, oc_ / oc_split}});
           }
         }
 
         if (h_threads == 1) {
           create_fusion_anchor(fusion, owner_->get_outputs()[0],
             blocking_output_ ? slice_range {{pbs, 1UL},
-              {outer_k * oc_ / im_oc_block / oc_split,
-                oc_ / im_oc_block / oc_split},
-              {0, oh_}, {0, ow_}, {0, im_oc_block}}
+                                 {outer_k * oc_ / im_oc_block / oc_split,
+                                   oc_ / im_oc_block / oc_split},
+                                 {0, oh_}, {0, ow_}, {0, im_oc_block}}
                              : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
-                               {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                                 {outer_k * oc_ / oc_split, oc_ / oc_split}});
         }
       }
       if (mb_ > 1) {
         create_fusion_anchor(fusion, owner_->get_outputs()[0],
           blocking_output_ ? slice_range {{pbs, 1UL},
-            {outer_k * oc_ / im_oc_block / oc_split,
-              oc_ / im_oc_block / oc_split},
-            {0, oh_}, {0, ow_}, {0, im_oc_block}}
+                               {outer_k * oc_ / im_oc_block / oc_split,
+                                 oc_ / im_oc_block / oc_split},
+                               {0, oh_}, {0, ow_}, {0, im_oc_block}}
                            : slice_range {{pbs, 1UL}, {0, oh_}, {0, ow_},
-                             {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                               {outer_k * oc_ / oc_split, oc_ / oc_split}});
       }
     }
   }
@@ -3308,9 +3315,9 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output,
                         _if_(h + im_h_i < oh_) {
                           std::vector<expr> output_pos = blocking_output_
                             ? std::vector<expr> {pic * mb_ + n, oc, h + im_h_i,
-                              w, 0}
+                                w, 0}
                             : std::vector<expr> {
-                              pic * mb_ + n, h + im_h_i, w, oc * im_oc_block};
+                                pic * mb_ + n, h + im_h_i, w, oc * im_oc_block};
 
                           if (ic_num_block_pt > 1) {
                             _if_(o_ic == 0) {
@@ -3410,18 +3417,18 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output,
                                             lanes)]
                                             = input[blocking_input_
                                                 ? span_t(
-                                                  {n, ic,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_, k},
-                                                  lanes)
+                                                    {n, ic,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_, k},
+                                                    lanes)
                                                 : span_t(
-                                                  {n,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_,
-                                                    ic * im_ic_block + k},
-                                                  lanes)];
+                                                    {n,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_,
+                                                      ic * im_ic_block + k},
+                                                    lanes)];
                                         }
                                       }
                                     }
@@ -3471,18 +3478,18 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output,
                                             lanes)]
                                             = input[blocking_input_
                                                 ? span_t(
-                                                  {n, ic,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_, k},
-                                                  lanes)
+                                                    {n, ic,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_, k},
+                                                    lanes)
                                                 : span_t(
-                                                  {n,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_,
-                                                    ic * im_ic_block + k},
-                                                  lanes)];
+                                                    {n,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_,
+                                                      ic * im_ic_block + k},
+                                                    lanes)];
                                         }
                                       }
 
@@ -3520,12 +3527,12 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output,
                                         A_list[idx] = tensor_ptr(input,
                                           blocking_input_
                                             ? std::vector<expr> {n, ic,
-                                              (h + im_h_i) * sh_ + r - ph_b_,
-                                              w * sw_ + s - pw_b_, 0}
+                                                (h + im_h_i) * sh_ + r - ph_b_,
+                                                w * sw_ + s - pw_b_, 0}
                                             : std::vector<expr> {n,
-                                              (h + im_h_i) * sh_ + r - ph_b_,
-                                              w * sw_ + s - pw_b_,
-                                              ic * im_ic_block});
+                                                (h + im_h_i) * sh_ + r - ph_b_,
+                                                w * sw_ + s - pw_b_,
+                                                ic * im_ic_block});
                                       }
                                     }
                                   }
@@ -3548,18 +3555,18 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output,
                                             lanes)]
                                             = input[blocking_input_
                                                 ? span_t(
-                                                  {n, ic,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_, k},
-                                                  lanes)
+                                                    {n, ic,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_, k},
+                                                    lanes)
                                                 : span_t(
-                                                  {n,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_,
-                                                    ic * im_ic_block + k},
-                                                  lanes)];
+                                                    {n,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_,
+                                                      ic * im_ic_block + k},
+                                                    lanes)];
                                         }
                                       }
                                       builtin::brgemm_init(
@@ -3600,7 +3607,7 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output,
                                   B_list[idx] = tensor_ptr(weight,
                                     kpack > 1
                                       ? std::vector<expr> {oc, ic, r, s, 0, 0,
-                                        0}
+                                          0}
                                       : std::vector<expr> {oc, ic, r, s, 0, 0});
                                 }
                               }
@@ -3635,11 +3642,11 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output,
                                 owner_->get_outputs()[0],
                                 blocking_output_
                                   ? slice_range {{n, 1UL}, {oc, 1},
-                                    {h + im_h_i, 1}, {w, im_w_block},
-                                    {0, im_oc_block}}
+                                      {h + im_h_i, 1}, {w, im_w_block},
+                                      {0, im_oc_block}}
                                   : slice_range {{n, 1UL}, {h + im_h_i, 1},
-                                    {w, im_w_block},
-                                    {oc * im_oc_block, im_oc_block}});
+                                      {w, im_w_block},
+                                      {oc * im_oc_block, im_oc_block}});
                             }
                           }
                         } // im_h_i
@@ -3649,10 +3656,10 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output,
                           create_fusion_anchor(fusion, owner_->get_outputs()[0],
                             blocking_output_
                               ? slice_range {{n, 1UL}, {oc, 1}, {h, im_h_block},
-                                {w, im_w_block}, {0, im_oc_block}}
+                                  {w, im_w_block}, {0, im_oc_block}}
                               : slice_range {{n, 1UL}, {h, im_h_block},
-                                {w, im_w_block},
-                                {oc * im_oc_block, im_oc_block}});
+                                  {w, im_w_block},
+                                  {oc * im_oc_block, im_oc_block}});
                         }
                       }
                     } // i_w
@@ -3665,11 +3672,12 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output,
                                       + o_w * w_block / im_w_block)
                         * im_w_block;
                       create_fusion_anchor(fusion, owner_->get_outputs()[0],
-                        blocking_output_ ? slice_range {{n, 1UL}, {oc, 1},
-                          {h, im_h_block}, {anch_w, w_block}, {0, im_oc_block}}
-                                         : slice_range {{n, 1UL},
-                                           {h, im_h_block}, {anch_w, w_block},
-                                           {oc * im_oc_block, im_oc_block}});
+                        blocking_output_
+                          ? slice_range {{n, 1UL}, {oc, 1}, {h, im_h_block},
+                              {anch_w, w_block}, {0, im_oc_block}}
+                          : slice_range {{n, 1UL}, {h, im_h_block},
+                              {anch_w, w_block},
+                              {oc * im_oc_block, im_oc_block}});
                     }
                   }
                 } // i_h
@@ -3687,9 +3695,10 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output,
                     create_fusion_anchor(fusion, owner_->get_outputs()[0],
                       blocking_output_
                         ? slice_range {{n, 1UL}, {oc, 1}, {anch_h, h_block},
-                          {anch_w, w_block}, {0, im_oc_block}}
+                            {anch_w, w_block}, {0, im_oc_block}}
                         : slice_range {{n, 1UL}, {anch_h, h_block},
-                          {anch_w, w_block}, {oc * im_oc_block, im_oc_block}});
+                            {anch_w, w_block},
+                            {oc * im_oc_block, im_oc_block}});
                   }
                 }
               } // ioc
@@ -3711,9 +3720,10 @@ void gen_nested_conv_fwd_t::single_thread_conv_padding_call(expr &output,
                 create_fusion_anchor(fusion, owner_->get_outputs()[0],
                   blocking_output_
                     ? slice_range {{n, 1UL}, {anch_oc, 1}, {anch_h, h_block},
-                      {anch_w, w_block}, {0, im_oc_block}}
+                        {anch_w, w_block}, {0, im_oc_block}}
                     : slice_range {{n, 1UL}, {anch_h, h_block},
-                      {anch_w, w_block}, {anch_oc * im_oc_block, im_oc_block}});
+                        {anch_w, w_block},
+                        {anch_oc * im_oc_block, im_oc_block}});
               }
             } // o_ic
           }
@@ -3810,9 +3820,9 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call(
                           _var_(copy_width, datatypes::index);
                           std::vector<expr> output_pos = blocking_output_
                             ? std::vector<expr> {pic * mb_ + n, oc, h + im_h_i,
-                              w, 0}
+                                w, 0}
                             : std::vector<expr> {
-                              pic * mb_ + n, h + im_h_i, w, oc * im_oc_block};
+                                pic * mb_ + n, h + im_h_i, w, oc * im_oc_block};
 
                           if (ic_num_block_pt > 1) {
                             _if_(o_ic == 0) {
@@ -3914,18 +3924,18 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call(
                                             lanes)]
                                             = input[blocking_input_
                                                 ? span_t(
-                                                  {n, ic,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_, k},
-                                                  lanes)
+                                                    {n, ic,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_, k},
+                                                    lanes)
                                                 : span_t(
-                                                  {n,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_,
-                                                    ic * im_ic_block + k},
-                                                  lanes)];
+                                                    {n,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_,
+                                                      ic * im_ic_block + k},
+                                                    lanes)];
                                         }
                                       }
                                     }
@@ -3975,18 +3985,18 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call(
                                             lanes)]
                                             = input[blocking_input_
                                                 ? span_t(
-                                                  {n, ic,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_, k},
-                                                  lanes)
+                                                    {n, ic,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_, k},
+                                                    lanes)
                                                 : span_t(
-                                                  {n,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_,
-                                                    ic * im_ic_block + k},
-                                                  lanes)];
+                                                    {n,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_,
+                                                      ic * im_ic_block + k},
+                                                    lanes)];
                                         }
                                       }
 
@@ -4025,12 +4035,12 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call(
                                         A_list[idx] = tensor_ptr(input,
                                           blocking_input_
                                             ? std::vector<expr> {n, ic,
-                                              (h + im_h_i) * sh_ + r - ph_b_,
-                                              w * sw_ + s - pw_b_, 0}
+                                                (h + im_h_i) * sh_ + r - ph_b_,
+                                                w * sw_ + s - pw_b_, 0}
                                             : std::vector<expr> {n,
-                                              (h + im_h_i) * sh_ + r - ph_b_,
-                                              w * sw_ + s - pw_b_,
-                                              ic * im_ic_block});
+                                                (h + im_h_i) * sh_ + r - ph_b_,
+                                                w * sw_ + s - pw_b_,
+                                                ic * im_ic_block});
                                       }
                                     }
                                   }
@@ -4054,18 +4064,18 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call(
                                             lanes)]
                                             = input[blocking_input_
                                                 ? span_t(
-                                                  {n, ic,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_, k},
-                                                  lanes)
+                                                    {n, ic,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_, k},
+                                                    lanes)
                                                 : span_t(
-                                                  {n,
-                                                    (h + im_h_i) * sh_ + i
-                                                      - ph_b_,
-                                                    w * sw_ + j - pw_b_,
-                                                    ic * im_ic_block + k},
-                                                  lanes)];
+                                                    {n,
+                                                      (h + im_h_i) * sh_ + i
+                                                        - ph_b_,
+                                                      w * sw_ + j - pw_b_,
+                                                      ic * im_ic_block + k},
+                                                    lanes)];
                                         }
                                       }
                                       builtin::brgemm_init(
@@ -4106,7 +4116,7 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call(
                                   B_list[idx] = tensor_ptr(weight,
                                     kpack > 1
                                       ? std::vector<expr> {oc, ic, r, s, 0, 0,
-                                        0}
+                                          0}
                                       : std::vector<expr> {oc, ic, r, s, 0, 0});
                                 }
                               }
@@ -4132,11 +4142,11 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call(
                                 owner_->get_outputs()[0],
                                 blocking_output_
                                   ? slice_range {{n, 1}, {oc, 1},
-                                    {h + im_h_i, 1}, {w, real_im_w_block},
-                                    {0, im_oc_block}}
+                                      {h + im_h_i, 1}, {w, real_im_w_block},
+                                      {0, im_oc_block}}
                                   : slice_range {{n, 1UL}, {h + im_h_i, 1},
-                                    {w, real_im_w_block},
-                                    {oc * im_oc_block, im_oc_block}});
+                                      {w, real_im_w_block},
+                                      {oc * im_oc_block, im_oc_block}});
                             }
                           } // im_h_i
                         }
@@ -4146,11 +4156,11 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call(
                           create_fusion_anchor(fusion, owner_->get_outputs()[0],
                             blocking_output_
                               ? slice_range {{n, 1UL}, {oc, 1},
-                                {h, real_im_h_block}, {w, real_im_w_block},
-                                {0, im_oc_block}}
+                                  {h, real_im_h_block}, {w, real_im_w_block},
+                                  {0, im_oc_block}}
                               : slice_range {{n, 1UL}, {h, real_im_h_block},
-                                {w, real_im_w_block},
-                                {oc * im_oc_block, im_oc_block}});
+                                  {w, real_im_w_block},
+                                  {oc * im_oc_block, im_oc_block}});
                         }
                       } // i_w
                     }
@@ -4167,11 +4177,11 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call(
                       create_fusion_anchor(fusion, owner_->get_outputs()[0],
                         blocking_output_
                           ? slice_range {{n, 1UL}, {oc, 1},
-                            {h, real_im_h_block}, {anch_w, w_block},
-                            {0, im_oc_block}}
+                              {h, real_im_h_block}, {anch_w, w_block},
+                              {0, im_oc_block}}
                           : slice_range {{n, 1UL}, {h, real_im_h_block},
-                            {anch_w, w_block},
-                            {oc * im_oc_block, im_oc_block}});
+                              {anch_w, w_block},
+                              {oc * im_oc_block, im_oc_block}});
                     }
                   }
                 } // i_h
@@ -4188,9 +4198,10 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call(
                     create_fusion_anchor(fusion, owner_->get_outputs()[0],
                       blocking_output_
                         ? slice_range {{n, 1UL}, {oc, 1}, {anch_h, h_block},
-                          {anch_w, w_block}, {0, im_oc_block}}
+                            {anch_w, w_block}, {0, im_oc_block}}
                         : slice_range {{n, 1UL}, {anch_h, h_block},
-                          {anch_w, w_block}, {oc * im_oc_block, im_oc_block}});
+                            {anch_w, w_block},
+                            {oc * im_oc_block, im_oc_block}});
                   }
                 } // i_oc
               }
@@ -4212,9 +4223,10 @@ void gen_nested_conv_fwd_t::single_thread_dynamic_conv_padding_call(
                 create_fusion_anchor(fusion, owner_->get_outputs()[0],
                   blocking_output_
                     ? slice_range {{n, 1UL}, {anch_oc, 1}, {anch_h, h_block},
-                      {anch_w, w_block}, {0, im_oc_block}}
+                        {anch_w, w_block}, {0, im_oc_block}}
                     : slice_range {{n, 1UL}, {anch_h, h_block},
-                      {anch_w, w_block}, {anch_oc * im_oc_block, im_oc_block}});
+                        {anch_w, w_block},
+                        {anch_oc * im_oc_block, im_oc_block}});
               }
             }
           }
@@ -4358,7 +4370,7 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_padding_nested(
   auto L2_cache_size = ctx->machine_.cpu_flags_.getDCacheSize(2);
   int oc_split = (oc_threads == 1 && oc_num_block_pt == 1)
     ? get_oc_split_factor(
-      -1, weight_size, L2_cache_size, oc_block / im_oc_block)
+        -1, weight_size, L2_cache_size, oc_block / im_oc_block)
     : 1;
 
   // create a global shared zero-buffer referenced by padding
@@ -4410,11 +4422,11 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_padding_nested(
                 create_fusion_anchor(fusion, owner_->get_outputs()[0],
                   blocking_output_
                     ? slice_range {{pbs, 1UL},
-                      {outer_k * oc_ / im_oc_block / oc_split,
-                        oc_ / im_oc_block / oc_split},
-                      {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
+                        {outer_k * oc_ / im_oc_block / oc_split,
+                          oc_ / im_oc_block / oc_split},
+                        {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
                     : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_},
-                      {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                        {outer_k * oc_ / oc_split, oc_ / oc_split}});
               }
             }
 
@@ -4422,22 +4434,22 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_padding_nested(
               create_fusion_anchor(fusion, owner_->get_outputs()[0],
                 blocking_output_
                   ? slice_range {{pbs, 1UL},
-                    {outer_k * oc_ / im_oc_block / oc_split,
-                      oc_ / im_oc_block / oc_split},
-                    {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
+                      {outer_k * oc_ / im_oc_block / oc_split,
+                        oc_ / im_oc_block / oc_split},
+                      {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
                   : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_},
-                    {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                      {outer_k * oc_ / oc_split, oc_ / oc_split}});
             }
           }
           if (h_threads == 1 && w_threads == 1) {
             create_fusion_anchor(fusion, owner_->get_outputs()[0],
               blocking_output_
                 ? slice_range {{pbs, 1UL},
-                  {outer_k * oc_ / im_oc_block / oc_split,
-                    oc_ / im_oc_block / oc_split},
-                  {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
+                    {outer_k * oc_ / im_oc_block / oc_split,
+                      oc_ / im_oc_block / oc_split},
+                    {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
                 : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_},
-                  {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                    {outer_k * oc_ / oc_split, oc_ / oc_split}});
           }
         }
 
@@ -4445,21 +4457,21 @@ void gen_nested_conv_fwd_t::dynamic_compute_conv_padding_nested(
           create_fusion_anchor(fusion, owner_->get_outputs()[0],
             blocking_output_
               ? slice_range {{pbs, 1UL},
-                {outer_k * oc_ / im_oc_block / oc_split,
-                  oc_ / im_oc_block / oc_split},
-                {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
+                  {outer_k * oc_ / im_oc_block / oc_split,
+                    oc_ / im_oc_block / oc_split},
+                  {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
               : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_},
-                {outer_k * oc_ / oc_split, oc_ / oc_split}});
+                  {outer_k * oc_ / oc_split, oc_ / oc_split}});
         }
       }
       create_fusion_anchor(fusion, owner_->get_outputs()[0],
         blocking_output_
           ? slice_range {{pbs, 1UL},
-            {outer_k * oc_ / im_oc_block / oc_split,
-              oc_ / im_oc_block / oc_split},
-            {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
+              {outer_k * oc_ / im_oc_block / oc_split,
+                oc_ / im_oc_block / oc_split},
+              {0, oh_expr_}, {0, ow_expr_}, {0, im_oc_block}}
           : slice_range {{pbs, 1UL}, {0, oh_expr_}, {0, ow_expr_},
-            {outer_k * oc_ / oc_split, oc_ / oc_split}});
+              {outer_k * oc_ / oc_split, oc_ / oc_split}});
     }
   }
   bind_output_loop_axis(lpbs, "N");
diff --git a/legacy/core/src/runtime/microkernel/cpu/brgemm_onednn.cpp b/legacy/core/src/runtime/microkernel/cpu/brgemm_onednn.cpp
index edc8f909..49902ad5 100644
--- a/legacy/core/src/runtime/microkernel/cpu/brgemm_onednn.cpp
+++ b/legacy/core/src/runtime/microkernel/cpu/brgemm_onednn.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2020-2024 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 
 #include "brgemm_common.hpp"
 #include "brgemm_range_handle.hpp"
@@ -880,7 +881,7 @@ SC_API void dnnl_brgemm_call(brgemm_kernel_info *brg_desc, const void *A,
         new brgemm_batch_element_t[num]);
     brgemm_batch_element_t *batch = batch_v.get();
 #else
-    brgemm_batch_element_t batch[num];       // NOLINT
+    brgemm_batch_element_t batch[num]; // NOLINT
 #endif
 #endif
     if (top_pad) {
@@ -937,7 +938,7 @@ SC_API void dnnl_brgemm_call_postops(brgemm_kernel_info *brg_desc,
         new brgemm_batch_element_t[num]);
     brgemm_batch_element_t *batch = batch_v.get();
 #else
-    brgemm_batch_element_t batch[num];       // NOLINT
+    brgemm_batch_element_t batch[num]; // NOLINT
 #endif
 #endif
     if (top_pad) {
@@ -1000,7 +1001,7 @@ SC_API void dnnl_brgemm_list_call(brgemm_kernel_info *brg_desc,
       new brgemm_batch_element_t[batch_num]);
   brgemm_batch_element_t *batch = batch_v.get();
 #else
-  brgemm_batch_element_t batch[batch_num];   // NOLINT
+  brgemm_batch_element_t batch[batch_num]; // NOLINT
 #endif
 #endif
 
@@ -1066,7 +1067,7 @@ SC_API void dnnl_brgemm_list_call_postops(
       new brgemm_batch_element_t[batch_num]);
   brgemm_batch_element_t *batch = batch_v.get();
 #else
-  brgemm_batch_element_t batch[batch_num];   // NOLINT
+  brgemm_batch_element_t batch[batch_num]; // NOLINT
 #endif
 #endif
 
@@ -1292,7 +1293,7 @@ static int dnnl_brgemm_list_update_func(
       new brgemm_batch_element_t[batch_num]);
   brgemm_batch_element_t *batch = batch_v.get();
 #else
-  brgemm_batch_element_t batch[batch_num];   // NOLINT
+  brgemm_batch_element_t batch[batch_num]; // NOLINT
 #endif
 #endif
   int sizeofA = get_dtype_sizeof(dtypeA);
diff --git a/legacy/core/src/util/reflection.cpp b/legacy/core/src/util/reflection.cpp
index c588372f..6ad4b3eb 100644
--- a/legacy/core/src/util/reflection.cpp
+++ b/legacy/core/src/util/reflection.cpp
@@ -1,18 +1,19 @@
-/*******************************************************************************
- * Copyright 2020-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
 #include "reflection.hpp"
 #include <common/compiler_workarounds.hpp>
 #include <compiler/ir/sc_data_type.hpp>
@@ -411,7 +412,7 @@ bool visitor_t::dispatch(general_ref_t *v, general_ref_t *v2) {
   } else if (v->type_.base_ == reflection::basic_type::t_class) {
     return visit_class(v, v2);
   } else {
-// clang-format off
+    // clang-format off
 #define PUT_VALUE(TYPE) \
     case basic_type::t_##TYPE: return visit(reinterpret_cast<TYPE *>(v->data_),  v2 ? reinterpret_cast<TYPE *>(v2->data_) : nullptr);  break; // NOLINT
     // clang-format on
diff --git a/legacy/core/src/util/variant.hpp b/legacy/core/src/util/variant.hpp
index 752bcf0e..ee804b6a 100644
--- a/legacy/core/src/util/variant.hpp
+++ b/legacy/core/src/util/variant.hpp
@@ -1,18 +1,20 @@
-/*******************************************************************************
- * Copyright 2022-2023 Intel Corporation
+/*
+ * Copyright (C) 2025 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *******************************************************************************/
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
 #ifndef GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_UTIL_VARIANT_HPP
 #define GRAPH_BACKEND_GRAPH_COMPILER_CORE_SRC_UTIL_VARIANT_HPP
 #include <stdexcept>
@@ -35,7 +37,9 @@ struct const_max<arg1, arg2, args...> {
                                       : const_max<arg2, args...>::value;
 };
 
-template <size_t v> struct const_max<v> { static constexpr size_t value = v; };
+template <size_t v> struct const_max<v> {
+  static constexpr size_t value = v;
+};
 
 template <typename... Args> struct helper;
 
diff --git a/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp b/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp
index 9dc940fc..785a5bc0 100644
--- a/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp
+++ b/lib/gc/Dialect/Microkernel/MicrokernelOps.cpp
@@ -612,8 +612,8 @@ LogicalResult BrgemmExecuteOp::verify() {
   // inputs for BRGEMM: kernel id, A memref, B memref, C memref, batch_size,
   // addr_len
   if (inputs.size() != 6)
-    return brgemmOp.emitOpError() << "expect 6"
-                                  << " inputs but got " << inputs.size();
+    return brgemmOp.emitOpError()
+           << "expect 6" << " inputs but got " << inputs.size();
   // Verify the dispatch to be an i64.
   Value dispatch = brgemmOp.getDispatch();
   if (!dispatch.getType().isInteger(64))
diff --git a/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp b/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp
index f9d0663c..2c48c214 100644
--- a/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp
+++ b/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp
@@ -128,10 +128,9 @@ struct Kernel {
 
   explicit Kernel(cl_program program, cl_kernel kernel, const size_t *gridSize,
                   const size_t *blockSize, size_t argNum, const size_t *argSize)
-      : program(program),
-        kernel(kernel), globalSize{gridSize[0] * blockSize[0],
-                                   gridSize[1] * blockSize[1],
-                                   gridSize[2] * blockSize[2]},
+      : program(program), kernel(kernel),
+        globalSize{gridSize[0] * blockSize[0], gridSize[1] * blockSize[1],
+                   gridSize[2] * blockSize[2]},
         localSize{blockSize[0], blockSize[1], blockSize[2]},
         argSize(argSize, argSize + argNum) {
 #ifndef NDEBUG
diff --git a/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp b/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp
index 749ed807..0eabd6e1 100644
--- a/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp
+++ b/lib/gc/Transforms/Microkernel/ConvertLinalgToMicrokernel.cpp
@@ -175,8 +175,7 @@ static FailureOr<BrgemmDims> inferBrgemmDims(linalg::LinalgOp linalgOp) {
     }
   }
 
-  LLVM_DEBUG(llvm::dbgs() << "[inferBrgemmDims] Candidate dims: "
-                          << "\n");
+  LLVM_DEBUG(llvm::dbgs() << "[inferBrgemmDims] Candidate dims: " << "\n");
   LLVM_DEBUG(llvm::dbgs() << "[inferBrgemmDims] m pos in affine: " << mAffinePos
                           << "\n");
   LLVM_DEBUG(llvm::dbgs() << "[inferBrgemmDims] n pos in affine: " << nAffinePos