From e61f59600b505e758b38c09d3c02abe19868793c Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin
Date: Wed, 15 Jan 2025 11:36:13 -0800
Subject: [PATCH] src: introduce quant_entry_t and refactor arg_scales_t to
 rely on it

---
 src/common/binary.cpp                         |  17 +-
 src/common/binary_pd.hpp                      |  13 +-
 src/common/concat.cpp                         |  22 +-
 src/common/convolution.cpp                    |  16 +-
 src/common/convolution_pd.hpp                 |   4 +-
 src/common/deconvolution.cpp                  |  19 +-
 src/common/deconvolution_pd.hpp               |   6 +-
 src/common/group_normalization.cpp            |  18 +-
 src/common/group_normalization_pd.hpp         |  12 +-
 src/common/inner_product.cpp                  |  17 +-
 src/common/inner_product_pd.hpp               |   6 +-
 src/common/layer_normalization.cpp            |  18 +-
 src/common/layer_normalization_pd.hpp         |  18 +-
 src/common/matmul.cpp                         | 105 ++++---
 src/common/matmul_pd.hpp                      |  53 ++--
 src/common/primitive_attr.cpp                 |  12 +-
 src/common/primitive_attr.hpp                 |   2 +-
 src/common/primitive_attr_quant.cpp           | 101 +++++++
 src/common/primitive_attr_quant.hpp           | 277 ++++++++++--------
 src/common/primitive_desc.hpp                 |   8 +-
 src/common/primitive_hashing.cpp              |  26 +-
 src/common/primitive_hashing.hpp              |   3 +-
 src/common/reorder.cpp                        |  29 +-
 src/common/sdpa_pd.hpp                        |  17 +-
 src/common/sdpa_types.hpp                     |   4 +-
 src/common/serialization.cpp                  |  23 +-
 src/common/serialization_stream.hpp           |   3 +-
 src/common/softmax.cpp                        |  20 +-
 src/common/softmax_pd.hpp                     |  17 +-
 src/common/verbose.cpp                        |  29 +-
 src/common/verbose.hpp                        |   1 +
 src/cpu/aarch64/acl_reorder.hpp               |  12 +-
 src/cpu/aarch64/brgemm/brgemm.cpp             |  17 +-
 src/cpu/aarch64/jit_brdgmm_dw_conv.cpp        |   2 +-
 src/cpu/aarch64/jit_brgemm_conv_utils.cpp     |   4 +-
 src/cpu/aarch64/jit_brgemm_post_ops.hpp       |   4 +-
 src/cpu/aarch64/jit_uni_reorder.cpp           |  11 +-
 src/cpu/aarch64/jit_uni_reorder_utils.cpp     |  13 +-
 src/cpu/aarch64/matmul/acl_lowp_matmul.cpp    |   6 +-
 src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp |  19 +-
 src/cpu/aarch64/matmul/brgemm_matmul.cpp      |   4 +-
 .../aarch64/matmul/brgemm_matmul_utils.cpp    |  15 +-
 src/cpu/cpu_primitive.hpp                     |   6 +-
 src/cpu/dw_convolution_utils.hpp              |  16 +-
 src/cpu/gemm_convolution_utils.cpp            |   4 +-
 src/cpu/gemm_inner_product_utils.cpp          |  12 +-
 src/cpu/gemm_x8s8s32x_convolution.cpp         |  14 +-
 src/cpu/gemm_x8s8s32x_inner_product.cpp       |   7 +-
 src/cpu/matmul/gemm_bf16_matmul.cpp           |  23 +-
 src/cpu/matmul/gemm_f32_matmul.cpp            |  23 +-
 src/cpu/matmul/gemm_x8s8s32x_matmul.cpp       |  13 +-
 src/cpu/matmul/matmul_utils.hpp               |   9 +-
 src/cpu/matmul/ref_matmul.cpp                 |  23 +-
 src/cpu/matmul/ref_matmul_int8.cpp            |  30 +-
 src/cpu/ref_concat.hpp                        |  11 +-
 src/cpu/ref_convolution_int8.cpp              |  10 +-
 src/cpu/ref_deconvolution.cpp                 |  18 +-
 src/cpu/ref_fused_convolution.hpp             |  16 +-
 src/cpu/ref_inner_product_int8.cpp            |   7 +-
 src/cpu/ref_sum.hpp                           |   4 +-
 src/cpu/reorder/cpu_reorder_pd.hpp            |  10 +-
 src/cpu/reorder/simple_reorder.hpp            |  63 ++--
 src/cpu/scale_utils.cpp                       |  56 ++--
 src/cpu/scale_utils.hpp                       |   4 +-
 src/cpu/x64/brgemm/brgemm.cpp                 |  23 +-
 src/cpu/x64/brgemm/capi/brgemm_api.cpp        |   9 +-
 .../jit_avx512_core_amx_1x1_conv_kernel.cpp   |   5 +-
 .../jit_avx512_core_amx_1x1_convolution.cpp   |   7 +-
 .../jit_avx512_core_amx_1x1_convolution.hpp   |   6 +-
 .../x64/jit_avx512_core_amx_conv_kernel.cpp   |   4 +-
 .../x64/jit_avx512_core_amx_convolution.cpp   |  17 +-
 .../x64/jit_avx512_core_amx_convolution.hpp   |  10 +-
 .../x64/jit_avx512_core_amx_deconvolution.cpp |   7 +-
 .../x64/jit_avx512_core_scale_precompute.cpp  |  20 +-
 .../x64/jit_avx512_core_scale_precompute.hpp  |  13 +-
 ...t_avx512_core_x8s8s32x_1x1_conv_kernel.cpp |   6 +-
 ...t_avx512_core_x8s8s32x_1x1_convolution.cpp |   6 +-
 .../jit_avx512_core_x8s8s32x_conv_kernel.cpp  |   6 +-
 .../jit_avx512_core_x8s8s32x_convolution.cpp  |   4 +-
 ...jit_avx512_core_x8s8s32x_deconvolution.cpp |   8 +-
 src/cpu/x64/jit_brdgmm_dw_conv.cpp            |  11 +-
 src/cpu/x64/jit_brgemm_1x1_conv.cpp           |   9 +-
 src/cpu/x64/jit_brgemm_conv.cpp               |   9 +-
 src/cpu/x64/jit_brgemm_conv_bwd_strided.cpp   |   9 +-
 src/cpu/x64/jit_brgemm_conv_bwd_utils.cpp     |   4 +-
 src/cpu/x64/jit_brgemm_conv_utils.cpp         |   4 +-
 src/cpu/x64/jit_brgemm_inner_product.cpp      |   5 +-
 src/cpu/x64/jit_brgemm_inner_product.hpp      |   4 +-
 .../x64/jit_brgemm_inner_product_utils.cpp    |   5 +-
 src/cpu/x64/jit_brgemm_post_ops.cpp           |   3 +-
 src/cpu/x64/jit_gemm_inner_product_utils.cpp  |   4 +-
 src/cpu/x64/jit_uni_binary.cpp                |   8 +-
 src/cpu/x64/jit_uni_group_normalization.cpp   |   6 +-
 .../x64/jit_uni_instance_normalization.cpp    |   6 +-
 src/cpu/x64/jit_uni_layer_normalization.cpp   |   6 +-
 src/cpu/x64/jit_uni_reorder.cpp               |  13 +-
 src/cpu/x64/jit_uni_reorder_utils.cpp         |  15 +-
 src/cpu/x64/jit_uni_softmax.cpp               |  10 +-
 .../x64/jit_uni_x8s8s32x_1x1_conv_kernel.cpp  |   6 +-
 .../x64/jit_uni_x8s8s32x_1x1_convolution.cpp  |   6 +-
 src/cpu/x64/jit_uni_x8s8s32x_conv_kernel.cpp  |   6 +-
 src/cpu/x64/jit_uni_x8s8s32x_convolution.cpp  |   4 +-
 .../x64/jit_uni_x8s8s32x_deconvolution.cpp    |   8 +-
 src/cpu/x64/matmul/brgemm_matmul.cpp          |  27 +-
 src/cpu/x64/matmul/brgemm_matmul_utils.cpp    |  16 +-
 src/cpu/x64/matmul_inner_product.cpp          |  17 +-
 src/gpu/generic/ref_concat.hpp                |  10 +-
 .../sycl/layer_normalizations_kernels.hpp     |  32 +-
 src/gpu/generic/sycl/ref_binary.hpp           |   5 +-
 src/gpu/generic/sycl/ref_convolution.cpp      |  26 +-
 src/gpu/generic/sycl/ref_convolution.hpp      |   4 +-
 .../generic/sycl/ref_layer_normalizations.cpp |  16 +-
 .../generic/sycl/ref_layer_normalizations.hpp |   4 +-
 src/gpu/generic/sycl/ref_matmul.cpp           |   9 +-
 src/gpu/generic/sycl/ref_matmul.hpp           |   5 +-
 src/gpu/generic/sycl/ref_reorder.cpp          |   9 +-
 src/gpu/generic/sycl/ref_reorder.hpp          |   4 +-
 src/gpu/generic/sycl/ref_softmax.cpp          |   8 +-
 src/gpu/generic/sycl/ref_softmax.hpp          |  13 +-
 src/gpu/generic/sycl/ref_sum.hpp              |  14 +-
 src/gpu/generic/sycl/reorder_kernels.hpp      |  10 +-
 src/gpu/gpu_utils.hpp                         |  28 +-
 src/gpu/intel/jit/conv/config.cpp             |   4 +-
 src/gpu/intel/jit/gemm/gen_gemm.hpp           |  24 +-
 src/gpu/intel/jit/gemm/jit_gemm_pd.cpp        |  48 ++-
 src/gpu/intel/jit/ir/post_ops.cpp             |  19 +-
 src/gpu/intel/jit/ir/tensor_config.cpp        |   5 +-
 src/gpu/intel/jit/reorder/gen_reorder.cpp     |   9 +-
 .../intel/ocl/convolution_inner_product.cpp   |   2 +-
 src/gpu/intel/ocl/gemm/gemm_with_post_ops.cpp |  16 +-
 src/gpu/intel/ocl/gemm/ref_gemm.hpp           |  13 +-
 src/gpu/intel/ocl/gemm_inner_product.hpp      |  21 +-
 src/gpu/intel/ocl/gemm_matmul.hpp             |  30 +-
 src/gpu/intel/ocl/gen9_binary.hpp             |  20 +-
 src/gpu/intel/ocl/gen9_softmax.hpp            |   6 +-
 src/gpu/intel/ocl/generic_reorder.cpp         |   6 +-
 src/gpu/intel/ocl/micro_sdpa.cpp              |   9 +-
 src/gpu/intel/ocl/micro_sdpa.hpp              |   4 +-
 src/gpu/intel/ocl/multi_po_reorder_binary.hpp |   6 +-
 src/gpu/intel/ocl/ref_group_normalization.cpp |   6 +-
 src/gpu/intel/ocl/ref_layer_normalization.hpp |   6 +-
 src/gpu/intel/ocl/ref_matmul.cpp              |  22 +-
 src/gpu/intel/ocl/ref_matmul.hpp              |   8 +-
 src/gpu/intel/ocl/reusable_lnorm.cpp          |   6 +-
 src/gpu/intel/ocl/simple_binary.hpp           |   4 +-
 .../intel/ocl/simple_layer_normalization.hpp  |   6 +-
 src/gpu/intel/ocl/simple_softmax.hpp          |   6 +-
 src/gpu/intel/ocl/vectorized_lnorm.hpp        |   6 +-
 src/gpu/intel/primitive_conf.cpp              |  16 +-
 src/gpu/intel/primitive_conf.hpp              |   5 +-
 src/gpu/nvidia/cudnn_binary.hpp               |   9 +-
 src/gpu/nvidia/cudnn_convolution.hpp          |  22 +-
 src/gpu/nvidia/cudnn_convolution_impl.hpp     |  13 +-
 src/gpu/nvidia/cudnn_inner_product.hpp        |  22 +-
 src/gpu/nvidia/cudnn_matmul.hpp               |  11 +-
 src/gpu/nvidia/cudnn_matmul_impl.hpp          |   4 +-
 src/gpu/nvidia/cudnn_matmul_lt.hpp            |  38 ++-
 src/gpu/nvidia/cudnn_matmul_lt_impl.hpp       |  23 +-
 src/gpu/nvidia/cudnn_reorder.hpp              |  21 +-
 src/gpu/nvidia/cudnn_reorder_lt.hpp           |  48 +--
 src/gpu/nvidia/cudnn_softmax.cpp              |   6 +-
 src/gpu/nvidia/cudnn_softmax.hpp              |  10 +-
 tests/gtests/test_iface_attr.cpp              |  10 +-
 163 files changed, 1342 insertions(+), 1199 deletions(-)
 create mode 100644 src/common/primitive_attr_quant.cpp
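The bulk of the change below is a mechanical migration: call sites stop reading runtime_scales_t members (mask_, ndims_, group_dims_, data_type_) directly and instead query the new quant_entry_t through scales_t accessors, skipping default-initialized arguments up front. A condensed before/after sketch distilled from the hunks that follow (attr is a hypothetical primitive_attr_t pointer; this illustration is not itself a hunk of the patch):

    // Before: direct member access, with no cheap notion of "argument not set".
    const auto &sc = attr->scales_.get(DNNL_ARG_WEIGHTS);
    const int mask = sc.mask_;
    const dim_t g0 = sc.ndims_ > 0 ? sc.group_dims_[0] : 1;

    // After: accessor-only interface; default entries are skipped first.
    if (!attr->scales_.has_default_values(DNNL_ARG_WEIGHTS)) {
        const int mask = attr->scales_.get_mask(DNNL_ARG_WEIGHTS);
        // get_group() returns 1 when no groups were requested.
        const dim_t g0 = attr->scales_.get_group(DNNL_ARG_WEIGHTS, 0);
    }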
diff --git a/src/common/binary.cpp b/src/common/binary.cpp
index a29aeb017d7..83cc51b1512 100644
--- a/src/common/binary.cpp
+++ b/src/common/binary.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -55,16 +55,17 @@ status_t binary_attr_check(const binary_desc_t &desc, const engine_t *engine,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
-        VCHECK_BINARY_UNIMPL(attr->scales_.has_default_values(
-                                     {DNNL_ARG_SRC_0, DNNL_ARG_SRC_1}),
+        static const std::vector<int> supported_args {
+                DNNL_ARG_SRC_0, DNNL_ARG_SRC_1};
+        VCHECK_BINARY_UNIMPL(attr->scales_.has_default_values(supported_args),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
 
-        const auto &sc = attr->scales_;
-        const int mask_src_0 = sc.get(DNNL_ARG_SRC_0).mask_;
-        const int mask_src_1 = sc.get(DNNL_ARG_SRC_1).mask_;
+        for (int arg : supported_args) {
+            if (attr->scales_.has_default_values(arg)) continue;
 
-        VCHECK_BINARY_UNIMPL(utils::everyone_is(0, mask_src_0, mask_src_1),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
+            const int mask = attr->scales_.get_mask(arg);
+            VCHECK_BINARY_UNIMPL(mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
     }
 
     // Check post-ops
diff --git a/src/common/binary_pd.hpp b/src/common/binary_pd.hpp
index b9c94e58475..5c19c907a78 100644
--- a/src/common/binary_pd.hpp
+++ b/src/common/binary_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -179,10 +179,13 @@ struct binary_pd_t : public primitive_desc_t {
 
     bool attr_scales_ok(const std::vector<int> &supported_args
             = {DNNL_ARG_SRC_0, DNNL_ARG_SRC_1, DNNL_ARG_DST}) const {
-        bool ok = attr()->scales_.has_default_values(supported_args);
-        for (int arg : supported_args) {
-            const auto &mask = attr()->scales_.get(arg).mask_;
-            ok = ok && (mask == 0);
+        const auto &scales = attr()->scales_;
+        bool ok = scales.has_default_values(supported_args);
+
+        for (const auto &arg : supported_args) {
+            if (scales.has_default_values(arg)) continue;
+
+            ok = ok && scales.get_mask(arg) == 0;
         }
         return ok;
     }
diff --git a/src/common/concat.cpp b/src/common/concat.cpp
index d686df416f8..089aadf9285 100644
--- a/src/common/concat.cpp
+++ b/src/common/concat.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018-2023 Intel Corporation
+* Copyright 2018-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -54,10 +54,22 @@ status_t concat_primitive_desc_create(std::shared_ptr<primitive_desc_t> &pd,
     VCHECK_CONCAT_UNIMPL(attr->has_default_values(smask_t::scales_runtime),
             VERBOSE_UNSUPPORTED_ATTR);
     const auto &scales = attr->scales_;
-    if (!scales.has_default_values())
-        for (const auto &s : scales.scales_)
-            VCHECK_CONCAT_UNIMPL(
-                    s.second.mask_ == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+    if (!scales.has_default_values()) {
+        std::vector<int> supported_args(n);
+        for (int i = 0; i < n; i++) {
+            supported_args[i] = DNNL_ARG_MULTIPLE_SRC + i;
+        }
+        VCHECK_CONCAT_UNIMPL(
+                attr->scales_.has_default_values(supported_args),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+        for (int arg : supported_args) {
+            if (scales.has_default_values(arg)) continue;
+
+            int mask = scales.get_mask(arg);
+            VCHECK_CONCAT_UNIMPL(mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
+    }
 }
 
 const int ndims = src_mds[0]->ndims;
diff --git a/src/common/convolution.cpp b/src/common/convolution.cpp
index 80e61134353..78643c884d9 100644
--- a/src/common/convolution.cpp
+++ b/src/common/convolution.cpp
@@ -186,13 +186,19 @@ status_t conv_attr_check(const convolution_desc_t &desc, const engine_t *engine,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
         const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_wei = sc.get(DNNL_ARG_WEIGHTS).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
         const bool with_groups = desc.src_desc.ndims != desc.weights_desc.ndims;
-        VCHECK_CONV_UNIMPL(utils::one_of(mask_wei, 0, with_groups ? 3 : 1)
-                        && utils::one_of(mask_dst, 0, 2) && mask_src == 0,
+        VCHECK_CONV_UNIMPL(IMPLICATION(!sc.has_default_values(DNNL_ARG_SRC),
+                                   sc.get_mask(DNNL_ARG_SRC) == 0),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_CONV_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_WEIGHTS),
+                        utils::one_of(sc.get_mask(DNNL_ARG_WEIGHTS), 0,
+                                with_groups ? 3 : 1)),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_CONV_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_DST),
+                        utils::one_of(sc.get_mask(DNNL_ARG_DST), 0, 2)),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
     }
 
diff --git a/src/common/convolution_pd.hpp b/src/common/convolution_pd.hpp
index 51afe19efd1..487fbbad2a8 100644
--- a/src/common/convolution_pd.hpp
+++ b/src/common/convolution_pd.hpp
@@ -242,7 +242,9 @@ struct convolution_pd_t : public primitive_desc_t {
             = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
         bool ok = attr()->scales_.has_default_values(supported_args);
         for (int arg : supported_args) {
-            const auto &mask = attr()->scales_.get(arg).mask_;
+            if (attr()->scales_.has_default_values(arg)) continue;
+
+            const auto &mask = attr()->scales_.get_mask(arg);
             if (arg == DNNL_ARG_WEIGHTS)
                 ok = ok && (mask == 0 || mask == (with_groups() ? 3 : 1));
             else if (arg == DNNL_ARG_DST)
diff --git a/src/common/deconvolution.cpp b/src/common/deconvolution.cpp
index 00f3f89d037..3b4695156ed 100644
--- a/src/common/deconvolution.cpp
+++ b/src/common/deconvolution.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018-2024 Intel Corporation
+* Copyright 2018-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -172,13 +172,20 @@ status_t deconv_attr_check(const deconvolution_desc_t &desc,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
         const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_wei = sc.get(DNNL_ARG_WEIGHTS).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
         const bool with_groups = desc.src_desc.ndims != desc.weights_desc.ndims;
-        VCHECK_DECONV_UNIMPL(utils::everyone_is(0, mask_src, mask_dst)
-                        && utils::one_of(mask_wei, 0, with_groups ? 3 : 1),
+        VCHECK_DECONV_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_SRC),
+                        sc.get_mask(DNNL_ARG_SRC) == 0),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_DECONV_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_WEIGHTS),
+                        utils::one_of(sc.get_mask(DNNL_ARG_WEIGHTS), 0,
+                                with_groups ? 3 : 1)),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_DECONV_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_DST),
+                        sc.get_mask(DNNL_ARG_DST) == 0),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
     }
 
diff --git a/src/common/deconvolution_pd.hpp b/src/common/deconvolution_pd.hpp
index 32dfb1a58ef..24a44153a98 100644
--- a/src/common/deconvolution_pd.hpp
+++ b/src/common/deconvolution_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018-2024 Intel Corporation
+* Copyright 2018-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -173,7 +173,9 @@ struct deconvolution_pd_t : public primitive_desc_t {
             = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
         bool ok = attr()->scales_.has_default_values(supported_args);
         for (int arg : supported_args) {
-            const auto &mask = attr()->scales_.get(arg).mask_;
+            if (attr()->scales_.has_default_values(arg)) continue;
+
+            const auto &mask = attr()->scales_.get_mask(arg);
             if (arg == DNNL_ARG_WEIGHTS)
                 ok = ok && (mask == 0 || mask == (with_groups() ? 3 : 1));
             else
diff --git a/src/common/group_normalization.cpp b/src/common/group_normalization.cpp
index 4e0abf3b6a2..c4f0e7cc5e7 100644
--- a/src/common/group_normalization.cpp
+++ b/src/common/group_normalization.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -154,12 +154,18 @@ status_t group_normalization_attr_check(const group_normalization_desc_t &desc,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
-        const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
-
-        VCHECK_GNORM_UNIMPL(utils::everyone_is(0, mask_src, mask_dst),
+        static const std::vector<int> supported_args {
+                DNNL_ARG_SRC, DNNL_ARG_DST};
+        VCHECK_GNORM_UNIMPL(
+                attr->scales_.has_default_values(supported_args),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+        for (int arg : supported_args) {
+            if (attr->scales_.has_default_values(arg)) continue;
+
+            const int mask = attr->scales_.get_mask(arg);
+            VCHECK_GNORM_UNIMPL(mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
     }
 
     // Check post-ops
diff --git a/src/common/group_normalization_pd.hpp b/src/common/group_normalization_pd.hpp
index a9a48ed29f0..14343554126 100644
--- a/src/common/group_normalization_pd.hpp
+++ b/src/common/group_normalization_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023-2024 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -190,17 +190,17 @@ struct group_normalization_fwd_pd_t : public group_normalization_pd_t {
         return IMPLICATION(use_scale() || use_shift(),
                 weights_md()->data_type == data_type::f32);
     }
 
-    bool attr_scales_ok() const {
+    bool attr_scales_ok(const std::vector<int> &supported_args
+            = {DNNL_ARG_SRC, DNNL_ARG_DST}) const {
         using namespace data_type;
         const auto &scales = attr()->scales_;
-        const std::vector<int> supported_args({DNNL_ARG_SRC, DNNL_ARG_DST});
         bool ok = scales.has_default_values(supported_args);
 
         for (const auto &arg : supported_args) {
-            const auto &sc = scales.get(arg);
-            if (!sc.has_default_values()) {
+            if (!scales.has_default_values(arg)) {
                 const data_type_t dt = arg_md(arg)->data_type;
-                ok = ok && utils::one_of(dt, s8, u8) && sc.mask_ == 0;
+                ok = ok && utils::one_of(dt, s8, u8);
+                ok = ok && scales.get_mask(arg) == 0;
             }
         }
         return ok;
diff --git a/src/common/inner_product.cpp b/src/common/inner_product.cpp
index 4c1943cf67f..ac24c0c02c3 100644
--- a/src/common/inner_product.cpp
+++ b/src/common/inner_product.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2024 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -133,12 +133,15 @@ status_t ip_attr_check(const inner_product_desc_t &desc, const engine_t *engine,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
         const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_wei = sc.get(DNNL_ARG_WEIGHTS).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
-
-        VCHECK_IP_UNIMPL(utils::everyone_is(0, mask_src, mask_dst)
-                        && utils::one_of(mask_wei, 0, 1),
+        VCHECK_IP_UNIMPL(IMPLICATION(!sc.has_default_values(DNNL_ARG_SRC),
+                                 sc.get_mask(DNNL_ARG_SRC) == 0),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_IP_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_WEIGHTS),
+                        utils::one_of(sc.get_mask(DNNL_ARG_WEIGHTS), 0, 1)),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_IP_UNIMPL(IMPLICATION(!sc.has_default_values(DNNL_ARG_DST),
+                                 sc.get_mask(DNNL_ARG_DST) == 0),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
     }
 
diff --git a/src/common/inner_product_pd.hpp b/src/common/inner_product_pd.hpp
index 5d7d2163ddf..ecde97f3687 100644
--- a/src/common/inner_product_pd.hpp
+++ b/src/common/inner_product_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2024 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -184,7 +184,9 @@ struct inner_product_pd_t : public primitive_desc_t {
             = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
         bool ok = attr()->scales_.has_default_values(supported_args);
         for (auto arg : supported_args) {
-            int mask = attr()->scales_.get(arg).mask_;
+            if (attr()->scales_.has_default_values(arg)) continue;
+
+            int mask = attr()->scales_.get_mask(arg);
             if (arg == DNNL_ARG_WEIGHTS)
                 ok = ok && (mask == 0 || mask == (1 << 0));
             else
diff --git a/src/common/layer_normalization.cpp b/src/common/layer_normalization.cpp
index 79ccc98c45c..ab8a3790a80 100644
--- a/src/common/layer_normalization.cpp
+++ b/src/common/layer_normalization.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -164,12 +164,18 @@ status_t layer_normalization_attr_check(const layer_normalization_desc_t &desc,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
-        const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
-
-        VCHECK_LNORM_UNIMPL(utils::everyone_is(0, mask_src, mask_dst),
+        static const std::vector<int> supported_args {
+                DNNL_ARG_SRC, DNNL_ARG_DST};
+        VCHECK_LNORM_UNIMPL(
+                attr->scales_.has_default_values(supported_args),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+        for (int arg : supported_args) {
+            if (attr->scales_.has_default_values(arg)) continue;
+
+            const int mask = attr->scales_.get_mask(arg);
+            VCHECK_LNORM_UNIMPL(mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
     }
 
     // Check post-ops
diff --git a/src/common/layer_normalization_pd.hpp b/src/common/layer_normalization_pd.hpp
index 0629be31180..7e901c64bf3 100644
--- a/src/common/layer_normalization_pd.hpp
+++ b/src/common/layer_normalization_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -248,11 +248,19 @@ struct layer_normalization_fwd_pd_t : public layer_normalization_pd_t {
         return false;
     }
 
-    bool attr_scales_ok() const {
+    bool attr_scales_ok(const std::vector<int> &supported_args
+            = {DNNL_ARG_SRC, DNNL_ARG_DST}) const {
+        using namespace data_type;
         const auto &scales = attr()->scales_;
-        bool ok = true;
-        for (const auto &e : scales.scales_) {
-            ok = ok && e.second.mask_ == 0;
+        bool ok = scales.has_default_values(supported_args);
+
+        for (const auto &arg : supported_args) {
+            if (!scales.has_default_values(arg)) {
+                // TODO: disallow non-int8 scales?
+                // const data_type_t dt = arg_md(arg)->data_type;
+                // ok = ok && utils::one_of(dt, s8, u8);
+                ok = ok && scales.get_mask(arg) == 0;
+            }
         }
         return ok;
     }
diff --git a/src/common/matmul.cpp b/src/common/matmul.cpp
index 8dd32b8055a..8d05e147aaf 100644
--- a/src/common/matmul.cpp
+++ b/src/common/matmul.cpp
@@ -95,36 +95,73 @@ status_t matmul_attr_check(const matmul_desc_t &desc, const engine_t *engine,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
         const auto &sc = attr->scales_;
-        const auto &sc_src = sc.get(DNNL_ARG_SRC);
-        const auto &sc_wei = sc.get(DNNL_ARG_WEIGHTS);
-        const int mask_src = sc_src.mask_;
-        const int mask_wei = sc_wei.mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
+        dim_t src_scale_group_k = 1;
+        if (!sc.has_default_values(DNNL_ARG_SRC)) {
+            const int mask_src = sc.get_mask(DNNL_ARG_SRC);
 
-        VCHECK_MATMUL_UNIMPL(utils::one_of(mask_src, 0, src_qmask_K,
-                                     src_qmask_M + src_qmask_K),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
-        // Masks for weights scales can be any - skipping them.
-        if (engine->kind() == engine_kind::gpu) {
-            VCHECK_MATMUL_UNIMPL(
-                    utils::one_of(mask_dst, 0, dst_qmask_N, dst_qmask_M,
-                            dst_qmask_N + dst_qmask_M),
+            VCHECK_MATMUL_UNIMPL(utils::one_of(mask_src, 0, src_qmask_K,
+                                         src_qmask_M + src_qmask_K),
+                    VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+            if (!sc.get(DNNL_ARG_SRC).has_default_groups()) {
+                if (mask_src & src_qmask_K)
+                    src_scale_group_k = sc.get_group(DNNL_ARG_SRC, 1);
+            }
+
+            // Due to hardware specifics, groups should be multiple of 32.
+            VCHECK_MATMUL_UNIMPL(IMPLICATION(src_scale_group_k > 1,
+                                         src_scale_group_k % 32 == 0),
                     VERBOSE_UNSUPPORTED_SCALES_CFG);
-        } else {
-            VCHECK_MATMUL_UNIMPL(mask_dst == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
         }
+
+        dim_t wei_scale_group_k = 1;
+        dim_t wei_scale_group_n = 1;
+        if (!sc.has_default_values(DNNL_ARG_WEIGHTS)) {
+            const int mask_wei = sc.get_mask(DNNL_ARG_WEIGHTS);
+
+            // Masks for weights scales can be any - skipping them.
+
+            if (!sc.get(DNNL_ARG_WEIGHTS).has_default_groups()) {
+                if (mask_wei & wei_qmask_K)
+                    wei_scale_group_k = sc.get_group(DNNL_ARG_WEIGHTS, 0);
+                if (mask_wei & wei_qmask_N)
+                    wei_scale_group_n = sc.get_group(DNNL_ARG_WEIGHTS, 1);
+            }
+
+            // Groups per N are solely for weights decompression as it's
+            // impossible to get performant kernel for a single `k` element in
+            // chain for regular quantized case.
+            VCHECK_MATMUL_UNIMPL(IMPLICATION(wei_scale_group_n > 1,
+                                         attr->fpmath_.apply_to_int_),
+                    VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+            // Due to hardware specifics, groups should be multiple of 32.
+            VCHECK_MATMUL_UNIMPL(IMPLICATION(wei_scale_group_k > 1,
+                                         wei_scale_group_k % 32 == 0),
+                    VERBOSE_UNSUPPORTED_SCALES_CFG);
+            VCHECK_MATMUL_UNIMPL(IMPLICATION(wei_scale_group_n > 1,
+                                         wei_scale_group_n % 32 == 0),
+                    VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
+
+        if (!sc.has_default_values(DNNL_ARG_DST)) {
+            const int mask_dst = sc.get_mask(DNNL_ARG_DST);
+
+            if (engine->kind() == engine_kind::gpu) {
+                VCHECK_MATMUL_UNIMPL(
+                        utils::one_of(mask_dst, 0, dst_qmask_N, dst_qmask_M,
+                                dst_qmask_N + dst_qmask_M),
+                        VERBOSE_UNSUPPORTED_SCALES_CFG);
+            } else {
+                VCHECK_MATMUL_UNIMPL(
+                        mask_dst == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+            }
+        }
+
         // Check dependency between scales.
         // Source scales groups are supported for int8 source and must divide
         // or be divided by weights groups when both are greater than 1.
-        const auto src_scale_group_k
-                = (mask_src & src_qmask_K) && sc_src.ndims_ > 0
-                ? sc_src.group_dims_[1]
-                : 1;
-        const auto wei_scale_group_k
-                = (mask_wei & wei_qmask_K) && sc_wei.ndims_ > 0
-                ? sc_wei.group_dims_[0]
-                : 1;
         const bool groups_are_divisible = IMPLICATION(
                 src_scale_group_k > 1 && wei_scale_group_k > 1,
                 (src_scale_group_k % wei_scale_group_k == 0)
@@ -133,28 +170,6 @@ status_t matmul_attr_check(const matmul_desc_t &desc, const engine_t *engine,
                 IMPLICATION(src_scale_group_k > 1,
                         (src_is_int8 || src_is_fp8) && groups_are_divisible),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
-
-        // Groups per N are solely for weights decompression as it's impossible
-        // to get performant kernel for a single `k` element in chain for
-        // regular quantized case.
-        const auto wei_scale_group_n
-                = (mask_wei & wei_qmask_N) && sc_wei.ndims_ > 0
-                ? sc_wei.group_dims_[1]
-                : 1;
-        VCHECK_MATMUL_UNIMPL(
-                IMPLICATION(wei_scale_group_n > 1, attr->fpmath_.apply_to_int_),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
-
-        // Due to hardware specifics, groups should be multiple of 32.
-        VCHECK_MATMUL_UNIMPL(
-                IMPLICATION(src_scale_group_k > 1, src_scale_group_k % 32 == 0),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
-        VCHECK_MATMUL_UNIMPL(
-                IMPLICATION(wei_scale_group_k > 1, wei_scale_group_k % 32 == 0),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
-        VCHECK_MATMUL_UNIMPL(
-                IMPLICATION(wei_scale_group_n > 1, wei_scale_group_n % 32 == 0),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
     }
 
     // Check zero points
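For a concrete reading of the constraints above, here is a hedged sketch using the public attribute API this patch reroutes (dnnl_primitive_attr_set_scales); the shapes, mask, and attr handle are invented for illustration:

    // Hypothetical int8 weights-decompression matmul with K = 256, N = 512.
    // Scales vary along both K and N of a 2D weights tensor -> mask = 3, with
    // a group of 32 along K: it is a multiple of 32 as required above, and
    // 256 % 32 == 0 satisfies the divisibility check in matmul_pd.hpp. Groups
    // along N stay at 1, so the fpmath_.apply_to_int_ restriction is not hit.
    dnnl_dims_t groups = {32, 1};
    dnnl_primitive_attr_set_scales(attr, DNNL_ARG_WEIGHTS,
            /* mask = */ (1 << 0) | (1 << 1), /* ndims = */ 2, groups,
            dnnl_f32);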
diff --git a/src/common/matmul_pd.hpp b/src/common/matmul_pd.hpp
index 94828f4c082..ece3d0a5fc9 100644
--- a/src/common/matmul_pd.hpp
+++ b/src/common/matmul_pd.hpp
@@ -182,29 +182,26 @@ struct matmul_pd_t : public primitive_desc_t {
 
     virtual bool attr_scales_ok(const std::vector<int> &supported_args
             = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
-        if (attr()->scales_.has_default_values()) return true;
+        const auto &scales = attr()->scales_;
+        if (scales.has_default_values()) return true;
 
-        bool ok = attr()->scales_.has_default_values(supported_args);
+        bool ok = scales.has_default_values(supported_args);
         for (int arg : supported_args) {
-            const auto &sc = attr()->scales_.get(arg);
-            const auto &mask = sc.mask_;
-            if (sc.has_default_values()) { continue; }
+            if (scales.has_default_values(arg)) { continue; }
 
+            const auto &mask = scales.get_mask(arg);
             if (arg == DNNL_ARG_WEIGHTS) {
-                const bool wei_k_group_ok
-                        = IMPLICATION(sc.ndims_ == 2 && sc.group_dims_[0] > 1,
-                                K() % sc.group_dims_[0] == 0);
-                const bool wei_n_group_ok
-                        = IMPLICATION(sc.ndims_ == 2 && sc.group_dims_[1] > 1,
-                                N() % sc.group_dims_[1] == 0);
+                const auto &g0 = scales.get_group(arg, 0);
+                const auto &g1 = scales.get_group(arg, 1);
+                const bool wei_k_group_ok = IMPLICATION(g0 > 1, K() % g0 == 0);
+                const bool wei_n_group_ok = IMPLICATION(g1 > 1, N() % g1 == 0);
 
                 // Any group is allowed to be greater than 1 but only one at a
                 // time, not both.
-                ok = ok && utils::one_of(sc.ndims_, 0, 2)
-                        && IMPLICATION(sc.ndims_ == 2,
-                                utils::one_of(
-                                        1, sc.group_dims_[0], sc.group_dims_[1])
-                                        && wei_k_group_ok && wei_n_group_ok);
+                ok = ok
+                        && IMPLICATION(!scales.get(arg).has_default_groups(),
+                                utils::one_of(1, g0, g1) && wei_k_group_ok
+                                        && wei_n_group_ok);
 
                 // Mask over K dim is allowed for decompression feature only.
                 const bool is_decompression_or_dynquant
@@ -220,20 +217,24 @@ struct matmul_pd_t : public primitive_desc_t {
                 ok = ok
                         && utils::one_of(mask, 0, src_qmask_K(),
                                 src_qmask_M() + src_qmask_K());
-                ok = ok && utils::one_of(sc.ndims_, 0, 2);
-                ok = ok && IMPLICATION((mask & src_qmask_K()), sc.ndims_ == 2);
                 ok = ok
-                        && IMPLICATION(sc.ndims_ == 2,
-                                sc.group_dims_[0] == 1
-                                        && K() % sc.group_dims_[1] == 0);
-            } else {
+                        && IMPLICATION((mask & src_qmask_K()),
+                                !scales.get(arg).has_default_groups());
+                ok = ok
+                        && IMPLICATION(!scales.get(arg).has_default_groups(),
+                                scales.get_group(arg, 0) == 1
+                                        && K() % scales.get_group(arg, 1) == 0);
+            } else if (arg == DNNL_ARG_DST) {
                 ok = ok
                         && utils::one_of(mask, 0, dst_qmask_N(),
                                 dst_qmask_M() + dst_qmask_N());
-                ok = ok && utils::one_of(sc.ndims_, 0, 2)
-                        && IMPLICATION(sc.ndims_ == 2,
-                                sc.group_dims_[1] == 1
-                                        && M() % sc.group_dims_[0] == 0);
+                ok = ok
+                        && IMPLICATION(!scales.get(arg).has_default_groups(),
+                                scales.get_group(arg, 1) == 1
+                                        && (M() % scales.get_group(arg, 0))
+                                                == 0);
+            } else {
+                assert(!"Unsupported arg");
             }
         }
         return ok;
diff --git a/src/common/primitive_attr.cpp b/src/common/primitive_attr.cpp
index 68b27f201a5..f846585ff31 100644
--- a/src/common/primitive_attr.cpp
+++ b/src/common/primitive_attr.cpp
@@ -35,11 +35,6 @@ const primitive_attr_t &default_attr() {
     return default_attr_instance;
 }
 
-const runtime_scales_t &default_runtime_scale() {
-    static const runtime_scales_t default_runtime_scale_instance;
-    return default_runtime_scale_instance;
-}
-
 void rnn_create_time_scales_t::set_single_scale(float scale) {
     count_ = 1;
     mask_ = 0;
@@ -543,8 +538,9 @@ status_t dnnl_primitive_attr_set_scratchpad_mode(
 
 status_t dnnl_primitive_attr_set_scales_mask(
         primitive_attr_t *attr, int arg, int mask) {
-    bool ok = attr && mask >= 0 && arg >= 0;
-    if (!ok) return invalid_arguments;
+    VCHECK_ATTR(attr, VERBOSE_NULL_ARG);
+    VCHECK_ATTR(mask >= 0, VERBOSE_BAD_PARAM, "mask");
+    VCHECK_ATTR(arg >= 0, VERBOSE_BAD_PARAM, "arg");
     return attr->scales_.set(arg, mask);
 }
 
@@ -560,7 +556,7 @@ status_t dnnl_primitive_attr_set_scales(primitive_attr_t *attr, int arg,
             VERBOSE_INVALID_DATATYPE, "scales");
     VCHECK_ATTR(IMPLICATION(ndims, validate_dims(ndims, group_dims)),
             VERBOSE_BAD_PARAM, "group_dims");
-    return attr->scales_.set(arg, mask, ndims, group_dims, data_type);
+    return attr->scales_.set(arg, mask, data_type, ndims, group_dims);
 }
 
 status_t dnnl_primitive_attr_set_zero_points_mask(
diff --git a/src/common/primitive_attr.hpp b/src/common/primitive_attr.hpp
index 4c61aef817a..bafaddc12f3 100644
--- a/src/common/primitive_attr.hpp
+++ b/src/common/primitive_attr.hpp
@@ -669,7 +669,7 @@ struct dnnl_primitive_attr : public dnnl::impl::c_compatible {
     }
 
     // NOTE: make sure that the types below have overloaded comparison operator
-    dnnl::impl::arg_scales_t scales_;
+    dnnl::impl::scales_t scales_;
     dnnl::impl::zero_points_t zero_points_;
     dnnl::impl::scratchpad_mode_t scratchpad_mode_;
     dnnl::impl::fpmath_t fpmath_;
diff --git a/src/common/primitive_attr_quant.cpp b/src/common/primitive_attr_quant.cpp
new file mode 100644
index 00000000000..172225154e7
--- /dev/null
+++ b/src/common/primitive_attr_quant.cpp
@@ -0,0 +1,101 @@
+/*******************************************************************************
+* Copyright 2024-2025 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "common/primitive_attr_quant.hpp"
+#include "common/primitive_hashing.hpp"
+#include "common/verbose.hpp"
+
+namespace dnnl {
+namespace impl {
+
+const quant_entry_t &default_quant_entry() {
+    static const quant_entry_t default_quant_entry;
+    return default_quant_entry;
+}
+
+size_t quant_entry_t::get_hash() const {
+    size_t seed = 0;
+    seed = hash_combine(seed, mask_);
+    seed = hash_combine(seed, static_cast<size_t>(data_type_));
+    seed = hash_combine(seed, group_ndims_);
+    if (group_ndims_ > 0)
+        seed = primitive_hashing::get_array_hash(
+                seed, group_dims_, group_ndims_);
+    return seed;
+}
+
+void quant_entry_t::serialize(serialization_stream_t &sstream) const {
+    sstream.write(&mask_);
+    sstream.write(&data_type_);
+    sstream.write(&group_ndims_);
+    if (group_ndims_ > 0) sstream.write(group_dims_, group_ndims_);
+}
+
+std::string quant_entry_t::get_verbose() const {
+    std::string s;
+    s.append(std::to_string(mask_));
+    s.append(":").append(dnnl_dt2str(data_type_));
+    if (group_ndims_ > 0) {
+        s.append(":")
+                .append(std::to_string(group_dims_[0]))
+                .append("x")
+                .append(std::to_string(group_dims_[1]));
+    }
+    return s;
+}
+
+std::ostream &operator<<(std::ostream &ss, const quant_entry_t &e) {
+    ss << e.get_verbose();
+    return ss;
+}
+
+size_t scales_t::get_hash() const {
+    size_t seed = 0;
+    // Go through scales for all arguments.
+    for (const auto &e : scales_) {
+        seed = hash_combine(seed, e.first);
+        seed = hash_combine(seed, e.second.get_hash());
+    }
+    return seed;
+}
+
+void scales_t::serialize(serialization_stream_t &sstream) const {
+    for (const auto &e : scales_) {
+        sstream.write(&e.first);
+        e.second.serialize(sstream);
+    }
+}
+
+std::string scales_t::get_verbose() const {
+    std::string s;
+    std::string empty_delim, attr_delim = "+";
+    std::string delim = empty_delim;
+    for (const auto &scale : scales_) {
+        const auto &q = scale.second;
+        if (q.has_default_values()) continue;
+
+        int arg = scale.first;
+        s.append(delim)
+                .append(arg2str(arg))
+                .append(":")
+                .append(q.get_verbose());
+        delim = attr_delim;
+    }
+    return s;
+}
+
+} // namespace impl
+} // namespace dnnl
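For reference, get_verbose() above renders each non-default entry as mask:data_type[:g0xg1], and scales_t joins entries with '+'. A weights scale with mask 3, f32 data type, and 32x1 groups next to a plain dst scale would therefore print roughly as follows (an illustrative line constructed from the code above, not captured output):

    attr-scales:wei:3:f32:32x1+dst:0:f32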
-#include "utils.hpp" +#include "common/serialization_stream.hpp" +#include "common/utils.hpp" #include -#include +#include #include +#include namespace dnnl { namespace impl { -struct runtime_scales_t; -const runtime_scales_t &default_runtime_scale(); +struct quant_entry_t; +const quant_entry_t &default_quant_entry(); -struct runtime_scales_t : public c_compatible { - runtime_scales_t() = default; +struct quant_entry_t : public c_compatible { + quant_entry_t() = default; - runtime_scales_t &operator=(const runtime_scales_t &rhs) { - mask_ = rhs.mask_; - is_set_ = rhs.is_set_; - ndims_ = rhs.ndims_; - if (ndims_ > 0) utils::array_copy(group_dims_, rhs.group_dims_, ndims_); - data_type_ = rhs.data_type_; - return *this; + // `set(...)` approach is taken over constructors as the usage model assumes + // the change of state of this object but it doesn't require its destruction + // which would come with some performance price which prevails in this case. + status_t set(int mask, data_type_t data_type) { + return set(mask, data_type, 0, {}); } - - status_t set(int mask) { return set(0, mask, {}, data_type::f32); } - - status_t set(int ndims, int mask, const dims_t group_dims, - data_type_t data_type = data_type::f32) { + status_t set(int mask, data_type_t data_type, int group_ndims, + const dims_t group_dims) { mask_ = mask; - is_set_ = true; - ndims_ = ndims; - if (ndims > 0) utils::array_copy(group_dims_, group_dims, ndims); data_type_ = data_type; + group_ndims_ = group_ndims; + if (group_ndims_ > 0) { + utils::array_copy(group_dims_, group_dims, group_ndims_); + } return status::success; } + status_t set(const quant_entry_t &other) { + return set(other.mask_, other.data_type_, other.group_ndims_, + other.group_dims_); + } + + quant_entry_t &operator=(const quant_entry_t &rhs) { + auto st = this->set(rhs); + assert(st == status::success); + UNUSED(st); + return *this; + } + + bool has_default_values() const { return *this == default_quant_entry(); } + bool has_default_groups() const { + return this->group_ndims_ == default_quant_entry().group_ndims_; + } - bool operator==(const runtime_scales_t &rhs) const { - return mask_ == rhs.mask_ && is_set_ == rhs.is_set_ - && ndims_ == rhs.ndims_ - && IMPLICATION(ndims_ > 0, - utils::array_cmp(group_dims_, rhs.group_dims_, ndims_)) - && data_type_ == rhs.data_type_; + int get_mask() const { return mask_; } + data_type_t get_data_type() const { return data_type_; } + dim_t get_group(int d) const { + // If groups were not requested, return `1` for convenience. + if (group_ndims_ == default_quant_entry().group_ndims_) return 1; + // But if they were, any out of bound access would return `0` and likely + // lead to a division by zero which is fast to catch. + if (d >= group_ndims_) return 0; + return group_dims_[d]; } - bool has_default_values() const { return *this == default_runtime_scale(); } + // Note: keep the definition here to satisfy the + // `gtests/internals/test_comparison_operators` linking requirements which + // mandates bodies to be in the header file. + bool operator==(const quant_entry_t &rhs) const { + return mask_ == rhs.mask_ && data_type_ == rhs.data_type_ + && group_ndims_ == rhs.group_ndims_ + && IMPLICATION(group_ndims_ > 0, + utils::array_cmp( + group_dims_, rhs.group_dims_, group_ndims_)); + } - bool has_default_groups() const { return 0 == ndims_; } - bool has_default_data_type() const { return data_type_ == data_type::f32; } + size_t get_hash() const; - // TODO: replace with `-1` to remove `is_set_`. 
-    // Hide `mask_` under `private:` to force interface usage.
-    int mask_ = 0;
-    bool is_set_ = false;
-    int ndims_ = 0;
-    dims_t group_dims_ = {};
-    data_type_t data_type_ = data_type::f32;
+    void serialize(serialization_stream_t &sstream) const;
+
+    std::string get_verbose() const;
+
+private:
+    // Note: INT_MIN is used on purpose to avoid potential issues when a
+    // `(mask & bit)` expression would return `true`. `INT_MIN` is represented
+    // as `10...0` in bits and avoids such situations.
+    int mask_ = INT_MIN;
+    data_type_t data_type_ = data_type::undef;
+    int group_ndims_ = 0;
+    dims_t group_dims_ {};
 };
 
-struct arg_scales_t : public c_compatible {
-    arg_scales_t() = default;
+std::ostream &operator<<(std::ostream &ss, const quant_entry_t &e);
 
-    const runtime_scales_t &get(int arg) const {
-        static const runtime_scales_t default_scales;
+struct scales_t : public c_compatible {
+    scales_t() = default;
+
+    const quant_entry_t &get(int arg) const {
         const auto it = scales_.find(arg);
-        if (it == scales_.end()) return default_scales;
+        if (it == scales_.end()) return default_quant_entry();
         return it->second;
     }
 
-    status_t set(int arg, const runtime_scales_t &scale) {
+    // See the `set(...)` comment for `quant_entry_t` for a design choice
+    // explanation.
+    status_t set(int arg, int mask) {
+        return set(arg, mask, default_data_type, 0, {});
+    }
+    status_t set(int arg, int mask, data_type_t data_type, int group_ndims,
+            const dims_t group_dims) {
         if (!check_arg(arg)) return status::invalid_arguments;
-        scales_[arg] = scale;
+        CHECK(scales_[arg].set(mask, data_type, group_ndims, group_dims));
         return status::success;
     }
-
-    bool operator==(const arg_scales_t &rhs) const {
-        return scales_ == rhs.scales_;
+    // Use this interface with `default_quant_entry` when a specific scale
+    // needs to be removed.
+    status_t set(int arg, const quant_entry_t &other) {
+        return scales_[arg].set(other);
     }
 
-    bool has_default_values(const std::vector<int> &skip_args = {}) const {
-        auto predicate = [](const runtime_scales_t &s) {
-            return s.has_default_values();
-        };
-        return has_default_property(skip_args, predicate);
+    // This interface is different from the one below and is just a shortcut.
+    bool has_default_values(int arg) const {
+        return get(arg).has_default_values();
     }
 
-    bool has_default_data_type(const std::vector<int> &skip_args = {}) const {
-        auto predicate = [](const runtime_scales_t &s) {
-            return s.has_default_data_type();
-        };
-        return has_default_property(skip_args, predicate);
+    // This interface is used to make sure that arguments other than
+    // `supported_args` have default values, i.e., that non-allowed arguments
+    // were not passed to the library.
+    bool has_default_values(const std::vector<int> &supported_args = {}) const {
+        auto predicate
+                = [](const quant_entry_t &s) { return s.has_default_values(); };
+        return has_default_property(supported_args, predicate);
    }
 
+    // This interface is used to make sure that arguments other than
+    // `supported_args` have default data types, i.e., that non-allowed
+    // arguments were not passed to the library.
+    bool has_default_data_type(
+            const std::vector<int> &supported_args = {}) const {
+        auto predicate = [](const quant_entry_t &s) {
+            // Note: `data_type::undef` represents `default_quant_entry`.
+            return utils::one_of(
+                    s.get_data_type(), default_data_type, data_type::undef);
         };
-        return has_default_property(skip_args, predicate);
+        return has_default_property(supported_args, predicate);
     }
+    // This interface checks a specific argument. It exists because
+    // quant_entry_t doesn't have a notion of a default data_type, only scales
+    // do.
+    // Note: can be removed once the library unconditionally supports scales
+    // data types in every implementation, since making a proper load requires
+    // querying the data type anyway.
+    bool has_default_data_type(int arg) const {
+        // Note: `data_type::undef` represents `default_quant_entry`.
+        return utils::one_of(
+                get(arg).get_data_type(), default_data_type, data_type::undef);
+    }
 
-    bool has_default_groups(const std::vector<int> &skip_args = {}) const {
-        auto predicate = [](const runtime_scales_t &s) {
-            return s.has_default_groups();
-        };
-        return has_default_property(skip_args, predicate);
+    // This interface is different from the one below and is just a shortcut.
+    bool has_default_groups(int arg) const {
+        return get(arg).has_default_groups();
     }
 
-    status_t set(int arg, int mask) {
-        return set(arg, mask, 0, {}, data_type::f32);
+    // This interface is used to make sure that arguments other than
+    // `supported_args` have default groups, i.e., that non-allowed arguments
+    // were not passed to the library.
+    bool has_default_groups(const std::vector<int> &supported_args = {}) const {
+        auto predicate
+                = [](const quant_entry_t &s) { return s.has_default_groups(); };
+        return has_default_property(supported_args, predicate);
     }
 
-    status_t set(int arg, int mask, int ndims, const dims_t group_dims,
-            data_type_t data_type) {
-        if (!check_arg(arg)) return status::invalid_arguments;
-        return scales_[arg].set(ndims, mask, group_dims, data_type);
-    }
-
-    // TODO: move to `private` and keep a single interface per entry.
-    status_t get(int arg, int *mask, bool *is_set, int *ndims = nullptr,
-            dims_t group_dims = nullptr,
-            data_type_t *data_type = nullptr) const {
-        if (!check_arg(arg)) return status::invalid_arguments;
-        const auto &s = get(arg);
-        if (mask) *mask = s.mask_;
-        if (is_set) *is_set = s.is_set_;
-        if (ndims) *ndims = s.ndims_;
-        if (group_dims && s.ndims_ > 0)
-            utils::array_copy(group_dims, s.group_dims_, s.ndims_);
-        if (data_type) *data_type = s.data_type_;
-        return status::success;
-    }
-
+    int get_mask(int arg) const { return get(arg).get_mask(); }
     data_type_t get_data_type(int arg) const {
-        data_type_t data_type;
-        auto st = get(arg, nullptr, nullptr, nullptr, nullptr, &data_type);
-        if (st != status::success) return data_type::undef;
-        return data_type;
+        return get(arg).get_data_type();
     }
+    dim_t get_group(int arg, int d) const { return get(arg).get_group(d); }
 
-    status_t reset(int arg) {
-        if (!check_arg(arg)) return status::invalid_arguments;
-        const auto it = scales_.find(arg);
-        if (it != scales_.end()) scales_.erase(it);
-        return status::success;
+    bool operator==(const scales_t &rhs) const {
+        return scales_ == rhs.scales_;
     }
 
-    status_t copy_from(const arg_scales_t &other) {
-        for (auto it = other.scales_.begin(); it != other.scales_.end(); ++it) {
-            // Find an entry that can match the arguments without constructing a
-            // new object.
-            if (scales_.count(it->first) == 1) {
-                auto &entry = scales_[it->first];
-                if (entry == it->second) continue;
-            }
+    size_t get_hash() const;
 
-            CHECK(set(it->first, it->second));
-        }
-        return status::success;
-    }
+    void serialize(serialization_stream_t &sstream) const;
 
-    std::map<int, runtime_scales_t> scales_;
+    std::string get_verbose() const;
 
 private:
+    // Sorted property of `std::map` is used for hashing.
+    std::map<int, quant_entry_t> scales_;
+    static constexpr data_type_t default_data_type = data_type::f32;
+
     bool check_arg(int arg) const {
+        // regular
+        for (const auto &sa : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) {
+            if (arg == sa) return true;
+        }
         // binary
-        for (const auto &sa : {DNNL_ARG_SRC_0, DNNL_ARG_SRC_1}) {
+        for (const auto &sa : {DNNL_ARG_SRC_1}) {
             if (arg == sa) return true;
         }
         // concat
         if (arg & DNNL_ARG_MULTIPLE_SRC) return true;
-        // convolution
-        for (const auto &sa : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) {
-            if (arg == sa) return true;
-        }
         // depth-wise convolution post op
         for (const auto &sa : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) {
             if (arg == (DNNL_ARG_ATTR_POST_OP_DW | sa)) return true;
@@ -203,19 +236,23 @@ struct arg_scales_t : public c_compatible {
         return false;
     }
 
-    bool has_default_property(const std::vector<int> &skip_args,
-            bool (*predicate)(const runtime_scales_t &)) const {
+    // The function makes sure that if any argument was specified by the user,
+    // only `supported_args` may have customized values; the rest must keep
+    // their default values.
+    bool has_default_property(const std::vector<int> &supported_args,
+            bool (*predicate)(const quant_entry_t &)) const {
         for (const auto &s : scales_) {
-            if (!predicate(s.second)) {
-                bool skip = false;
-                for (const auto &skip_a : skip_args)
-                    if (s.first == skip_a) {
-                        skip = true;
-                        break;
-                    }
-                if (skip) continue;
-                return false;
-            }
+            // Arg passed the condition, check the next one.
+            if (predicate(s.second)) continue;
+
+            bool allow_non_default = false;
+            for (const auto &supported_arg : supported_args)
+                if (s.first == supported_arg) {
+                    allow_non_default = true;
+                    break;
+                }
+            if (allow_non_default) continue;
+            return false;
         }
         return true;
     }
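Taken together, the new scales_t surface reduces a consumer-side validation to the shape used by the attr_scales_ok() helpers rewritten throughout this patch. A minimal sketch, assuming a hypothetical primitive that supports only a per-tensor (mask 0, ungrouped) SRC scale:

    bool attr_scales_ok(const primitive_attr_t *attr) {
        const auto &scales = attr->scales_;
        // Reject non-default entries on any argument outside the supported set.
        if (!scales.has_default_values({DNNL_ARG_SRC})) return false;
        // A default entry needs no further validation.
        if (scales.has_default_values(DNNL_ARG_SRC)) return true;
        // Per-tensor only: zero mask and no groups.
        return scales.get_mask(DNNL_ARG_SRC) == 0
                && scales.has_default_groups(DNNL_ARG_SRC);
    }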
diff --git a/src/common/primitive_desc.hpp b/src/common/primitive_desc.hpp
index 288c9339117..86f1e3740c5 100644
--- a/src/common/primitive_desc.hpp
+++ b/src/common/primitive_desc.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2024 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -157,14 +157,14 @@ struct primitive_desc_t : public c_compatible {
         }
         if (arg & DNNL_ARG_ATTR_SCALES) {
             int scale_arg = arg & ~DNNL_ARG_ATTR_SCALES;
-            if (!attr()->scales_.get(scale_arg).has_default_values())
+            if (!attr()->scales_.has_default_values(scale_arg))
                 return arg_usage_t::input;
         }
         if ((arg == (DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0))
-                && !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values())
+                && !attr()->scales_.has_default_values(DNNL_ARG_SRC_0))
             return arg_usage_t::input;
         if ((arg == (DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_1))
-                && !attr()->scales_.get(DNNL_ARG_SRC_1).has_default_values())
+                && !attr()->scales_.has_default_values(DNNL_ARG_SRC_1))
             return arg_usage_t::input;
         if (arg == DNNL_ARG_SCRATCHPAD && !is_zero_md(scratchpad_md()))
             return arg_usage_t::output;
diff --git a/src/common/primitive_hashing.cpp b/src/common/primitive_hashing.cpp
index b3037c0c67c..28dbdfabedd 100644
--- a/src/common/primitive_hashing.cpp
+++ b/src/common/primitive_hashing.cpp
@@ -15,6 +15,7 @@
 *******************************************************************************/
 
 #include
+#include "primitive_attr.hpp"
 #include "primitive_desc.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
@@ -218,19 +219,6 @@ size_t get_md_hash(const memory_desc_t &md) {
     return seed;
 }
 
-// Generate a hash for runtime scales
-size_t get_runtime_scale_hash(const runtime_scales_t &scales) {
-    size_t seed = 0;
-    seed = hash_combine(seed, scales.mask_);
-    // scales: groups
-    const int ndims = scales.ndims_;
-    seed = hash_combine(seed, ndims);
-    if (ndims > 0) seed = get_array_hash(seed, scales.group_dims_, ndims);
-    // scales: data type
-    seed = hash_combine(seed, static_cast<size_t>(scales.data_type_));
-    return seed;
-}
-
 // Generate a hash for zero points
 size_t get_zero_points_hash(const zero_points_t &zps) {
     size_t seed = 0;
@@ -276,13 +264,7 @@ size_t get_attr_hash(const primitive_attr_t &attr) {
     }
 
     if (!attr.scales_.has_default_values()) {
-        // go through scales for all arguments
-        for (const auto &p : attr.scales_.scales_) {
-            // scales: arg
-            seed = hash_combine(seed, p.first);
-            // scales: mask
-            seed = hash_combine(seed, get_runtime_scale_hash(p.second));
-        }
+        seed = hash_combine(seed, attr.scales_.get_hash());
     }
 
     seed = hash_combine(seed, get_zero_points_hash(attr.zero_points_));
@@ -776,9 +758,9 @@ size_t get_desc_hash(const sdpa_desc_t &desc) {
     seed = hash_combine(seed, get_md_hash(desc.q_desc));
     seed = hash_combine(seed, get_md_hash(desc.k_desc));
     seed = hash_combine(seed, get_md_hash(desc.v_desc));
-    seed = hash_combine(seed, get_runtime_scale_hash(desc.kq_scales));
+    seed = hash_combine(seed, desc.kq_scales.get_hash());
     seed = hash_combine(seed, get_zero_points_hash(desc.kq_zero_points));
-    seed = hash_combine(seed, get_runtime_scale_hash(desc.vs_scales));
+    seed = hash_combine(seed, desc.vs_scales.get_hash());
     seed = hash_combine(seed, get_zero_points_hash(desc.vs_zero_points));
     seed = hash_combine(seed, get_md_hash(desc.dst_desc));
     seed = hash_combine(seed, get_md_hash(desc.attn_mask_desc));
diff --git a/src/common/primitive_hashing.hpp b/src/common/primitive_hashing.hpp
index e43bfcf9669..2b4e7e8d22f 100644
--- a/src/common/primitive_hashing.hpp
+++ b/src/common/primitive_hashing.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -23,7 +23,6 @@
 
 #include "common/c_types_map.hpp"
 #include "common/engine_id.hpp"
-#include "common/primitive_attr.hpp"
 #include "common/type_helpers.hpp"
 #include "common/verbose.hpp"
 
diff --git a/src/common/reorder.cpp b/src/common/reorder.cpp
index 506e939de5a..c21fe526dfb 100644
--- a/src/common/reorder.cpp
+++ b/src/common/reorder.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2024 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -38,6 +38,10 @@ namespace impl {
     VCONDCHECK(primitive, create, check, reorder, (cond), \
             status::invalid_arguments, msg, ##__VA_ARGS__);
 
+#define VCHECK_REORDER_UNIMPL(cond, msg, ...) \
+    VCONDCHECK(primitive, create, check, reorder, (cond), \
+            status::unimplemented, msg, ##__VA_ARGS__);
+
 namespace {
 engine_t *get_reorder_engine(engine_t *src_engine, engine_t *dst_engine) {
     auto s_ek = src_engine->kind();
@@ -100,24 +104,28 @@ status_t reorder_primitive_desc_create(std::shared_ptr<primitive_desc_t> &pd,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
+        static const std::vector<int> supported_args {
+                DNNL_ARG_SRC, DNNL_ARG_DST};
+        VCHECK_REORDER_UNIMPL(attr->scales_.has_default_values(supported_args),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+
         const auto &sc = attr->scales_;
         const auto &sc_src = sc.get(DNNL_ARG_SRC);
-        const int mask_src = sc_src.mask_;
+        const int mask_src = sc.get_mask(DNNL_ARG_SRC);
 
         VCHECK_REORDER(IMPLICATION(utils::one_of(src_md->data_type,
                                            data_type::s4, data_type::u4),
                                mask_src > 0),
                 VERBOSE_INVALID_DATATYPE, "mask for int4 source");
 
-        if (sc_src.ndims_ > 0) {
+        if (!sc_src.has_default_groups()) {
             const int src_ndims = s_mdw.ndims();
             const bool group_dims_are_consistent
-                    = IMPLICATION(sc_src.group_dims_[0] > 1,
-                              src_md->dims[src_ndims - 2]
-                                              % sc_src.group_dims_[0]
+                    = IMPLICATION(sc_src.get_group(0) > 1,
+                              src_md->dims[src_ndims - 2] % sc_src.get_group(0)
                                       == 0)
-                    && IMPLICATION(sc_src.group_dims_[1] > 1,
-                            src_md->dims[src_ndims - 1] % sc_src.group_dims_[1]
+                    && IMPLICATION(sc_src.get_group(1) > 1,
+                            src_md->dims[src_ndims - 1] % sc_src.get_group(1)
                                     == 0);
             VCHECK_REORDER(group_dims_are_consistent,
                     "groups dimensions are not consistent with reorder "
@@ -132,9 +140,8 @@ status_t reorder_primitive_desc_create(std::shared_ptr<primitive_desc_t> &pd,
                     "mask is not consistent with groups");
         }
 
-        const auto &sc_dst = sc.get(DNNL_ARG_DST);
-        VCHECK_REORDER(sc_dst.ndims_ == 0, VERBOSE_BAD_NDIMS, "dst scales",
-                sc_dst.ndims_);
+        VCHECK_REORDER(sc.get(DNNL_ARG_DST).has_default_groups(),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
     }
 
     bool is_cross_engine = src_engine != dst_engine
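The group-consistency rule above in plain numbers, using the same public call as earlier (all values hypothetical): for a two-dimensional int4 source of shape {64, 128} with source-scale groups {16, 1}, 64 % 16 == 0 holds, so the check passes, and the nonzero mask satisfies the int4 requirement:

    // Grouped source scales for an int4 reorder; dst scales stay default,
    // which also satisfies the has_default_groups(DNNL_ARG_DST) check above.
    dnnl_dims_t groups = {16, 1};
    dnnl_primitive_attr_set_scales(attr, DNNL_ARG_SRC, /* mask = */ 1 << 0,
            /* ndims = */ 2, groups, dnnl_f32);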
diff --git a/src/common/sdpa_pd.hpp b/src/common/sdpa_pd.hpp
index 5c09693c992..cdbb28d066f 100644
--- a/src/common/sdpa_pd.hpp
+++ b/src/common/sdpa_pd.hpp
@@ -130,7 +130,9 @@ struct sdpa_pd_t : public primitive_desc_t {
     }
 
     /// Returns the data type of the scales tensor for the KQ matmul
-    data_type_t key_scales_dt() const { return desc()->kq_scales.data_type_; }
+    data_type_t key_scales_dt() const {
+        return desc()->kq_scales.get_data_type();
+    }
 
     /// Returns the data type of the zero points tensor for the KQ matmul
     data_type_t key_zp_dt() const {
@@ -138,7 +140,9 @@ struct sdpa_pd_t : public primitive_desc_t {
     }
 
     /// Returns the data type of the scales tensor for the VS matmul
-    data_type_t value_scales_dt() const { return desc()->vs_scales.data_type_; }
+    data_type_t value_scales_dt() const {
+        return desc()->vs_scales.get_data_type();
+    }
 
     /// Returns the data type of the zero points tensor for the VS matmul
     data_type_t value_zp_dt() const {
@@ -198,18 +202,19 @@ struct sdpa_pd_t : public primitive_desc_t {
 
 private:
     static int scale_group_size(
-            const runtime_scales_t &scales, const memory_desc_t &desc) {
+            const quant_entry_t &scales, const memory_desc_t &desc) {
         dim_t out = utils::array_product(desc.dims, desc.ndims);
+        const auto mask = scales.get_mask();
         if (scales.has_default_groups()) {
-            for (int idx : mask_iterator(scales.mask_)) {
+            for (int idx : mask_iterator(mask)) {
                 out /= desc.dims[idx];
             }
         } else {
-            for (int idx : mask_iterator(scales.mask_)) {
+            for (int idx : mask_iterator(mask)) {
                 if (idx < 2) {
                     out /= desc.dims[idx];
                 } else {
-                    out /= (desc.dims[idx] / scales.group_dims_[idx - 2]);
+                    out /= (desc.dims[idx] / scales.get_group(idx - 2));
                 }
             }
         }
diff --git a/src/common/sdpa_types.hpp b/src/common/sdpa_types.hpp
index 671269a5437..6ef24b149f6 100644
--- a/src/common/sdpa_types.hpp
+++ b/src/common/sdpa_types.hpp
@@ -47,9 +47,9 @@ struct sdpa_desc_t : public op_desc_t {
 
     // primitive_attr_t can't be used because of deleted copy-ctor, but desc_t
     // must be copyable.
-    runtime_scales_t kq_scales {};
+    quant_entry_t kq_scales {};
     zero_points_t kq_zero_points {};
-    runtime_scales_t vs_scales {};
+    quant_entry_t vs_scales {};
     zero_points_t vs_zero_points {};
 
     memory_desc_t dst_desc {};
diff --git a/src/common/serialization.cpp b/src/common/serialization.cpp
index afe9c37f49e..6cc5af2b0da 100644
--- a/src/common/serialization.cpp
+++ b/src/common/serialization.cpp
@@ -178,18 +178,6 @@ void serialize_post_ops(
     }
 }
 
-void serialize_runtime_scales(
-        serialization_stream_t &sstream, const runtime_scales_t &scales) {
-    // scales: mask
-    sstream.write(&scales.mask_);
-    // scales: groups
-    const int ndims = scales.ndims_;
-    sstream.write(&ndims);
-    if (ndims > 0) sstream.write(scales.group_dims_, ndims);
-    // scales: data type
-    sstream.write(&scales.data_type_);
-}
-
 void serialize_zero_points(
         serialization_stream_t &sstream, const zero_points_t &zps) {
     for (int arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST})
@@ -224,12 +212,7 @@ void serialize_attr(
 
     if (!attr.scales_.has_default_values()) {
         sstream.write("scale:");
-        // go through scales for all arguments
-        for (const auto &p : attr.scales_.scales_) {
-            // scales: arg
-            sstream.write(&p.first);
-            serialize_runtime_scales(sstream, p.second);
-        }
+        attr.scales_.serialize(sstream);
     }
     // zero_points
     if (!attr.zero_points_.has_default_values()) sstream.write("zp:");
@@ -626,9 +609,9 @@ void serialize_desc(serialization_stream_t &sstream, const sdpa_desc_t &desc) {
     serialize_md(sstream, desc.q_desc);
     serialize_md(sstream, desc.k_desc);
    serialize_md(sstream, desc.v_desc);
-    serialize_runtime_scales(sstream, desc.kq_scales);
+    desc.kq_scales.serialize(sstream);
     serialize_zero_points(sstream, desc.kq_zero_points);
-    serialize_runtime_scales(sstream, desc.vs_scales);
+    desc.vs_scales.serialize(sstream);
     serialize_zero_points(sstream, desc.vs_zero_points);
     serialize_md(sstream, desc.dst_desc);
     serialize_md(sstream, desc.attn_mask_desc);
diff --git a/src/common/serialization_stream.hpp b/src/common/serialization_stream.hpp
index 28eb32aad61..90da39b94fe 100644
--- a/src/common/serialization_stream.hpp
+++ b/src/common/serialization_stream.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2021-2023 Intel Corporation
+* Copyright 2021-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #define COMMON_SERIALIZATION_STREAM_HPP

 #include
+#include
 #include
 #include
diff --git a/src/common/softmax.cpp b/src/common/softmax.cpp
index 94e6e9c4ca5..c055321bb63 100644
--- a/src/common/softmax.cpp
+++ b/src/common/softmax.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2023 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -112,13 +112,21 @@ status_t softmax_attr_check(const softmax_desc_t &desc, const engine_t *engine,
     VCHECK_SOFTMAX_UNIMPL(attr->has_default_values(fwd_attr_mask, dst_dt),
             VERBOSE_UNSUPPORTED_ATTR);

+    // Check scales
     if (!attr->scales_.has_default_values()) {
-        const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
-
-        VCHECK_SOFTMAX_UNIMPL(utils::everyone_is(0, mask_src, mask_dst),
+        static const std::vector<int> supported_args {
+                DNNL_ARG_SRC, DNNL_ARG_DST};
+        VCHECK_SOFTMAX_UNIMPL(
+                attr->scales_.has_default_values(supported_args),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+        for (int arg : supported_args) {
+            if (attr->scales_.has_default_values(arg)) continue;
+
+            const int mask = attr->scales_.get_mask(arg);
+            VCHECK_SOFTMAX_UNIMPL(
+                    mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
     }

     // Check post-ops
diff --git a/src/common/softmax_pd.hpp b/src/common/softmax_pd.hpp
index 6e6c12c54c2..905d604ce92 100644
--- a/src/common/softmax_pd.hpp
+++ b/src/common/softmax_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2024 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -176,11 +176,18 @@ struct softmax_fwd_pd_t : public softmax_pd_t {
                 dst_md_, src_md_.format_desc.blocking);
     }

-    bool attr_scales_ok() const {
+    bool attr_scales_ok(const std::vector<int> &supported_args
+            = {DNNL_ARG_SRC, DNNL_ARG_DST}) const {
         const auto &scales = attr()->scales_;
-        bool ok = true;
-        for (const auto &e : scales.scales_) {
-            ok = ok && e.second.mask_ == 0;
+        bool ok = scales.has_default_values(supported_args);
+
+        for (const auto &arg : supported_args) {
+            if (scales.has_default_values(arg)) continue;
+
+            // TODO: disallow non-int8 scales?
+ // const data_type_t dt = arg_md(arg)->data_type; + // ok = ok && utils::one_of(dt, s8, u8); + ok = ok && scales.get_mask(arg) == 0; } return ok; } diff --git a/src/common/verbose.cpp b/src/common/verbose.cpp index 4d93782ee7c..05682c9d906 100644 --- a/src/common/verbose.cpp +++ b/src/common/verbose.cpp @@ -618,18 +618,6 @@ std::string md2desc_str(const memory_desc_t *md) { return s; } -std::ostream &operator<<(std::ostream &ss, const runtime_scales_t &scale) { - ss << scale.mask_; - ss << ":" << scale.data_type_; - if (scale.ndims_) { - ss << ":"; - for (int i = 0; i < scale.ndims_ - 1; ++i) - ss << scale.group_dims_[i] << 'x'; - ss << scale.group_dims_[scale.ndims_ - 1]; - } - return ss; -} - std::ostream &operator<<( std::ostream &ss, const rnn_create_time_scales_t &rnn_scales) { ss << rnn_scales.mask_; @@ -742,20 +730,13 @@ std::ostream &operator<<(std::ostream &ss, const primitive_attr_t *attr) { if (deterministic) { ss << field_delim() << "attr-deterministic:" << deterministic; } - if (attr->has_default_values()) return ss; - const arg_scales_t &as = attr->scales_; - if (!as.has_default_values()) { - std::string delim = empty_delim; - ss << field_delim() << "attr-scales:"; - for (const auto &map_entry : as.scales_) { - const auto &val = map_entry.second; - if (val.has_default_values()) continue; + // Fast exit if rest attributes were not specified. + if (attr->has_default_values()) return ss; - int arg = map_entry.first; - ss << delim << arg2str(arg) << ":" << val; - delim = attr_delim; - } + const scales_t &scales = attr->scales_; + if (!scales.has_default_values()) { + ss << field_delim() << "attr-scales:" << scales.get_verbose(); } const zero_points_t &zp = attr->zero_points_; diff --git a/src/common/verbose.hpp b/src/common/verbose.hpp index adb7b3708e9..637f8cdd03d 100644 --- a/src/common/verbose.hpp +++ b/src/common/verbose.hpp @@ -382,6 +382,7 @@ std::string md2fmt_str( const char *name, const memory_desc_t *md, format_kind_t user_format); std::string md2dim_str( const memory_desc_t *md, dims_type_t dims_type = dims_type_t::dims); +std::string arg2str(int arg); // Returns a verbose string of dimensions or descriptor from src, wei, and/or // dst memory descs. Can be called externally to provide info about actual // values of runtime dimensions. diff --git a/src/cpu/aarch64/acl_reorder.hpp b/src/cpu/aarch64/acl_reorder.hpp index e586ed4e304..d429fddf7e7 100644 --- a/src/cpu/aarch64/acl_reorder.hpp +++ b/src/cpu/aarch64/acl_reorder.hpp @@ -95,12 +95,12 @@ struct acl_reorder_fwd_t : public primitive_t { if (!ok) return status::unimplemented; - int mask = -1; - bool is_set = false; - CHECK(attr->scales_.get(DNNL_ARG_DST, &mask, &is_set)); - const memory_desc_wrapper input_d(src_md); - if (input_d.has_runtime_dims_or_strides() && is_set && mask > 0) - return status::unimplemented; + if (!attr->scales_.has_default_values(DNNL_ARG_DST)) { + int mask = attr->scales_.get_mask(DNNL_ARG_DST); + const memory_desc_wrapper input_d(src_md); + if (input_d.has_runtime_dims_or_strides() && mask > 0) + return status::unimplemented; + } // Create and check primitive descriptor auto _pd = make_unique_pd(attr, src_engine->kind(), src_md, diff --git a/src/cpu/aarch64/brgemm/brgemm.cpp b/src/cpu/aarch64/brgemm/brgemm.cpp index 86d1798496e..1cfbc4552c5 100644 --- a/src/cpu/aarch64/brgemm/brgemm.cpp +++ b/src/cpu/aarch64/brgemm/brgemm.cpp @@ -297,21 +297,22 @@ status_t brgemm_desc_set_postops(brgemm_t *brg, const primitive_attr_t *attr, if (brg->with_scales) { // Note. 
the current version supports only two different output scale // types: - // 1) common (mask_ = 0) + // 1) common (mask = 0) // 2) per_n_dim_scale - broadcast across n dimension; // for convolution and inner product promitives it corresponds - // to "per_oc" mask_ = 1 << 1; for matmul - to - // mask_ = (1 << (ndims - 1))), where ndims is number of + // to "per_oc" mask = 1 << 1; for matmul - to + // mask = (1 << (ndims - 1))), where ndims is number of // dimensions for original matmul problem - // So if wei_scales.mask_ != 0 (not common) it's assumed here that scale - // type is per_n_dim_scale and driver which calls brgemm kernel checked - // that mask has correct value for this case - brg->is_oc_scale = wei_scales.mask_ != 0; + // So if wei_scales.get_mask() > 0 (not common) it's assumed here that + // scale type is per_n_dim_scale and driver which calls brgemm kernel + // checked that mask has correct value for this case + brg->is_oc_scale = wei_scales.get_mask() > 0; } const auto &dst_scales = attr->scales_.get(DNNL_ARG_DST); brg->with_dst_scales = !dst_scales.has_default_values(); - const bool scales_ok = src_scales.mask_ == 0 && dst_scales.mask_ == 0 + const bool scales_ok = src_scales.get_mask() == 0 + && dst_scales.get_mask() == 0 && attr->scales_.has_default_values( {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}); if (!scales_ok) return status::unimplemented; diff --git a/src/cpu/aarch64/jit_brdgmm_dw_conv.cpp b/src/cpu/aarch64/jit_brdgmm_dw_conv.cpp index 24e018aef02..a8be300ddb0 100644 --- a/src/cpu/aarch64/jit_brdgmm_dw_conv.cpp +++ b/src/cpu/aarch64/jit_brdgmm_dw_conv.cpp @@ -200,7 +200,7 @@ status_t brdgmm_dw_convolution_fwd_t::pd_t::init(engine_t *engine) { const auto &wei_scales = attr_.scales_.get(DNNL_ARG_WEIGHTS); jcp.with_scale = !src_scales.has_default_values() || !wei_scales.has_default_values(); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; const bool scales_ok = attr_scales_ok({DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}); diff --git a/src/cpu/aarch64/jit_brgemm_conv_utils.cpp b/src/cpu/aarch64/jit_brgemm_conv_utils.cpp index 3b9d3422594..026290c53e9 100644 --- a/src/cpu/aarch64/jit_brgemm_conv_utils.cpp +++ b/src/cpu/aarch64/jit_brgemm_conv_utils.cpp @@ -1993,7 +1993,7 @@ status_t init_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, jcp.with_scales = !src_scales.has_default_values() || !wei_scales.has_default_values() || jcp.scale_adjust_factor != 1.0f; - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; // disables the shape with small ic but large spatial // or specific large spatial shapes for int8 conv @@ -2190,7 +2190,7 @@ status_t init_1x1_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, jcp.with_scales = !src_scales.has_default_values() || !wei_scales.has_default_values() || jcp.scale_adjust_factor != 1.0f; - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; // enable ununroll_bd_loop for big shapes to reduce kernel sizes jcp.ununroll_bd_loop diff --git a/src/cpu/aarch64/jit_brgemm_post_ops.hpp b/src/cpu/aarch64/jit_brgemm_post_ops.hpp index 4257bfe31b8..5aed828a582 100644 --- a/src/cpu/aarch64/jit_brgemm_post_ops.hpp +++ b/src/cpu/aarch64/jit_brgemm_post_ops.hpp @@ -325,8 +325,8 @@ struct jit_brgemm_kernel_post_ops : public jit_generator { const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); // per_oc: conv: 1 << 0, (1 << 1) + (1 << 0) (with groups) // per_oc: ip: 1 << 0 - is_oc_scale_ - = utils::one_of(wei_scales.mask_, 1 << 0, 
(1 << 1) + (1 << 0)); + is_oc_scale_ = utils::one_of( + wei_scales.get_mask(), 1 << 0, (1 << 1) + (1 << 0)); LDD_ = brg.LDD; inp_dt_ = brg.dt_c; diff --git a/src/cpu/aarch64/jit_uni_reorder.cpp b/src/cpu/aarch64/jit_uni_reorder.cpp index d6c20904c90..85e9ae4036c 100644 --- a/src/cpu/aarch64/jit_uni_reorder.cpp +++ b/src/cpu/aarch64/jit_uni_reorder.cpp @@ -2788,13 +2788,10 @@ status_t jit_uni_reorder_t::pd_t::init_scratchpad() { compensation_reduce_size); } - const memory_desc_wrapper input_d(src_md()); - int scales_mask = -1; - bool is_set = false; - CHECK(attr()->scales_.get(DNNL_ARG_DST, &scales_mask, &is_set)); - - if (is_set && scales_mask > 0) { - get_D_values(input_d, scales_mask, nullptr, &D_mask_, nullptr); + if (!attr()->scales_.has_default_values(DNNL_ARG_DST)) { + const memory_desc_wrapper input_d(src_md()); + int mask = attr()->scales_.get_mask(DNNL_ARG_DST); + get_D_values(input_d, mask, nullptr, &D_mask_, nullptr); if (D_mask_ > 1) { scratchpad.template book( memory_tracking::names::key_reorder_precomputed_dst_scales, diff --git a/src/cpu/aarch64/jit_uni_reorder_utils.cpp b/src/cpu/aarch64/jit_uni_reorder_utils.cpp index 63fc09a4cf2..0900d59d17d 100644 --- a/src/cpu/aarch64/jit_uni_reorder_utils.cpp +++ b/src/cpu/aarch64/jit_uni_reorder_utils.cpp @@ -276,24 +276,21 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, p.src_scale_type = scale_type_t::NONE; int src_mask = 0; - bool is_src_set = false; - CHECK(attr->scales_.get(DNNL_ARG_SRC, &src_mask, &is_src_set)); - if (is_src_set) { + if (!attr->scales_.has_default_values(DNNL_ARG_SRC)) { + src_mask = attr->scales_.get_mask(DNNL_ARG_SRC); p.src_scale_type = src_mask == 0 ? scale_type_t::COMMON : scale_type_t::MANY; } p.dst_scale_type = scale_type_t::NONE; int dst_mask = 0; - bool is_dst_set = false; - CHECK(attr->scales_.get(DNNL_ARG_DST, &dst_mask, &is_dst_set)); - if (is_dst_set) { + if (!attr->scales_.has_default_values(DNNL_ARG_DST)) { + dst_mask = attr->scales_.get_mask(DNNL_ARG_DST); p.dst_scale_type = dst_mask == 0 ? scale_type_t::COMMON : scale_type_t::MANY; } - if (is_src_set && is_dst_set && src_mask != dst_mask) - return status::unimplemented; + if (src_mask != dst_mask) return status::unimplemented; p.scale_adjust = (om_d.extra().flags & memory_extra_flags::scale_adjust) ? 
om_d.extra().scale_adjust diff --git a/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp b/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp index 076d5fd321a..ca826296706 100644 --- a/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp +++ b/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp @@ -66,11 +66,11 @@ status_t acl_lowp_matmul_t::pd_t::init(engine_t *engine) { | smask_t::zero_points_runtime | smask_t::post_ops), "only scale, zero point and post-ops attrs supported"); - VDISPATCH_MATMUL(attr()->scales_.get(DNNL_ARG_SRC).mask_ == 0 + VDISPATCH_MATMUL(attr()->scales_.get_mask(DNNL_ARG_SRC) == 0 && attr()->zero_points_.get(DNNL_ARG_SRC) == 0 - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0 + && attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0 && attr()->zero_points_.get(DNNL_ARG_WEIGHTS) == 0 - && attr()->scales_.get(DNNL_ARG_DST).mask_ == 0 + && attr()->scales_.get_mask(DNNL_ARG_DST) == 0 && attr()->zero_points_.get(DNNL_ARG_DST) == 0, "common scales and zero points only"); diff --git a/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp b/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp index bdb4e04ce74..8b16545dd9b 100644 --- a/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp +++ b/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp @@ -52,13 +52,20 @@ status_t acl_lowp_matmul_sq_t::pd_t::init(engine_t *engine) { attr()->has_default_values(smask_t::scales_runtime | smask_t::zero_points_runtime | smask_t::post_ops), "only scale, zero point and post-ops attrs supported"); - VDISPATCH_MATMUL(attr()->scales_.get(DNNL_ARG_SRC).mask_ == 0 - && attr()->zero_points_.get(DNNL_ARG_SRC) == 0 - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0 + + static const std::vector supported_args { + DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}; + for (int arg : supported_args) { + if (attr()->scales_.has_default_values(arg)) continue; + + VDISPATCH_MATMUL(attr()->scales_.get_mask(arg) == 0, + VERBOSE_UNSUPPORTED_SCALES_CFG); + } + + VDISPATCH_MATMUL(attr()->zero_points_.get(DNNL_ARG_SRC) == 0 && attr()->zero_points_.get(DNNL_ARG_WEIGHTS) == 0 - && attr()->scales_.get(DNNL_ARG_DST).mask_ == 0 && attr()->zero_points_.get(DNNL_ARG_DST) == 0, - "common scales and zero points only"); + "common zero points only"); VDISPATCH_MATMUL( !has_runtime_dims_or_strides(), VERBOSE_RUNTIMEDIM_UNSUPPORTED); const memory_desc_wrapper src_d(src_md_); @@ -214,4 +221,4 @@ status_t acl_lowp_matmul_sq_t::execute(const exec_ctx_t &ctx) const { } // namespace aarch64 } // namespace cpu } // namespace impl -} // namespace dnnl \ No newline at end of file +} // namespace dnnl diff --git a/src/cpu/aarch64/matmul/brgemm_matmul.cpp b/src/cpu/aarch64/matmul/brgemm_matmul.cpp index 7ede5613803..ae7255c9b5e 100644 --- a/src/cpu/aarch64/matmul/brgemm_matmul.cpp +++ b/src/cpu/aarch64/matmul/brgemm_matmul.cpp @@ -72,9 +72,9 @@ status_t brgemm_matmul_t::pd_t::init(engine_t *engine) { const std::vector supported_args = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}; bool ok = attr_scales_ok(supported_args); - if (!attr()->scales_.get(DNNL_ARG_SRC).has_default_values() + if (!attr()->scales_.has_default_values(DNNL_ARG_SRC) && !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values() - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0) { + && attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0) { // This case requires scratchpad if (N() == DNNL_RUNTIME_DIM_VAL) ok = false; } diff --git a/src/cpu/aarch64/matmul/brgemm_matmul_utils.cpp b/src/cpu/aarch64/matmul/brgemm_matmul_utils.cpp index 1506be014a9..0610147c752 100644 --- a/src/cpu/aarch64/matmul/brgemm_matmul_utils.cpp +++ 
b/src/cpu/aarch64/matmul/brgemm_matmul_utils.cpp @@ -783,21 +783,22 @@ status_t init_brgemm_matmul_conf(cpu_isa_t isa, brgemm_matmul_conf_t &bgmmc, const auto &src_scales = attr.scales_.get(DNNL_ARG_SRC); const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); - bgmmc.with_scales = !src_scales.has_default_values() - || !wei_scales.has_default_values(); - if (bgmmc.with_scales) { - bgmmc.is_oscale_per_n = wei_scales.mask_ == 1 << (bgmmc.ndims - 1); + const bool has_wei_scales = !wei_scales.has_default_values(); + bgmmc.with_scales = !src_scales.has_default_values() || has_wei_scales; + if (has_wei_scales) { + bgmmc.is_oscale_per_n + = wei_scales.get_mask() == (1 << (bgmmc.ndims - 1)); // only common and per-oc-channel scales are supported - VCONDCHECK_BG(wei_scales.mask_ == 0 || bgmmc.is_oscale_per_n, + VCONDCHECK_BG(wei_scales.get_mask() == 0 || bgmmc.is_oscale_per_n, VERBOSE_UNSUPPORTED_SCALES_CFG); } const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); bgmmc.with_dst_scales = !dst_scales.has_default_values(); // only common scales are supported - if (bgmmc.with_dst_scales && dst_scales.mask_ != 0) - return status::unimplemented; + VCONDCHECK_BG(!(bgmmc.with_dst_scales && dst_scales.get_mask() > 0), + VERBOSE_UNSUPPORTED_SCALES_CFG); const auto &p = attr.post_ops_; bgmmc.with_sum = p.find(primitive_kind::sum) != -1; diff --git a/src/cpu/cpu_primitive.hpp b/src/cpu/cpu_primitive.hpp index ac1dccd4181..d82b9ae0d92 100644 --- a/src/cpu/cpu_primitive.hpp +++ b/src/cpu/cpu_primitive.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ alignas(16) float CONCAT2(scales, _buf16)[16] = {0}; \ const float *scales {nullptr}; \ if ((attr)) { \ - if ((attr)->scales_.get(arg).has_default_values()) { \ + if ((attr)->scales_.has_default_values(arg)) { \ utils::array_set(CONCAT2(scales, _buf16), 1.0f, 16); \ scales = CONCAT2(scales, _buf16); \ } else { \ @@ -92,7 +92,7 @@ #define ASSIGN_ARG_SCALE_VALUE(scale, mem_arg) \ alignas(16) float CONCAT2(CONCAT2(scales, _buf16), mem_arg)[16] = {0}; \ - if (pd()->attr()->scales_.get(mem_arg).has_default_values()) { \ + if (pd()->attr()->scales_.has_default_values(mem_arg)) { \ utils::array_set(CONCAT2(CONCAT2(scales, _buf16), mem_arg), 1.0f, 16); \ scale = CONCAT2(CONCAT2(scales, _buf16), mem_arg); \ } else { \ diff --git a/src/cpu/dw_convolution_utils.hpp b/src/cpu/dw_convolution_utils.hpp index 088e01b9964..03bb6ce43f5 100644 --- a/src/cpu/dw_convolution_utils.hpp +++ b/src/cpu/dw_convolution_utils.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2023 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,23 +43,19 @@ inline status_t get_depthwise_conv_desc(convolution_desc_t &cd_dw, // post-ops after depthwise post-op. 
auto &dw_po = attr_1x1.post_ops_.entry_[dw_po_index].depthwise_conv; - // erase 1x1 conv scales - for (auto arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) { - auto &scale = attr_dw.scales_.get(arg); - if (!scale.has_default_values()) attr_dw.scales_.reset(arg); - } - const auto &dw_src_scales = attr_1x1.scales_.get(DNNL_ARG_DST); const auto &dw_wei_scales = attr_1x1.scales_.get(DNNL_ARG_ATTR_POST_OP_DW | DNNL_ARG_WEIGHTS); const auto &dw_dst_scales = attr_1x1.scales_.get(DNNL_ARG_ATTR_POST_OP_DW | DNNL_ARG_DST); + + assert(attr_dw.scales_.has_default_values()); if (!dw_src_scales.has_default_values()) - attr_dw.scales_.set(DNNL_ARG_SRC, dw_src_scales.mask_); + CHECK(attr_dw.scales_.set(DNNL_ARG_SRC, dw_src_scales.get_mask())); if (!dw_wei_scales.has_default_values()) - attr_dw.scales_.set(DNNL_ARG_WEIGHTS, dw_wei_scales.mask_); + CHECK(attr_dw.scales_.set(DNNL_ARG_WEIGHTS, dw_wei_scales.get_mask())); if (!dw_dst_scales.has_default_values()) - attr_dw.scales_.set(DNNL_ARG_DST, dw_dst_scales.mask_); + CHECK(attr_dw.scales_.set(DNNL_ARG_DST, dw_dst_scales.get_mask())); auto dw_po_len = attr_1x1.post_ops_.len() - (dw_po_index + 1); attr_dw.post_ops_.entry_.resize(dw_po_len); diff --git a/src/cpu/gemm_convolution_utils.cpp b/src/cpu/gemm_convolution_utils.cpp index 01b90f5fc41..0400b66608c 100644 --- a/src/cpu/gemm_convolution_utils.cpp +++ b/src/cpu/gemm_convolution_utils.cpp @@ -2133,8 +2133,8 @@ status_t init_conf(conv_gemm_conf_t &jcp, jcp.dst_os_stride = dst_d.is_blocking_desc() ? dst_d.blocking_desc().strides[ndims - 1] : 0; - jcp.scale_idx_mult = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0; - jcp.with_dst_scale = !attr.scales_.get(DNNL_ARG_DST).has_default_values(); + jcp.scale_idx_mult = attr.scales_.get_mask(DNNL_ARG_WEIGHTS) > 0; + jcp.with_dst_scale = !attr.scales_.has_default_values(DNNL_ARG_DST); book_precomputed_scales(scratchpad, attr.scales_, jcp.ngroups * jcp.oc); if (jcp.zp.src_exists) { diff --git a/src/cpu/gemm_inner_product_utils.cpp b/src/cpu/gemm_inner_product_utils.cpp index 815e953898b..2d637d543cf 100644 --- a/src/cpu/gemm_inner_product_utils.cpp +++ b/src/cpu/gemm_inner_product_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2023 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
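// A minimal standalone sketch (editor's example, not library code) of the
// mask convention the hunks above and below rely on: bit d of a scales mask
// means "one scale per element along logical dimension d" of the affected
// tensor. The helper names here are hypothetical.
#include <cassert>

// Inner product / conv weights put OC first, so per-OC is bit 0; grouped
// conv weights are (g, oc, ...), so per-OC becomes bits 0 and 1.
int per_oc_mask(bool with_groups) {
    return with_groups ? (1 << 1) + (1 << 0) : (1 << 0);
}

// Matmul weights are (..., K, N); per-N scales sit on the last dimension.
int per_n_mask(int ndims) {
    return 1 << (ndims - 1);
}

int main() {
    assert(per_oc_mask(/* with_groups = */ false) == 1);
    assert(per_oc_mask(/* with_groups = */ true) == 3);
    assert(per_n_mask(/* ndims = */ 3) == (1 << 2)); // B x K x N
    return 0;
}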
@@ -157,17 +157,17 @@ pp_kernel_t::pp_kernel_t(size_t OC, size_t MB, dim_t dst_mb_stride, , bias_data_type_(bias_dt) , acc_data_type_(acc_dt) , dst_data_type_(dst_md->data_type) - , do_scale_(!attr->scales_.get(DNNL_ARG_SRC).has_default_values() - || !attr->scales_.get(DNNL_ARG_WEIGHTS).has_default_values()) + , do_scale_(!attr->scales_.has_default_values(DNNL_ARG_SRC) + || !attr->scales_.has_default_values(DNNL_ARG_WEIGHTS)) , ndims_(dst_md->ndims) { - if (do_scale_) { - int wei_mask = attr->scales_.get(DNNL_ARG_WEIGHTS).mask_; + if (!attr->scales_.has_default_values(DNNL_ARG_WEIGHTS)) { + int wei_mask = attr->scales_.get_mask(DNNL_ARG_WEIGHTS); // matmul: per_oc: 1 << (ndims_ - 1) // ip: per_oc: 1 << 0 scale_idx_mult_ = wei_mask == (1 << (ndims_ - 1)) || wei_mask == 1 << 0; } - do_dst_scale_ = !attr->scales_.get(DNNL_ARG_DST).has_default_values(); + do_dst_scale_ = !attr->scales_.has_default_values(DNNL_ARG_DST); post_ops_ = attr->post_ops_; const int eltwise_ind = post_ops_.find(primitive_kind::eltwise); diff --git a/src/cpu/gemm_x8s8s32x_convolution.cpp b/src/cpu/gemm_x8s8s32x_convolution.cpp index 8482ae65eb0..53119b38915 100644 --- a/src/cpu/gemm_x8s8s32x_convolution.cpp +++ b/src/cpu/gemm_x8s8s32x_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2017-2024 Intel Corporation +* Copyright 2017-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -135,10 +135,9 @@ status_t gemm_x8s8s32x_convolution_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, - pd()->IC(), pd()->OC(), false, wei_scale_mask != 0, pd()->attr()); + pd()->IC(), pd()->OC(), false, wei_scale_mask > 0, pd()->attr()); parallel(jcp.nthr, [&](const int ithr, const int nthr) { status_t st_thr = execute_forward_thr(ithr, nthr, src_base, wei_base, @@ -358,16 +357,15 @@ status_t gemm_x8s8s32x_convolution_bwd_data_t::execute_backward_data_thr( const auto diff_src_dt_size = types::data_type_size(diff_src_md.data_type()); - const int scale_idx_mult = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ + const int scale_idx_mult = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == (1 << static_cast(pd()->with_groups())); DEFINE_ARG_SCALES_BUFFER(src_scales, DNNL_ARG_SRC); DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, - pd()->IC(), pd()->OC(), false, wei_scale_mask != 0, pd()->attr()); + pd()->IC(), pd()->OC(), false, wei_scale_mask > 0, pd()->attr()); const dim_t work_amount = jcp.ngroups * jcp.mb; diff --git a/src/cpu/gemm_x8s8s32x_inner_product.cpp b/src/cpu/gemm_x8s8s32x_inner_product.cpp index 341a584a276..cad125ea7be 100644 --- a/src/cpu/gemm_x8s8s32x_inner_product.cpp +++ b/src/cpu/gemm_x8s8s32x_inner_product.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel 
Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,10 +64,9 @@ status_t gemm_x8s8s32x_inner_product_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); auto scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, - IC, OC, false, wei_scale_mask != 0, pd()->attr()); + IC, OC, false, wei_scale_mask > 0, pd()->attr()); int32_t *acc = pd()->dst_is_acc_ ? (int32_t *)dst diff --git a/src/cpu/matmul/gemm_bf16_matmul.cpp b/src/cpu/matmul/gemm_bf16_matmul.cpp index a9e32a34e5f..7758a3db4ed 100644 --- a/src/cpu/matmul/gemm_bf16_matmul.cpp +++ b/src/cpu/matmul/gemm_bf16_matmul.cpp @@ -105,9 +105,9 @@ status_t gemm_bf16_matmul_t::pd_t::check_and_configure_attributes( engine_t *engine) { auto check_attr_scales = [&]() -> bool { bool ok = attr_scales_ok(); - if (!attr()->scales_.get(DNNL_ARG_SRC).has_default_values() - && !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values() - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0) { + if (!attr()->scales_.has_default_values(DNNL_ARG_SRC) + && !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS) + && attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0) { // This case requires scratchpad with unknown size if (N() == DNNL_RUNTIME_DIM_VAL) ok = false; } @@ -145,11 +145,15 @@ status_t gemm_bf16_matmul_t::pd_t::check_and_configure_attributes( // set state CHECK(params_.pp_attr_.copy_from(*attr())); params_.gemm_applies_output_scales_ - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0 && !with_bias(); + = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0 && !with_bias(); if (params_.gemm_applies_output_scales_) { - params_.pp_attr_.scales_.reset(DNNL_ARG_SRC); - params_.pp_attr_.scales_.reset(DNNL_ARG_WEIGHTS); + VDISPATCH_MATMUL_SC(params_.pp_attr_.scales_.set( + DNNL_ARG_SRC, default_quant_entry()), + VERBOSE_UNSUPPORTED_SCALES_CFG); + VDISPATCH_MATMUL_SC(params_.pp_attr_.scales_.set( + DNNL_ARG_WEIGHTS, default_quant_entry()), + VERBOSE_UNSUPPORTED_SCALES_CFG); } // check post-ops @@ -203,11 +207,10 @@ status_t gemm_bf16_matmul_t::execute_ref( DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); auto scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, src_d.dims()[ndims - 1], dst_d.dims()[ndims - 1], false, - wei_scale_mask != 0, pd()->attr()); + wei_scale_mask > 0, pd()->attr()); if (src_d.has_zero_dim() || weights_d.has_zero_dim() || dst_d.has_zero_dim()) @@ -254,7 +257,7 @@ status_t gemm_bf16_matmul_t::execute_ref( const float beta = params.gemm_beta_; const dim_t acc_ldc = dst_is_acc ? 
ldc : N; const int scale_idx_mult - = this->pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ + = this->pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == (1 << (ndims - 1)); std::atomic st(status::success); diff --git a/src/cpu/matmul/gemm_f32_matmul.cpp b/src/cpu/matmul/gemm_f32_matmul.cpp index de57af38944..d44fd2eb0aa 100644 --- a/src/cpu/matmul/gemm_f32_matmul.cpp +++ b/src/cpu/matmul/gemm_f32_matmul.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,9 +50,9 @@ status_t gemm_f32_matmul_t::pd_t::init(engine_t *engine) { auto check_attr_scales = [&]() -> bool { bool ok = attr_scales_ok(); - if (!attr()->scales_.get(DNNL_ARG_SRC).has_default_values() + if (!attr()->scales_.has_default_values(DNNL_ARG_SRC) && !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values() - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0) { + && attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0) { // This case requires scratchpad with unknown size if (N() == DNNL_RUNTIME_DIM_VAL) ok = false; } @@ -131,10 +131,14 @@ status_t gemm_f32_matmul_t::pd_t::configure_attributes() { CHECK(params_.pp_attr_.copy_from(*attr())); params_.gemm_applies_output_scales_ - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0 && !with_bias(); + = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0 && !with_bias(); if (params_.gemm_applies_output_scales_) { - params_.pp_attr_.scales_.reset(DNNL_ARG_SRC); - params_.pp_attr_.scales_.reset(DNNL_ARG_WEIGHTS); + VDISPATCH_MATMUL_SC(params_.pp_attr_.scales_.set( + DNNL_ARG_SRC, default_quant_entry()), + VERBOSE_UNSUPPORTED_SCALES_CFG); + VDISPATCH_MATMUL_SC(params_.pp_attr_.scales_.set( + DNNL_ARG_WEIGHTS, default_quant_entry()), + VERBOSE_UNSUPPORTED_SCALES_CFG); } const auto &po = params_.pp_attr_.post_ops_; @@ -186,11 +190,10 @@ status_t gemm_f32_matmul_t::execute_ref(const exec_ctx_t &ctx) const { DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); auto scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, src_d.dims()[ndims - 1], dst_d.dims()[ndims - 1], false, - wei_scale_mask != 0, pd()->attr()); + wei_scale_mask > 0, pd()->attr()); if (src_d.has_zero_dim() || weights_d.has_zero_dim() || dst_d.has_zero_dim()) @@ -237,7 +240,7 @@ status_t gemm_f32_matmul_t::execute_ref(const exec_ctx_t &ctx) const { const dim_t acc_ldc = dst_is_acc ? 
ldc : N; const int scale_idx_mult - = this->pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ + = this->pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == (1 << (ndims - 1)); std::atomic st(status::success); diff --git a/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp b/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp index 5b9f7de0987..2483b25d048 100644 --- a/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp +++ b/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp @@ -61,9 +61,9 @@ status_t gemm_x8s8s32x_matmul_t::pd_t::init(engine_t *engine) { auto check_attr_scales = [&]() -> bool { bool ok = attr_scales_ok(); - if (!attr()->scales_.get(DNNL_ARG_SRC).has_default_values() - && !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values() - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0) { + if (!attr()->scales_.has_default_values(DNNL_ARG_SRC) + && !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS) + && attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0) { // This case requires scratchpad with unknown size if (N() == DNNL_RUNTIME_DIM_VAL) ok = false; } @@ -203,11 +203,10 @@ status_t gemm_x8s8s32x_matmul_t::execute_ref(const exec_ctx_t &ctx) const { DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); auto &scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, src_d.dims()[ndims - 1], dst_d.dims()[ndims - 1], false, - wei_scale_mask != 0, pd()->attr()); + wei_scale_mask > 0, pd()->attr()); DEFINE_ZERO_POINT_VALUE(src_zero_point, DNNL_ARG_SRC); DEFINE_ZERO_POINT_VALUE(weights_zero_point, DNNL_ARG_WEIGHTS); @@ -276,7 +275,7 @@ status_t gemm_x8s8s32x_matmul_t::execute_ref(const exec_ctx_t &ctx) const { const float beta = params.gemm_beta_; const dim_t acc_ldc = dst_is_acc ? ldc : N; const int scale_idx_mult - = this->pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ + = this->pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == (1 << (ndims - 1)); std::atomic st(status::success); diff --git a/src/cpu/matmul/matmul_utils.hpp b/src/cpu/matmul/matmul_utils.hpp index 996683c522d..2712e000189 100644 --- a/src/cpu/matmul/matmul_utils.hpp +++ b/src/cpu/matmul/matmul_utils.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2022 Arm Ltd. 
and affiliates * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -156,6 +156,11 @@ struct matmul_helper_t { static status_t get_quant_md(memory_desc_t &md, const int ndims, const dims_t in_dims, const int quant_mask, const dim_t g0, const dim_t g1, const data_type_t dt) { + if (dt == data_type::undef) { + md = glob_zero_md; + return status::success; + } + dims_t quant_dims {}; utils::copy_dims_with_mask(quant_dims, in_dims, ndims, quant_mask, /* fill_with_ones = */ true); @@ -172,6 +177,8 @@ struct matmul_helper_t { static dim_t get_quant_off(const dims_t &input_idx, const int ndims, const int quant_mask, const dim_t g0, const dim_t g1, const memory_desc_t &quant_md) { + if (types::is_zero_md(&quant_md)) return 0; + dims_t quant_idx {}; utils::array_copy(quant_idx, input_idx, ndims); utils::apply_mask_on_dims(quant_idx, ndims, quant_mask); diff --git a/src/cpu/matmul/ref_matmul.cpp b/src/cpu/matmul/ref_matmul.cpp index d81b63a2b4b..8b6a02a5494 100644 --- a/src/cpu/matmul/ref_matmul.cpp +++ b/src/cpu/matmul/ref_matmul.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -112,25 +112,18 @@ status_t ref_matmul_t::execute_ref(const exec_ctx_t &ctx) const { // arg scales section const auto &attr_scales = pd()->attr()->scales_; - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); const bool with_wei_scales - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const bool with_dst_scales - = !attr_scales.get(DNNL_ARG_DST).has_default_values(); - const auto wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); + const bool with_dst_scales = !attr_scales.has_default_values(DNNL_ARG_DST); + const auto wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); const dim_t wei_scale_stride_n = (wei_scale_mask & pd()->wei_qmask_N()) ? 1 : 0; - const auto &wei_scale_dt = attr_scales.get(DNNL_ARG_WEIGHTS).data_type_; + const auto &wei_scale_dt = attr_scales.get_data_type(DNNL_ARG_WEIGHTS); const auto wei_scales_d = ctx.memory_mdw(DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS); - const auto wei_scale_group_ndim = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; - const auto wei_scale_group_k = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[0] - : 1; - const auto wei_scale_group_n = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[1] - : 1; + const auto wei_scale_group_k = attr_scales.get_group(DNNL_ARG_WEIGHTS, 0); + const auto wei_scale_group_n = attr_scales.get_group(DNNL_ARG_WEIGHTS, 1); // Initialize a memory desc for quant entries for easier offset calculation. 
memory_desc_t wei_scale_md {}; CHECK(matmul_helper_t::get_quant_md(wei_scale_md, ndims, weights_d.dims(), diff --git a/src/cpu/matmul/ref_matmul_int8.cpp b/src/cpu/matmul/ref_matmul_int8.cpp index bbe4804994f..bc5c3744c46 100644 --- a/src/cpu/matmul/ref_matmul_int8.cpp +++ b/src/cpu/matmul/ref_matmul_int8.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021-2024 Intel Corporation +* Copyright 2021-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -124,19 +124,12 @@ status_t ref_matmul_int8_t::execute_ref(const exec_ctx_t &ctx) const { // arg scales section const auto &attr_scales = pd()->attr()->scales_; const bool with_wei_scales - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const bool with_dst_scales - = !attr_scales.get(DNNL_ARG_DST).has_default_values(); - const int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); + const bool with_dst_scales = !attr_scales.has_default_values(DNNL_ARG_DST); + const int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); const auto &wei_scale_dt = attr_scales.get_data_type(DNNL_ARG_WEIGHTS); - const bool wei_scale_per_k = wei_scale_mask & pd()->wei_qmask_K(); - const auto wei_scale_group_ndim = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; - const auto wei_scale_group_k = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[0] - : (wei_scale_per_k ? 1 : K); - const auto wei_scale_group_n = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[1] - : 1; + const auto wei_scale_group_k = attr_scales.get_group(DNNL_ARG_WEIGHTS, 0); + const auto wei_scale_group_n = attr_scales.get_group(DNNL_ARG_WEIGHTS, 1); const auto wei_scale_ngroups_k = K / wei_scale_group_k; // Initialize a memory desc for quant entries for easier offset calculation. memory_desc_t wei_scale_md {}; @@ -144,15 +137,10 @@ status_t ref_matmul_int8_t::execute_ref(const exec_ctx_t &ctx) const { wei_scale_mask, wei_scale_group_k, wei_scale_group_n, wei_scale_dt)); - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); - const int src_scale_mask = attr_scales.get(DNNL_ARG_SRC).mask_; + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); + const int src_scale_mask = attr_scales.get_mask(DNNL_ARG_SRC); const auto &src_scale_dt = attr_scales.get_data_type(DNNL_ARG_SRC); - const bool src_scale_per_k = src_scale_mask & pd()->src_qmask_K(); - const auto src_scale_group_ndim = attr_scales.get(DNNL_ARG_SRC).ndims_; - const auto src_scale_group_k = src_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_SRC).group_dims_[1] - : (src_scale_per_k ? 1 : K); + const auto src_scale_group_k = attr_scales.get_group(DNNL_ARG_SRC, 1); const auto src_scale_ngroups_k = K / src_scale_group_k; // Initialize a memory desc for quant entries for easier offset calculation. memory_desc_t src_scale_md {}; diff --git a/src/cpu/ref_concat.hpp b/src/cpu/ref_concat.hpp index 516e2ff0aac..2cf58d56078 100644 --- a/src/cpu/ref_concat.hpp +++ b/src/cpu/ref_concat.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2017-2024 Intel Corporation +* Copyright 2017-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
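// A standalone sketch, under assumptions, of the grouped-scale indexing that
// get_quant_md()/get_quant_off() implement above for K x N matmul weights:
// scales form a (K/g_k) x (N/g_n) grid, and element (k, n) maps to the group
// cell containing it. All names here are hypothetical, not the library code.
#include <cassert>
#include <cstdint>

int64_t wei_scale_off(
        int64_t k, int64_t n, int64_t N, int64_t g_k, int64_t g_n) {
    const int64_t scale_cols = N / g_n; // number of group cells along N
    return (k / g_k) * scale_cols + (n / g_n); // row-major over the grid
}

int main() {
    // K = 8, N = 4 with groups (4, 1): a 2 x 4 grid of scales.
    assert(wei_scale_off(0, 0, 4, 4, 1) == 0);
    assert(wei_scale_off(5, 3, 4, 4, 1) == 7); // k = 5 falls in K-group 1
    return 0;
}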
@@ -58,11 +58,10 @@ struct ref_concat_t : public primitive_t { reorder_pds_.resize(n_ + use_tent_dst()); for (int i = 0; i < n_; ++i) { primitive_attr_t r_attr; - if (!sc.get(DNNL_ARG_MULTIPLE_SRC + i).has_default_values()) { - int mask = 0; - CHECK(sc.get(DNNL_ARG_MULTIPLE_SRC + i, &mask, nullptr)); - if (mask != 0) return status::unimplemented; - r_attr.scales_.set(DNNL_ARG_SRC, mask); + if (!sc.has_default_values(DNNL_ARG_MULTIPLE_SRC + i)) { + int mask = sc.get_mask(DNNL_ARG_MULTIPLE_SRC + i); + VDISPATCH_CONCAT(mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG); + CHECK(r_attr.scales_.set(DNNL_ARG_SRC, mask)); } CHECK(reorder_primitive_desc_create(reorder_pds_[i], engine, src_md(i), src_image_md(i), &r_attr)); diff --git a/src/cpu/ref_convolution_int8.cpp b/src/cpu/ref_convolution_int8.cpp index b1c99eb8cda..c54a52372ca 100644 --- a/src/cpu/ref_convolution_int8.cpp +++ b/src/cpu/ref_convolution_int8.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021-2023 Intel Corporation +* Copyright 2021-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ namespace { void dequantize(float &d, dim_t g, dim_t C, dim_t c, const float *wei_scales, bool with_groups, int wei_mask, const float *src_scales) { // scale_idx_mult = 1 for per_channel scales and 0, otherwise - const int wei_scale_idx_mult = wei_mask != 0; + const int wei_scale_idx_mult = wei_mask > 0; float scale = 1.0f; if (src_scales) scale *= src_scales[0]; if (wei_scales) scale *= wei_scales[(g * C + c) * wei_scale_idx_mult]; @@ -63,8 +63,7 @@ status_t ref_convolution_int8_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); DEFINE_ZERO_POINTS_BUFFER(src_zero_point, DNNL_ARG_SRC); DEFINE_ZERO_POINTS_BUFFER(dst_zero_point, DNNL_ARG_DST); @@ -290,8 +289,7 @@ status_t ref_convolution_int8_bwd_data_t::execute_backward_data( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(diff_dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const memory_desc_wrapper diff_dst_d(pd()->diff_dst_md()); const memory_desc_wrapper diff_src_d(pd()->diff_src_md()); diff --git a/src/cpu/ref_deconvolution.cpp b/src/cpu/ref_deconvolution.cpp index 08c2304e675..5771bb5d36d 100644 --- a/src/cpu/ref_deconvolution.cpp +++ b/src/cpu/ref_deconvolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
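// A standalone illustration (editor's example, not the library code) of the
// scale_idx_mult idiom used in the dequantize() change below: a common scale
// (mask == 0) turns the per-channel index into 0, so both cases share one
// loop body.
#include <cassert>

float apply_scale(float acc, int oc, const float *scales, int mask) {
    const int scale_idx_mult = mask > 0; // 0: common scale, 1: per-channel
    return acc * scales[oc * scale_idx_mult];
}

int main() {
    const float common[] = {0.5f};
    const float per_oc[] = {0.5f, 2.0f, 4.0f};
    assert(apply_scale(8.0f, 2, common, 0) == 4.0f); // always scales[0]
    assert(apply_scale(8.0f, 2, per_oc, 1 << 0) == 32.0f); // reads scales[2]
    return 0;
}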
@@ -174,8 +174,7 @@ status_t ref_deconvolution_fwd_t::compute_oscale( DEFINE_ARG_SCALES_BUFFER(src_scales, DNNL_ARG_SRC); DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const memory_desc_wrapper dst_d(pd()->dst_md()); @@ -190,7 +189,7 @@ status_t ref_deconvolution_fwd_t::compute_oscale( const auto maybe_oscale = [](float &d, dim_t oc, const float *src_scales, const float *wei_scales, int wei_mask) { // scale_idx_mult = 1 for per_oc scales and 0, otherwise - const int wei_scale_idx_mult = wei_mask != 0; + const int wei_scale_idx_mult = wei_mask > 0; d *= src_scales[0] * wei_scales[oc * wei_scale_idx_mult]; }; @@ -216,7 +215,7 @@ status_t ref_deconvolution_fwd_t::compute_ref_attrs(const exec_ctx_t &ctx, auto dst = CTX_OUT_MEM(void *, DNNL_ARG_DST); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int dst_scale_mask = pd()->attr()->scales_.get(DNNL_ARG_DST).mask_; + const int dst_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_DST); DEFINE_ZERO_POINTS_BUFFER(dst_zero_point, DNNL_ARG_DST); const bool is_dst_zp_common @@ -242,7 +241,7 @@ status_t ref_deconvolution_fwd_t::compute_ref_attrs(const exec_ctx_t &ctx, const auto maybe_scale = [](float &d, dim_t oc, const float *scales, int mask) { // scale_idx_mult = 1 for per_oc scales and 0, otherwise - const int scale_idx_mult = mask != 0; + const int scale_idx_mult = mask > 0; d *= scales[oc * scale_idx_mult]; }; @@ -536,11 +535,10 @@ status_t ref_deconvolution_fwd_t::execute(const exec_ctx_t &ctx) const { float *conv_output = scratchpad.get(key_deconv_bias); - const auto &arg_scales = pd()->attr()->scales_; - const auto &src_scales = arg_scales.get(DNNL_ARG_SRC); - const auto &wei_scales = arg_scales.get(DNNL_ARG_WEIGHTS); + const auto &scales = pd()->attr()->scales_; - if (!src_scales.has_default_values() || !wei_scales.has_default_values()) { + if (!scales.has_default_values(DNNL_ARG_SRC) + || !scales.has_default_values(DNNL_ARG_WEIGHTS)) { compute_oscale(ctx, conv_output); } diff --git a/src/cpu/ref_fused_convolution.hpp b/src/cpu/ref_fused_convolution.hpp index 1e00b56d576..0fddedd5628 100644 --- a/src/cpu/ref_fused_convolution.hpp +++ b/src/cpu/ref_fused_convolution.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2022 Arm Ltd. 
and affiliates * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -224,10 +224,10 @@ struct ref_fused_convolution_fwd_t : public primitive_t { primitive_attr_t attr_1x1(*attr()); // erase dw_conv post-op scales for (auto arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) { - auto &scale - = attr_1x1.scales_.get(DNNL_ARG_ATTR_POST_OP_DW | arg); - if (!scale.has_default_values()) - attr_1x1.scales_.reset(DNNL_ARG_ATTR_POST_OP_DW | arg); + if (!attr_1x1.scales_.has_default_values( + DNNL_ARG_ATTR_POST_OP_DW | arg)) + CHECK(attr_1x1.scales_.set(DNNL_ARG_ATTR_POST_OP_DW | arg, + default_quant_entry())); } // erase post-ops after fusion as they will be handled separately auto &e = attr_1x1.post_ops_.entry_; @@ -250,7 +250,7 @@ struct ref_fused_convolution_fwd_t : public primitive_t { arg_cache.append_ctx_arg(DNNL_ARG_SRC); arg_cache.append_ctx_arg(DNNL_ARG_WEIGHTS); for (auto arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) - if (!attr_1x1.scales_.get(arg).has_default_values()) + if (!attr_1x1.scales_.has_default_values(arg)) arg_cache.append_ctx_arg(DNNL_ARG_ATTR_SCALES | arg); if (desc()->bias_desc.data_type != data_type::undef) arg_cache.append_ctx_arg(DNNL_ARG_BIAS); @@ -316,12 +316,12 @@ struct ref_fused_convolution_fwd_t : public primitive_t { arg_cache.append_ctx_arg(DNNL_ARG_WEIGHTS, DNNL_ARG_ATTR_POST_OP_DW | DNNL_ARG_WEIGHTS); for (auto arg : {DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) - if (!attr_dw.scales_.get(arg).has_default_values()) + if (!attr_dw.scales_.has_default_values(arg)) arg_cache.append_ctx_arg(DNNL_ARG_ATTR_SCALES | arg, DNNL_ARG_ATTR_POST_OP_DW | DNNL_ARG_ATTR_SCALES | arg); // dw_conv src_scale = 1x1_conv dst_scale - if (!attr_1x1.scales_.get(DNNL_ARG_DST).has_default_values()) + if (!attr_1x1.scales_.has_default_values(DNNL_ARG_DST)) arg_cache.append_ctx_arg( DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC, DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST); diff --git a/src/cpu/ref_inner_product_int8.cpp b/src/cpu/ref_inner_product_int8.cpp index 91198c680ab..322f39da638 100644 --- a/src/cpu/ref_inner_product_int8.cpp +++ b/src/cpu/ref_inner_product_int8.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021-2023 Intel Corporation +* Copyright 2021-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,13 +76,12 @@ status_t ref_inner_product_int8_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); const auto &attr_scales = pd()->attr()->scales_; - const bool with_dst_scales - = !attr_scales.get(DNNL_ARG_DST).has_default_values(); + const bool with_dst_scales = !attr_scales.has_default_values(DNNL_ARG_DST); auto maybe_oscale = [&](float &d, dim_t oc) { // scale_idx_mult = 1 for per_oc scales and 0, otherwise const int scale_idx_mult - = attr_scales.get(DNNL_ARG_WEIGHTS).mask_ == (1 << 0); + = attr_scales.get_mask(DNNL_ARG_WEIGHTS) == (1 << 0); d *= src_scales[0] * wei_scales[oc * scale_idx_mult]; }; diff --git a/src/cpu/ref_sum.hpp b/src/cpu/ref_sum.hpp index 7256114d685..e0e2cfb8669 100644 --- a/src/cpu/ref_sum.hpp +++ b/src/cpu/ref_sum.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2017-2024 Intel Corporation +* Copyright 2017-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
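// A simplified standalone model, under assumptions, of the quant_entry_t
// idea this patch introduces: one value object per argument owning mask,
// groups, and data type, queried through getters instead of raw member
// reads. An editorial sketch, not the actual class.
#include <cassert>

struct toy_quant_entry_t {
    bool set_ = false; // stays in the default state until set() is called
    int mask_ = 0;

    int set(int mask) {
        mask_ = mask;
        set_ = true;
        return 0; // status::success in the real API
    }
    bool has_default_values() const { return !set_; }
    int get_mask() const { return mask_; }
};

int main() {
    toy_quant_entry_t e;
    assert(e.has_default_values()); // nothing configured yet
    e.set(0); // common scale, as ref_sum sets for each source below
    assert(!e.has_default_values() && e.get_mask() == 0);
    return 0;
}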
@@ -46,7 +46,7 @@ struct ref_sum_t : public primitive_t { reorder_pds_.resize(n_ + need_output_reorder()); for (int i = 0; i < n_; ++i) { primitive_attr_t r_attr; - r_attr.scales_.set(DNNL_ARG_SRC, 0); + CHECK(r_attr.scales_.set(DNNL_ARG_SRC, 0)); if (i != 0) r_attr.post_ops_.append_sum(1.0); CHECK(reorder_primitive_desc_create(reorder_pds_[i], engine, src_md(i), dst_acc_md(), &r_attr)); diff --git a/src/cpu/reorder/cpu_reorder_pd.hpp b/src/cpu/reorder/cpu_reorder_pd.hpp index ca69992b0fe..1fbac15ee32 100644 --- a/src/cpu/reorder/cpu_reorder_pd.hpp +++ b/src/cpu/reorder/cpu_reorder_pd.hpp @@ -85,15 +85,15 @@ struct cpu_reorder_pd_t : public reorder_pd_t { const float *dst_scales) const { using namespace dnnl::impl::memory_tracking::names; - int mask = -1; - bool is_set = false; - auto status = attr->scales_.get(DNNL_ARG_DST, &mask, &is_set); - if (status != status::success) return nullptr; + if (attr->scales_.has_default_values(DNNL_ARG_DST)) { + return dst_scales; + } // It's possible that mask > 0 but `count` is still `1`. This case is // covered by `DEFINE_ARG_SCALES_BUFFER` macro and no need to inverse // in such case. - if (is_set && mask > 0 && count > 1) { + int mask = attr->scales_.get_mask(DNNL_ARG_DST); + if (mask > 0 && count > 1) { auto loc_scales = scratchpad.template get( key_reorder_precomputed_dst_scales); if (!loc_scales) return nullptr; diff --git a/src/cpu/reorder/simple_reorder.hpp b/src/cpu/reorder/simple_reorder.hpp index 00b8654ae62..73d8dca2c01 100644 --- a/src/cpu/reorder/simple_reorder.hpp +++ b/src/cpu/reorder/simple_reorder.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2016-2024 Intel Corporation +* Copyright 2016-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -133,12 +133,12 @@ inline status_t get_scales_mask( return status::invalid_arguments; *src_mask = 0; - if (!s.get(DNNL_ARG_SRC).has_default_values()) - *src_mask = s.get(DNNL_ARG_SRC).mask_; + if (!s.has_default_values(DNNL_ARG_SRC)) + *src_mask = s.get_mask(DNNL_ARG_SRC); *dst_mask = 0; - if (!s.get(DNNL_ARG_DST).has_default_values()) - *dst_mask = s.get(DNNL_ARG_DST).mask_; + if (!s.has_default_values(DNNL_ARG_DST)) + *dst_mask = s.get_mask(DNNL_ARG_DST); // This is used in a check function. if (*src_mask > 0 && *dst_mask > 0 && *dst_mask != *src_mask) @@ -152,11 +152,10 @@ inline bool simple_attr_check(const primitive_attr_t *attr, if (sum_support) skip_mask = skip_mask | smask_t::post_ops; if (!attr->has_default_values(skip_mask)) return false; for (int arg : {DNNL_ARG_SRC, DNNL_ARG_DST}) { - const auto &sc = attr->scales_.get(arg); // Data type for scales is not generally supported. - if (!sc.has_default_data_type()) return false; + if (!attr->scales_.has_default_data_type(arg)) return false; // Groups are generally not supported. 
- if (!sc.has_default_groups()) return false; + if (!attr->scales_.get(arg).has_default_groups()) return false; } if (many_scales_support) return true; int src_mask, dst_mask; @@ -2263,8 +2262,7 @@ struct simple_reorder_implhas_default_values(skip_mask), VERBOSE_UNSUPPORTED_ATTR); - VDISPATCH_REORDER_IC( - attr->scales_.get(DNNL_ARG_DST).has_default_values(), + VDISPATCH_REORDER_IC(attr->scales_.has_default_values(DNNL_ARG_DST), VERBOSE_UNSUPPORTED_SCALES_CFG); return status::success; } @@ -2293,8 +2291,8 @@ struct simple_reorder_implattr()->scales_.get(DNNL_ARG_SRC); - const bool has_src_scales = !sc_src.has_default_values(); + const auto &scales = pd->attr()->scales_; + const bool has_src_scales = !scales.has_default_values(DNNL_ARG_SRC); const auto &zps = pd->attr()->zero_points_; const bool has_src_zps = !zps.has_default_values(DNNL_ARG_SRC); @@ -2331,11 +2329,9 @@ struct simple_reorder_impl 0 ? sc_src.group_dims_[0] : 1; + const auto src_scales_group0 = scales.get_group(DNNL_ARG_SRC, 0); // Applied to the last dimension. - const auto src_scales_group1 - = sc_src.ndims_ > 0 ? sc_src.group_dims_[1] : 1; + const auto src_scales_group1 = scales.get_group(DNNL_ARG_SRC, 1); memory_desc_t src_scales_md {}; if (has_src_scales) { @@ -2537,11 +2533,11 @@ struct simple_reorder_implhas_default_values(skip_mask), VERBOSE_UNSUPPORTED_ATTR); VDISPATCH_REORDER_IC(simple_po_check(attr), VERBOSE_UNSUPPORTED_POSTOP); - const auto &sc_dst = attr->scales_.get(DNNL_ARG_DST); - const bool has_dst_scales = !sc_dst.has_default_values(); + const auto &scales = attr->scales_; + const bool has_dst_scales = !scales.has_default_values(DNNL_ARG_DST); if (has_dst_scales) { - VDISPATCH_REORDER_IC(sc_dst.has_default_data_type() - && sc_dst.has_default_groups(), + VDISPATCH_REORDER_IC(scales.has_default_data_type(DNNL_ARG_DST) + && scales.has_default_groups(DNNL_ARG_DST), VERBOSE_UNSUPPORTED_SCALES_CFG); } VDISPATCH_REORDER_IC( @@ -2565,14 +2561,12 @@ struct simple_reorder_implattr()->scales_.get(DNNL_ARG_SRC); - const bool has_src_scales = !sc_src.has_default_values(); + const auto &scales = pd->attr()->scales_; + const bool has_src_scales = !scales.has_default_values(DNNL_ARG_SRC); // Applied to the pre-last dimension. - const auto src_scales_group0 - = sc_src.ndims_ > 0 ? sc_src.group_dims_[0] : 1; + const auto src_scales_group0 = scales.get_group(DNNL_ARG_SRC, 0); // Applied to the last dimension. - const auto src_scales_group1 - = sc_src.ndims_ > 0 ? 
sc_src.group_dims_[1] : 1; + const auto src_scales_group1 = scales.get_group(DNNL_ARG_SRC, 1); memory_desc_t src_scales_md {}; if (has_src_scales) { get_quant_md(src_scales_md, ndims, input_d.dims(), src_scales_mask, @@ -2580,8 +2574,7 @@ struct simple_reorder_implattr()->scales_.get(DNNL_ARG_DST); - const bool has_dst_scales = !sc_dst.has_default_values(); + const bool has_dst_scales = !scales.has_default_values(DNNL_ARG_DST); memory_desc_t dst_scales_md {}; if (has_dst_scales) { get_quant_md(dst_scales_md, ndims, input_d.dims(), dst_scales_mask, @@ -2690,12 +2683,14 @@ struct simple_reorder_t : public primitive_t { spec>::is_applicable(src_md, dst_md, attr); if (status != status::success) return status; - int mask = -1; - bool is_set = false; - CHECK(attr->scales_.get(DNNL_ARG_DST, &mask, &is_set)); const memory_desc_wrapper input_d(src_md); - if (input_d.has_runtime_dims_or_strides() && is_set && mask > 0) - return status::unimplemented; + + int mask = -1; + if (!attr->scales_.has_default_values(DNNL_ARG_DST)) { + mask = attr->scales_.get_mask(DNNL_ARG_DST); + if (input_d.has_runtime_dims_or_strides() && mask > 0) + return status::unimplemented; + } auto _pd = make_unique_pd(attr, src_engine->kind(), src_md, dst_engine->kind(), dst_md); @@ -2709,7 +2704,7 @@ struct simple_reorder_t : public primitive_t { scratchpad.book(memory_tracking::names::key_reorder_space, scratchpad_sz_, 1, 16); - if (is_set && mask > 0) { + if (mask > 0) { dim_t D_mask; _pd->get_D_values(input_d, mask, nullptr, &D_mask, nullptr); scratchpad.template book( diff --git a/src/cpu/scale_utils.cpp b/src/cpu/scale_utils.cpp index c6d92a33e2f..8db6d5fb125 100644 --- a/src/cpu/scale_utils.cpp +++ b/src/cpu/scale_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,21 +32,18 @@ constexpr size_t scales_simd_w = 16; } void book_precomputed_scales(memory_tracking::registrar_t &scratchpad, - const arg_scales_t &attr_scales, size_t wei_scale_count, + const scales_t &attr_scales, size_t wei_scale_count, bool force_scales_book) { using namespace dnnl::impl::memory_tracking::names; - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); const bool with_wei_scales - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const auto wei_scales_dt = attr_scales.get(DNNL_ARG_WEIGHTS).data_type_; - const auto wei_scale_groups_ndims - = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); + if ((with_src_scales && with_wei_scales) || force_scales_book - || (wei_scales_dt != data_type::f32 && with_wei_scales) - || (wei_scale_groups_ndims > 0 && with_wei_scales)) { - const int wei_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + || !attr_scales.has_default_data_type(DNNL_ARG_WEIGHTS) + || !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_groups()) { + const int wei_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); const size_t precomputed_scales_size = wei_mask == 0 ? 
scales_simd_w : nstl::max( @@ -60,27 +57,26 @@ void book_precomputed_scales(memory_tracking::registrar_t &scratchpad, bool req_copy_scales( const primitive_attr_t *attr, const float scale_adjust_factor) { const auto &attr_scales = attr->scales_; - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); const bool with_wei_scales - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const auto wei_scales_dt = attr_scales.get(DNNL_ARG_WEIGHTS).data_type_; - const auto wei_scale_groups_ndims - = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); return (with_src_scales && with_wei_scales) || scale_adjust_factor != 1.0f - || (wei_scales_dt != data_type::f32 && with_wei_scales) - || (wei_scale_groups_ndims > 0 && with_wei_scales); + || !attr_scales.has_default_data_type(DNNL_ARG_WEIGHTS) + || !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_groups(); } const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, const float *src_scales, const float *wei_scales, dim_t oc, const primitive_attr_t *attr, float scale_adjust_factor) { - // Note: per-ic-channel is no supported in default - const int wei_scale_mask = attr->scales_.get(DNNL_ARG_WEIGHTS).mask_; + // Note: per-ic-channel is not supported by default. + const int wei_scale_mask = attr->scales_.get_mask(DNNL_ARG_WEIGHTS); return precompute_scales(scratchpad, src_scales, wei_scales, 1, oc, false, - wei_scale_mask != 0, attr, scale_adjust_factor, false); + wei_scale_mask > 0, attr, scale_adjust_factor, false); } +// Note: `wei_scale_per_ic` and `wei_scale_per_oc` could be derived inside this +// function if all primitives shared the same definition of `per_ic` and +// `per_oc` masks; in practice, matmul differs from everybody else. const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, const float *src_scales, const float *wei_scales, dim_t IC, dim_t OC, const bool wei_scale_per_ic, const bool wei_scale_per_oc, @@ -89,14 +85,15 @@ const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, using namespace dnnl::impl::memory_tracking::names; const auto &attr_scales = attr->scales_; - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); const auto wei_scale_count = (wei_scale_per_ic ? IC : 1) * (wei_scale_per_oc ? OC : 1); const float *scales = nullptr; if (req_copy_scales(attr, scale_adjust_factor)) { - const int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + assert(wei_scale_mask >= 0); + size_t size = 0; auto loc_scales = scratchpad.template get<float>(key_precomputed_scales, &size); @@ -108,12 +105,9 @@ const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, const dim_t count = nstl::min( static_cast<dim_t>(size / sizeof(float)), wei_scale_count); const auto wei_scale_dt - = attr_scales.get(DNNL_ARG_WEIGHTS).data_type_; - const auto wei_scale_groups_ndims - = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; - const auto wei_scale_groups_ic = wei_scale_groups_ndims > 0 - ?
attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[0] - : 1; + = attr_scales.get_data_type(DNNL_ARG_WEIGHTS); + const auto wei_scale_groups_ic + = attr_scales.get_group(DNNL_ARG_WEIGHTS, 0); // Note: per-ic-channel scales are only supported for // weights decompression for now if ((wei_scale_per_ic && wei_scale_groups_ic > 1) diff --git a/src/cpu/scale_utils.hpp b/src/cpu/scale_utils.hpp index 7c1ce535889..48164b776d4 100644 --- a/src/cpu/scale_utils.hpp +++ b/src/cpu/scale_utils.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ namespace impl { namespace cpu { void book_precomputed_scales(memory_tracking::registrar_t &scratchpad, - const arg_scales_t &attr_scales, size_t wei_scales_count, + const scales_t &attr_scales, size_t wei_scales_count, bool force_scales_book = false); bool req_copy_scales( diff --git a/src/cpu/x64/brgemm/brgemm.cpp b/src/cpu/x64/brgemm/brgemm.cpp index 38e2975fd02..11b3bc8a78e 100644 --- a/src/cpu/x64/brgemm/brgemm.cpp +++ b/src/cpu/x64/brgemm/brgemm.cpp @@ -435,23 +435,26 @@ status_t brgemm_desc_set_postops(brgemm_desc_t *brg, if (brg->with_scales) { // Note. the current version supports only two different output scale // types: - // 1) common (mask_ = 0) + // 1) common (mask = 0) // 2) per_n_dim_scale - broadcast across n dimension; // for convolution and inner product primitives it corresponds - // to "per_oc" mask_ = 1 << 1; for matmul - to - // mask_ = (1 << (ndims - 1))), where ndims is number of + // to "per_oc" mask = 1 << 1; for matmul - to + // mask = (1 << (ndims - 1)), where ndims is number of // dimensions for original matmul problem - // So if wei_scales.mask_ != 0 (not common) it's assumed here that scale - // type is per_n_dim_scale and driver which calls brgemm kernel checked - // that mask has correct value for this case - brg->is_oc_scale = wei_scales.mask_ != 0; + // So if wei_scales.get_mask() > 0 (not common) it's assumed here that + // the scale type is per_n_dim_scale and the driver that calls the + // brgemm kernel has checked that the mask is correct for this case + brg->is_oc_scale = wei_scales.get_mask() > 0; } const auto &dst_scales = attr->scales_.get(DNNL_ARG_DST); brg->with_dst_scales = !dst_scales.has_default_values(); - const bool scales_ok = src_scales.mask_ == 0 && dst_scales.mask_ == 0 - && attr->scales_.has_default_values( - {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}); + const bool scales_ok = attr->scales_.has_default_values({DNNL_ARG_SRC, + DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) + && IMPLICATION(!src_scales.has_default_values(), + src_scales.get_mask() == 0) + && IMPLICATION(!dst_scales.has_default_values(), + dst_scales.get_mask() == 0); if (!scales_ok) return status::unimplemented; auto init_zp_type diff --git a/src/cpu/x64/brgemm/capi/brgemm_api.cpp b/src/cpu/x64/brgemm/capi/brgemm_api.cpp index 3dae3e3409c..b70822995b7 100644 --- a/src/cpu/x64/brgemm/capi/brgemm_api.cpp +++ b/src/cpu/x64/brgemm/capi/brgemm_api.cpp @@ -259,10 +259,9 @@ status_t brgemm_t::execute(const void *A_ptr, const void *B_ptr, // TODO: delegate extra memory to scratchpad?
std::vector<float> wei_scales_v(N_); - const bool has_src_scales - = !attr_.scales_.get(DNNL_ARG_SRC).has_default_values(); + const bool has_src_scales = !attr_.scales_.has_default_values(DNNL_ARG_SRC); const bool has_wei_scales - = !attr_.scales_.get(DNNL_ARG_WEIGHTS).has_default_values(); + = !attr_.scales_.has_default_values(DNNL_ARG_WEIGHTS); // Save src scale value to re-use it. float src_scale_val = 1.f; @@ -279,7 +278,7 @@ status_t brgemm_t::execute(const void *A_ptr, const void *B_ptr, const void *wei_scales_ptr = attr_params->get_scales(DNNL_ARG_WEIGHTS); if (wei_scales_ptr == nullptr) return status::invalid_arguments; - int wei_mask = attr_.scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = attr_.scales_.get_mask(DNNL_ARG_WEIGHTS); if (wei_mask > 0) { for (dim_t i = 0; i < N_; i++) { const float wei_scale_val = cpu::io::load_float_value( @@ -300,7 +299,7 @@ status_t brgemm_t::execute(const void *A_ptr, const void *B_ptr, // Destination scales. Require manual extending to full simd broadcast. alignas(64) float dst_scales_buf[16] = {0}; - if (!attr_.scales_.get(DNNL_ARG_DST).has_default_values()) { + if (!attr_.scales_.has_default_values(DNNL_ARG_DST)) { const void *dst_scales_ptr = attr_params->get_scales(DNNL_ARG_DST); if (dst_scales_ptr == nullptr) return status::invalid_arguments; diff --git a/src/cpu/x64/jit_avx512_core_amx_1x1_conv_kernel.cpp b/src/cpu/x64/jit_avx512_core_amx_1x1_conv_kernel.cpp index e57e4b9ed76..18643219cd4 100644 --- a/src/cpu/x64/jit_avx512_core_amx_1x1_conv_kernel.cpp +++ b/src/cpu/x64/jit_avx512_core_amx_1x1_conv_kernel.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1217,9 +1217,8 @@ status_t jit_avx512_core_amx_1x1_fwd_kernel_t::init_conf(jit_conv_conf_t &jcp, = (avaliable_ops) ? ops_tile_store / avaliable_ops + 1 : 0; if (jcp.per_one_pstore > 12) jcp.per_one_pstore = 0; - const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = attr.scales_.get_mask(DNNL_ARG_WEIGHTS) > 0; jcp.dst_scale = !dst_scales.has_default_values(); return status::success; diff --git a/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.cpp b/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.cpp index 5a61a9fa38f..33c84aeda26 100644 --- a/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.cpp +++ b/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
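The recurring change in these hunks, replacing direct `mask_`, `data_type_`, and `group_dims_` field reads with `get_mask()`, `get_data_type()`, and `get_group()` queries, follows from the new `quant_entry_t`: a mask is only meaningful once scales were explicitly set, which is why the old `mask_ != 0` comparisons become guarded `get_mask() > 0` checks. A minimal standalone sketch of that contract (the type and the assert are assumptions for illustration, not the library's exact implementation):

    #include <cassert>

    // Reduced model of the quant_entry_t idea: a default entry reports
    // has_default_values() == true, and get_mask() is only valid on an
    // entry that was explicitly set.
    struct quant_entry_sketch_t {
        bool set_ = false;
        int mask_ = 0;
        bool has_default_values() const { return !set_; }
        int get_mask() const {
            assert(set_); // querying a default entry is a programmer error
            return mask_;
        }
    };

    int main() {
        quant_entry_sketch_t wei;
        // The guarded pattern the patch converges on:
        const bool is_oc_scale = !wei.has_default_values() && wei.get_mask() > 0;
        return is_oc_scale ? 1 : 0;
    }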
@@ -73,11 +73,10 @@ status_t jit_avx512_core_amx_1x1_convolution_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->IC(), - pd()->OC(), false, wei_scale_mask != 0, pd()->attr(), + pd()->OC(), false, wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get()); DEFINE_ZERO_POINTS_BUFFER(src_zero_point, DNNL_ARG_SRC); diff --git a/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.hpp b/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.hpp index eae42216786..45785a4d12a 100644 --- a/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.hpp +++ b/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -118,8 +118,8 @@ struct jit_avx512_core_amx_1x1_convolution_fwd_t : public primitive_t { const auto attr = pd()->attr(); if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); diff --git a/src/cpu/x64/jit_avx512_core_amx_conv_kernel.cpp b/src/cpu/x64/jit_avx512_core_amx_conv_kernel.cpp index 1e1e532de69..1af812d0e5d 100644 --- a/src/cpu/x64/jit_avx512_core_amx_conv_kernel.cpp +++ b/src/cpu/x64/jit_avx512_core_amx_conv_kernel.cpp @@ -2655,7 +2655,7 @@ status_t jit_avx512_core_amx_fwd_kernel_t::init_conf(jit_conv_conf_t &jcp, const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); // Note: currently unsupported, results in seg-fault @@ -3969,7 +3969,7 @@ status_t jit_avx512_core_amx_bwd_data_kernel_t::init_conf(jit_conv_conf_t &jcp, const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_ic_scale = wei_scales.mask_ != 0; + jcp.is_ic_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); return status::success; diff --git a/src/cpu/x64/jit_avx512_core_amx_convolution.cpp b/src/cpu/x64/jit_avx512_core_amx_convolution.cpp index 2377ca8bf47..55ad96a0245 100644 --- a/src/cpu/x64/jit_avx512_core_amx_convolution.cpp +++ b/src/cpu/x64/jit_avx512_core_amx_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
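All of the AMX convolution drivers in this region funnel into `scale_utils::precompute_scales()`, which folds the single source scale and any ISA-specific adjustment factor into the per-channel weight scales once, so the kernels only ever read one combined array. A simplified sketch of that folding (this helper is illustrative, not the library function):

    #include <vector>

    // oscales[i] = src_scale * wei_scales[i] * adjust_factor, where
    // adjust_factor is e.g. 1/wei_adj_scale on ISAs without VNNI (see the
    // `signed_input && !has_vnni` branches elsewhere in this patch).
    std::vector<float> fold_scales(float src_scale,
            const std::vector<float> &wei_scales, float adjust_factor) {
        std::vector<float> oscales(wei_scales.size());
        for (size_t i = 0; i < oscales.size(); ++i)
            oscales[i] = src_scale * wei_scales[i] * adjust_factor;
        return oscales;
    }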
@@ -99,11 +99,10 @@ jit_avx512_core_amx_convolution_fwd_t::execute_forward_reduced_lowering( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->IC(), - pd()->OC(), false, wei_scale_mask != 0, pd()->attr(), + pd()->OC(), false, wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get()); auto inp_p_buffer = ctx.get_scratchpad_grantor().template get( @@ -457,11 +456,10 @@ status_t jit_avx512_core_amx_convolution_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->IC(), - pd()->OC(), false, wei_scale_mask != 0, pd()->attr(), + pd()->OC(), false, wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get()); // TODO: use block offset instead of hand-calculated one @@ -831,11 +829,10 @@ status_t jit_avx512_core_amx_convolution_bwd_data_t::execute_backward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->IC(), - pd()->OC(), false, wei_scale_mask != 0, pd()->attr(), + pd()->OC(), false, wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get()); amx_utils::execute_backward_convolution_body(ctx, pd()->jcp_, kernel_, diff --git a/src/cpu/x64/jit_avx512_core_amx_convolution.hpp b/src/cpu/x64/jit_avx512_core_amx_convolution.hpp index ccd9a4d0a73..c48820e7d52 100644 --- a/src/cpu/x64/jit_avx512_core_amx_convolution.hpp +++ b/src/cpu/x64/jit_avx512_core_amx_convolution.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -120,8 +120,8 @@ struct jit_avx512_core_amx_convolution_fwd_t : public primitive_t { const auto attr = pd()->attr(); if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); @@ -203,8 +203,8 @@ struct jit_avx512_core_amx_convolution_bwd_data_t : public primitive_t { const auto attr = pd()->attr(); if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); diff --git a/src/cpu/x64/jit_avx512_core_amx_deconvolution.cpp b/src/cpu/x64/jit_avx512_core_amx_deconvolution.cpp index de930820f80..cabd338661d 100644 --- a/src/cpu/x64/jit_avx512_core_amx_deconvolution.cpp +++ b/src/cpu/x64/jit_avx512_core_amx_deconvolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021-2024 Intel Corporation +* Copyright 2021-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,11 +78,10 @@ status_t jit_avx512_core_amx_deconvolution_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = precompute_scales(ctx.get_scratchpad_grantor(), src_scales, wei_scales, src_d.dims()[1], dst_d.dims()[1], false, - wei_scale_mask != 0, pd()->attr()); + wei_scale_mask > 0, pd()->attr()); // The body of bwd/d convolution harness is called with: // 1. src as input instead of diff_dst diff --git a/src/cpu/x64/jit_avx512_core_scale_precompute.cpp b/src/cpu/x64/jit_avx512_core_scale_precompute.cpp index 1f98294a715..9abae9b9e1f 100644 --- a/src/cpu/x64/jit_avx512_core_scale_precompute.cpp +++ b/src/cpu/x64/jit_avx512_core_scale_precompute.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, ; if (jit_scale_precompute) { const auto &attr_scales = attr->scales_; - const int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); size_t size = 0; auto loc_scales = scratchpad.template get( memory_tracking::names::key_precomputed_scales, &size); @@ -53,10 +53,10 @@ const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, const auto wei_scale_stride_ic = wei_scale_per_ic ? wei_scale_per_oc ? 
OC : 1 : 0; const auto with_wei_scale - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const auto wei_scale_groups_ndims - = with_wei_scale ? attr_scales.get(DNNL_ARG_WEIGHTS).ndims_ : 0; - const auto wei_scale_group_stride = wei_scale_groups_ndims > 0 + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); + const auto wei_scale_has_groups = with_wei_scale + && !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_groups(); + const auto wei_scale_group_stride = wei_scale_has_groups ? wei_scale_stride_ic * sizeof(float) : 0; @@ -66,14 +66,14 @@ const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, assert(req_copy_scales(attr, scale_adjust_factor)); assert(mayiuse(avx512_core)); - assert(wei_scale_mask != 0); - if (wei_scale_groups_ndims > 0) { + assert(wei_scale_mask > 0); + if (wei_scale_has_groups) { assert(count == wei_scale_count); const auto wei_scale_groups_ic - = attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[0]; + = attr_scales.get_group(DNNL_ARG_WEIGHTS, 0); const dim_t wei_scale_nb_ic = IC / wei_scale_groups_ic; const auto wei_scale_dt_sz = types::data_type_size( - attr_scales.get(DNNL_ARG_WEIGHTS).data_type_); + attr_scales.get_data_type(DNNL_ARG_WEIGHTS)); for (int nb_ic = 0; nb_ic < wei_scale_nb_ic; nb_ic++) { const auto offset = nb_ic * wei_scale_stride_ic; jrp.nelems_ = wei_scale_stride_ic; diff --git a/src/cpu/x64/jit_avx512_core_scale_precompute.hpp b/src/cpu/x64/jit_avx512_core_scale_precompute.hpp index 432a9aee05c..a93fceeca65 100644 --- a/src/cpu/x64/jit_avx512_core_scale_precompute.hpp +++ b/src/cpu/x64/jit_avx512_core_scale_precompute.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,17 +70,12 @@ struct jit_avx512_core_scale_precompute_t : public jit_generator { const primitive_attr_t *attr, const float scale_adjust_factor = 1) : jit_generator(jit_name()) , attr_(attr) - , with_wei_scales_( - !attr_->scales_.get(DNNL_ARG_WEIGHTS).has_default_values()) + , with_wei_scales_(!attr_->scales_.has_default_values(DNNL_ARG_WEIGHTS)) , wei_scales_dt_(with_wei_scales_ - ? attr_->scales_.get(DNNL_ARG_WEIGHTS).data_type_ + ? attr_->scales_.get_data_type(DNNL_ARG_WEIGHTS) : data_type::f32) , wei_scales_dsz_(types::data_type_size(wei_scales_dt_)) - , wei_groups_ic_(with_wei_scales_ - && attr_->scales_.get(DNNL_ARG_WEIGHTS).ndims_ - > 0 - ? attr_->scales_.get(DNNL_ARG_WEIGHTS).group_dims_[0] - : 1) + , wei_groups_ic_(attr_->scales_.get_group(DNNL_ARG_WEIGHTS, 0)) , scale_adjust_factor_(scale_adjust_factor) , compute_scale_factor_(scale_adjust_factor_ != 1) {} diff --git a/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp b/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp index b8e581de704..a5387dbefad 100644 --- a/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp +++ b/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
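The precompute hunks above also cover grouped weight scales along IC: one scale value covers `groups_ic` consecutive input channels (the value returned by `get_group(DNNL_ARG_WEIGHTS, 0)`), so the scale array has IC / groups_ic rows and the kernel walks it with a stride derived from the row length. A sketch of that indexing, under the assumption that IC is divisible by the group size:

    #include <cstdint>

    // Row of the grouped scale array that covers input channel `ic`;
    // groups_ic == 1 degenerates to plain per-ic scales.
    int64_t scale_row(int64_t ic, int64_t groups_ic) {
        return ic / groups_ic;
    }

    // Number of scale rows, i.e. wei_scale_nb_ic in the kernel above.
    int64_t num_scale_rows(int64_t IC, int64_t groups_ic) {
        return IC / groups_ic;
    }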
@@ -1206,7 +1206,7 @@ status_t jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_conf( const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); jcp.wei_adj_scale @@ -1222,7 +1222,7 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_scratchpad( const jit_1x1_conv_conf_t &jcp, const primitive_attr_t &attr) { using namespace dnnl::impl::memory_tracking::names; - const int wei_mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = wei_mask == 0 ? 1 : static_cast(jcp.oc) * jcp.ngroups; const dim_t count = nstl::max(scales_count, (dim_t)jcp.ic_block); diff --git a/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_convolution.cpp b/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_convolution.cpp index 538a89cf5ee..c1b0c07dfe3 100644 --- a/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_convolution.cpp +++ b/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2023 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,7 +70,7 @@ status_t jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t::execute_forward( = scratchpad.template get(key_conv_adjusted_scales); // Src scale is always a single value float src_scale = src_scales[0]; - int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 1.f / pd()->jcp_.wei_adj_scale : 1.f; @@ -92,7 +92,7 @@ status_t jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t::execute_forward( auto dw_local_scales = dw_scratchpad.template get(key_conv_adjusted_scales); auto attr_dw = pd()->dw_conv_pd_->attr(); - int wei_mask = attr_dw->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = attr_dw->scales_.get_mask(DNNL_ARG_WEIGHTS); dim_t count = wei_mask == 0 ? 1 : pd()->dw_conv_pd_->OC(); float factor = 1.f / jcp_dw->wei_adj_scale; if (count == 1) { diff --git a/src/cpu/x64/jit_avx512_core_x8s8s32x_conv_kernel.cpp b/src/cpu/x64/jit_avx512_core_x8s8s32x_conv_kernel.cpp index a808e692752..f933cbf2071 100644 --- a/src/cpu/x64/jit_avx512_core_x8s8s32x_conv_kernel.cpp +++ b/src/cpu/x64/jit_avx512_core_x8s8s32x_conv_kernel.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2016-2024 Intel Corporation +* Copyright 2016-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
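The `init_scratchpad()` hunks in this region share one sizing rule: a common scale (mask 0) books a single value, a per-oc mask books one slot per output channel across all groups, and the result is padded up to the vector width so the kernel can always issue full loads. Roughly, as a sketch (`simd_w` is 8 or 16 depending on the ISA):

    #include <algorithm>
    #include <cstdint>

    // Floats to book for adjusted scales: 1 for a common scale, oc * ngroups
    // per-channel values otherwise, never less than one SIMD vector.
    int64_t booked_scale_count(
            int wei_mask, int64_t oc, int64_t ngroups, int64_t simd_w) {
        const int64_t scales_count = wei_mask == 0 ? 1 : oc * ngroups;
        return std::max(scales_count, simd_w);
    }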
@@ -1486,7 +1486,7 @@ status_t jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp, const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); jcp.has_vnni = mayiuse(avx512_core_vnni); @@ -1765,7 +1765,7 @@ status_t jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp, void jit_avx512_core_x8s8s32x_fwd_kernel::init_scratchpad( memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp, const primitive_attr_t &attr) { - const int wei_mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = wei_mask == 0 ? 1 : jcp.oc * jcp.ngroups; dim_t count = wei_mask == 0 ? (dim_t)16 : scales_count; scratchpad.book(key_conv_adjusted_scales, count); diff --git a/src/cpu/x64/jit_avx512_core_x8s8s32x_convolution.cpp b/src/cpu/x64/jit_avx512_core_x8s8s32x_convolution.cpp index 1242fd8e3f8..50d6915d2b1 100644 --- a/src/cpu/x64/jit_avx512_core_x8s8s32x_convolution.cpp +++ b/src/cpu/x64/jit_avx512_core_x8s8s32x_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2016-2023 Intel Corporation +* Copyright 2016-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ const float *jit_avx512_core_x8s8s32x_convolution_fwd_t::adjust_oscales( const float *wei_scales) const { auto loc_scales = scratchpad.template get(key_conv_adjusted_scales); const float src_scale = src_scales[0]; - const int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 1.f / pd()->jcp_.wei_adj_scale : 1.f; diff --git a/src/cpu/x64/jit_avx512_core_x8s8s32x_deconvolution.cpp b/src/cpu/x64/jit_avx512_core_x8s8s32x_deconvolution.cpp index c02bc84bdfe..b082a3f68e2 100644 --- a/src/cpu/x64/jit_avx512_core_x8s8s32x_deconvolution.cpp +++ b/src/cpu/x64/jit_avx512_core_x8s8s32x_deconvolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -294,7 +294,7 @@ status_t _jit_avx512_core_x8s8s32x_deconv_fwd_kernel::init_conf( const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); jcp.dst_dt = dst_d.data_type(); @@ -386,7 +386,7 @@ bool _jit_avx512_core_x8s8s32x_deconv_fwd_kernel::post_ops_ok( void _jit_avx512_core_x8s8s32x_deconv_fwd_kernel::init_scratchpad( memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp, const primitive_attr_t &attr) { - const int mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = mask == 0 ? 
1 : static_cast(jcp.oc) * jcp.ngroups; const dim_t count = nstl::max(scales_count, 16); @@ -1393,7 +1393,7 @@ const float *jit_avx512_core_x8s8s32x_deconvolution_fwd_t::adjust_oscales( const memory_tracking::grantor_t &scratchpad, const float *src_scales, const float *wei_scales) const { auto loc_scales = scratchpad.template get(key_conv_adjusted_scales); - int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 1.f / pd()->jcp_.wei_adj_scale : 1.0f; diff --git a/src/cpu/x64/jit_brdgmm_dw_conv.cpp b/src/cpu/x64/jit_brdgmm_dw_conv.cpp index 481fcf930cd..ab12b82bccf 100644 --- a/src/cpu/x64/jit_brdgmm_dw_conv.cpp +++ b/src/cpu/x64/jit_brdgmm_dw_conv.cpp @@ -240,7 +240,7 @@ status_t brdgmm_dw_convolution_fwd_t::pd_t::init(engine_t *engine) { const auto &wei_scales = attr_.scales_.get(DNNL_ARG_WEIGHTS); jcp.with_scale = !src_scales.has_default_values() || !wei_scales.has_default_values(); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; const bool scales_ok = attr_scales_ok({DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}); @@ -556,8 +556,8 @@ status_t brdgmm_dw_convolution_fwd_t::init(engine_t *engine) { const auto attr = pd()->attr(); if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); @@ -587,11 +587,10 @@ status_t brdgmm_dw_convolution_fwd_t::execute(const exec_ctx_t &ctx) const { DEFINE_ZERO_POINTS_BUFFER(src_zero_point, DNNL_ARG_SRC); DEFINE_ZERO_POINTS_BUFFER(dst_zero_point, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->IC(), - pd()->OC(), false, wei_scale_mask != 0, pd()->attr(), + pd()->OC(), false, wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get()); const memory_desc_wrapper weights_d(pd()->weights_md(0)); diff --git a/src/cpu/x64/jit_brgemm_1x1_conv.cpp b/src/cpu/x64/jit_brgemm_1x1_conv.cpp index 4d6bb70659b..c70d377fc12 100644 --- a/src/cpu/x64/jit_brgemm_1x1_conv.cpp +++ b/src/cpu/x64/jit_brgemm_1x1_conv.cpp @@ -284,8 +284,8 @@ status_t brgemm_1x1_convolution_fwd_t::init(engine_t *engine) { if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr, jcp.scale_adjust_factor)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t( attr, jcp.scale_adjust_factor))); @@ -721,11 +721,10 @@ status_t brgemm_1x1_convolution_fwd_t::execute_forward_all( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float 
*oscales = scale_utils::precompute_scales(scratchpad, src_scales, wei_scales, pd()->IC(), pd()->OC(), false, - wei_scale_mask != 0, pd()->attr(), jit_scale_precompute_.get(), + wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get(), jcp.scale_adjust_factor); DEFINE_ZERO_POINT_VALUE(src_zero_point, DNNL_ARG_SRC); diff --git a/src/cpu/x64/jit_brgemm_conv.cpp b/src/cpu/x64/jit_brgemm_conv.cpp index b55281b1248..6bdf480102a 100644 --- a/src/cpu/x64/jit_brgemm_conv.cpp +++ b/src/cpu/x64/jit_brgemm_conv.cpp @@ -929,8 +929,8 @@ status_t brgemm_convolution_fwd_t::init(engine_t *engine) { if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr, jcp.scale_adjust_factor)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t( attr, jcp.scale_adjust_factor))); @@ -1270,11 +1270,10 @@ status_t brgemm_convolution_fwd_t::execute(const exec_ctx_t &ctx) const { const memory_tracking::grantor_t scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales(scratchpad, src_scales, wei_scales, pd()->IC(), pd()->OC(), false, - wei_scale_mask != 0, pd()->attr(), jit_scale_precompute_.get(), + wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get(), jcp.scale_adjust_factor); brgemm_exec_ctx_t brgemm_ctx(ctx, _pd); diff --git a/src/cpu/x64/jit_brgemm_conv_bwd_strided.cpp b/src/cpu/x64/jit_brgemm_conv_bwd_strided.cpp index cf15c05c114..0b0cb2d8c4d 100644 --- a/src/cpu/x64/jit_brgemm_conv_bwd_strided.cpp +++ b/src/cpu/x64/jit_brgemm_conv_bwd_strided.cpp @@ -647,8 +647,8 @@ status_t brgemm_convolution_bwd_strided_t::init(engine_t *engine) { if (is_jit_supported && pd()->IC() > 1 && req_copy_scales(attr, jcp.scale_adjust_factor)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t( attr, jcp.scale_adjust_factor))); @@ -698,11 +698,10 @@ status_t brgemm_convolution_bwd_strided_t::execute( const memory_tracking::grantor_t scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales(scratchpad, src_scales, wei_scales, pd()->OC(), pd()->IC(), false, - wei_scale_mask != 0, pd()->attr(), jit_scale_precompute_.get(), + wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get(), jcp.scale_adjust_factor); brgemm_bwd_exec_ctx_t brgemm_ctx(ctx, _pd); diff --git a/src/cpu/x64/jit_brgemm_conv_bwd_utils.cpp b/src/cpu/x64/jit_brgemm_conv_bwd_utils.cpp index 268755c69ab..c7d4e12f9d8 100644 --- a/src/cpu/x64/jit_brgemm_conv_bwd_utils.cpp +++ b/src/cpu/x64/jit_brgemm_conv_bwd_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the 
"License"); * you may not use this file except in compliance with the License. @@ -2048,7 +2048,7 @@ status_t init_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, jcp.with_scales = !src_scales.has_default_values() || !wei_scales.has_default_values() || jcp.scale_adjust_factor != 1.0f; - jcp.is_ic_scale = wei_scales.mask_ != 0; + jcp.is_ic_scale = wei_scales.get_mask() > 0; } jcp.req_brg_comp_pad = false; diff --git a/src/cpu/x64/jit_brgemm_conv_utils.cpp b/src/cpu/x64/jit_brgemm_conv_utils.cpp index 8e0e284cc7c..a0f973ed87e 100644 --- a/src/cpu/x64/jit_brgemm_conv_utils.cpp +++ b/src/cpu/x64/jit_brgemm_conv_utils.cpp @@ -2314,7 +2314,7 @@ status_t init_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, jcp.with_scales = !src_scales.has_default_values() || !wei_scales.has_default_values() || jcp.scale_adjust_factor != 1.0f; - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; const bool compensation_w_padding = (jcp.s8s8_compensation_required || jcp.src_zero_point) @@ -2577,7 +2577,7 @@ status_t init_1x1_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, jcp.with_scales = !src_scales.has_default_values() || !wei_scales.has_default_values() || jcp.scale_adjust_factor != 1.0f; - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; // enable ununroll_bd_loop for big shapes to reduce kernel sizes jcp.ununroll_bd_loop diff --git a/src/cpu/x64/jit_brgemm_inner_product.cpp b/src/cpu/x64/jit_brgemm_inner_product.cpp index 45d40417af8..c9d21c9cb19 100644 --- a/src/cpu/x64/jit_brgemm_inner_product.cpp +++ b/src/cpu/x64/jit_brgemm_inner_product.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -94,8 +94,7 @@ status_t brgemm_inner_product_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales(scratchpad, src_scales, wei_scales, pd()->IC(), pd()->OC(), false, wei_scale_mask == (1 << 0), diff --git a/src/cpu/x64/jit_brgemm_inner_product.hpp b/src/cpu/x64/jit_brgemm_inner_product.hpp index 0601a0b3cf7..8dbf1bf29eb 100644 --- a/src/cpu/x64/jit_brgemm_inner_product.hpp +++ b/src/cpu/x64/jit_brgemm_inner_product.hpp @@ -224,8 +224,8 @@ struct brgemm_inner_product_fwd_t : public primitive_t { const auto attr = pd()->attr(); if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); diff --git a/src/cpu/x64/jit_brgemm_inner_product_utils.cpp b/src/cpu/x64/jit_brgemm_inner_product_utils.cpp index 3de2b4c10eb..b66bce716cd 100644 --- a/src/cpu/x64/jit_brgemm_inner_product_utils.cpp +++ b/src/cpu/x64/jit_brgemm_inner_product_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -444,8 +444,7 @@ status_t jit_brgemm_ip_fwd_conf_t::init_conf(cpu_isa_t isa, const memory_desc_wrapper dst_d(&dst_md); if (!post_ops_ok(attr, dst_d)) return status::unimplemented; if (jbgp.with_scales) { - const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); - jbgp.is_oc_scale = wei_scales.mask_ != 0; + jbgp.is_oc_scale = attr.scales_.get_mask(DNNL_ARG_WEIGHTS) > 0; } const int min_ic_divisor = is_amx_int8 ? 4 : is_amx_xf16 ? 2 : 1; diff --git a/src/cpu/x64/jit_brgemm_post_ops.cpp b/src/cpu/x64/jit_brgemm_post_ops.cpp index 97eebf89ce3..1ea3558c533 100644 --- a/src/cpu/x64/jit_brgemm_post_ops.cpp +++ b/src/cpu/x64/jit_brgemm_post_ops.cpp @@ -579,7 +579,8 @@ dnnl::impl::cpu::x64::jit_brgemm_kernel_post_ops_t< const auto &wei_scales = attr_.scales_.get(DNNL_ARG_WEIGHTS); // per_oc: conv: 1 << 0, (1 << 1) + (1 << 0) (with groups) // per_oc: ip: 1 << 0 - is_oc_scale_ = utils::one_of(wei_scales.mask_, 1 << 0, (1 << 1) + (1 << 0)); + is_oc_scale_ + = utils::one_of(wei_scales.get_mask(), 1 << 0, (1 << 1) + (1 << 0)); inp_dt_ = brg_.dt_c; out_dt_ = brg_.dt_d; diff --git a/src/cpu/x64/jit_gemm_inner_product_utils.cpp b/src/cpu/x64/jit_gemm_inner_product_utils.cpp index 52854681668..782a7e0887a 100644 --- a/src/cpu/x64/jit_gemm_inner_product_utils.cpp +++ b/src/cpu/x64/jit_gemm_inner_product_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
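The brgemm post-ops hunk above spells out the per-oc mask convention these drivers rely on: inner product keeps OC in dimension 0 (mask `1 << 0`), while grouped convolution also sets the groups bit, giving `(1 << 1) + (1 << 0)`. The `one_of` call reduces to this check (a standalone restatement, not the library code):

    // True exactly for the two per-oc weight-scale masks accepted above.
    inline bool is_per_oc_wei_mask(int mask) {
        return mask == (1 << 0) || mask == ((1 << 1) + (1 << 0));
    }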
@@ -288,7 +288,7 @@ jit_pp_kernel_t::jit_pp_kernel_t(size_t OC, size_t MB, dim_t dst_mb_stride, if (this->do_bias()) compute_vreg_bias_shift_ = compute_vregs_per_iter_++; - if (!attr->scales_.get(DNNL_ARG_DST).has_default_values()) { + if (!attr->scales_.has_default_values(DNNL_ARG_DST)) { this->do_dst_scale_ = true; vreg_dst_scale = Vmm(idx_compute_vreg_start_++); } diff --git a/src/cpu/x64/jit_uni_binary.cpp b/src/cpu/x64/jit_uni_binary.cpp index 889fea1df0e..4e713ff3daa 100644 --- a/src/cpu/x64/jit_uni_binary.cpp +++ b/src/cpu/x64/jit_uni_binary.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -172,10 +172,8 @@ status_t jit_uni_binary_t::pd_t::init(engine_t *engine) { conf_.is_f16 = conf_.dst_type == f16; conf_.op_type = get_op_type(src0_md_); assert(conf_.op_type != op_t::none); - conf_.do_scale_src0 - = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); - conf_.do_scale_src1 - = !attr()->scales_.get(DNNL_ARG_SRC_1).has_default_values(); + conf_.do_scale_src0 = !attr()->scales_.has_default_values(DNNL_ARG_SRC_0); + conf_.do_scale_src1 = !attr()->scales_.has_default_values(DNNL_ARG_SRC_1); const auto sum_idx = po.find(primitive_kind::sum); conf_.do_sum = sum_idx != -1 && po.entry_[sum_idx].sum.scale != 0.f; conf_.with_eltwise = po.find(primitive_kind::eltwise) != -1; diff --git a/src/cpu/x64/jit_uni_group_normalization.cpp b/src/cpu/x64/jit_uni_group_normalization.cpp index 8f4b16c9806..e9196ea81cf 100644 --- a/src/cpu/x64/jit_uni_group_normalization.cpp +++ b/src/cpu/x64/jit_uni_group_normalization.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -84,8 +84,8 @@ struct kernel_t : public jit_uni_group_normalization_fwd_t::kernel_base_t, with_eltwise_ = post_ops.find(primitive_kind::eltwise) != -1; const auto &attr_scales = pd->attr()->scales_; - with_src_scales_ = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); - with_dst_scales_ = !attr_scales.get(DNNL_ARG_DST).has_default_values(); + with_src_scales_ = !attr_scales.has_default_values(DNNL_ARG_SRC); + with_dst_scales_ = !attr_scales.has_default_values(DNNL_ARG_DST); io::io_conf_t io_conf; io::io_tail_conf_t io_tail_conf(simd_w_, axis_simd_tail_, diff --git a/src/cpu/x64/jit_uni_instance_normalization.cpp b/src/cpu/x64/jit_uni_instance_normalization.cpp index d441712e2e0..7e0d86b0a7a 100644 --- a/src/cpu/x64/jit_uni_instance_normalization.cpp +++ b/src/cpu/x64/jit_uni_instance_normalization.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
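In the normalization kernels here, `with_src_scales_` and `with_dst_scales_` gate two extra operations in the epilogue: the source scale dequantizes the input before normalization, and the destination scale quantizes the result on the way out (oneDNN applies dst scales in inverted form). A scalar sketch of that data flow, with all names assumed:

    // One element of an int8 normalization, scales applied as assumed above.
    float normalize_elem(float x_quant, float src_scale, float mean,
            float inv_std, float gamma, float beta, float dst_scale) {
        const float x = x_quant * src_scale;          // with_src_scales_
        const float y = gamma * (x - mean) * inv_std + beta;
        return y / dst_scale;                         // with_dst_scales_
    }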
@@ -82,8 +82,8 @@ struct kernel_t : public jit_uni_instance_normalization_fwd_t::kernel_base_t, with_eltwise_ = post_ops.find(primitive_kind::eltwise) != -1; const auto &attr_scales = pd->attr()->scales_; - with_src_scales_ = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); - with_dst_scales_ = !attr_scales.get(DNNL_ARG_DST).has_default_values(); + with_src_scales_ = !attr_scales.has_default_values(DNNL_ARG_SRC); + with_dst_scales_ = !attr_scales.has_default_values(DNNL_ARG_DST); io::io_conf_t io_conf; io::io_tail_conf_t io_tail_conf(simd_w_, axis_simd_tail_, diff --git a/src/cpu/x64/jit_uni_layer_normalization.cpp b/src/cpu/x64/jit_uni_layer_normalization.cpp index 5d532c43e01..2665cf2fe2c 100644 --- a/src/cpu/x64/jit_uni_layer_normalization.cpp +++ b/src/cpu/x64/jit_uni_layer_normalization.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -128,8 +128,8 @@ struct jit_stat_and_data_base_kernel_t : stat_and_data_kernel_t, with_eltwise_ = post_ops.find(primitive_kind::eltwise) != -1; const auto &attr_scales = pd_->attr()->scales_; - with_src_scales_ = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); - with_dst_scales_ = !attr_scales.get(DNNL_ARG_DST).has_default_values(); + with_src_scales_ = !attr_scales.has_default_values(DNNL_ARG_SRC); + with_dst_scales_ = !attr_scales.has_default_values(DNNL_ARG_DST); io::io_conf_t io_conf; io::io_tail_conf_t io_tail_conf(simd_w_, axis_simd_tail_, diff --git a/src/cpu/x64/jit_uni_reorder.cpp b/src/cpu/x64/jit_uni_reorder.cpp index f79a5d32b1f..0ed3361133c 100644 --- a/src/cpu/x64/jit_uni_reorder.cpp +++ b/src/cpu/x64/jit_uni_reorder.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2324,13 +2324,10 @@ status_t jit_uni_reorder_t::pd_t::init_scratchpad() { compensation_reduce_size); } - const memory_desc_wrapper input_d(src_md()); - int scales_mask = -1; - bool is_set = false; - CHECK(attr()->scales_.get(DNNL_ARG_DST, &scales_mask, &is_set)); - - if (is_set && scales_mask > 0) { - get_D_values(input_d, scales_mask, nullptr, &D_mask_, nullptr); + if (!attr()->scales_.has_default_values(DNNL_ARG_DST)) { + const memory_desc_wrapper input_d(src_md()); + int mask = attr()->scales_.get_mask(DNNL_ARG_DST); + get_D_values(input_d, mask, nullptr, &D_mask_, nullptr); if (D_mask_ > 1) { scratchpad.template book( memory_tracking::names::key_reorder_precomputed_dst_scales, diff --git a/src/cpu/x64/jit_uni_reorder_utils.cpp b/src/cpu/x64/jit_uni_reorder_utils.cpp index cf9b343cb37..5ef629ca359 100644 --- a/src/cpu/x64/jit_uni_reorder_utils.cpp +++ b/src/cpu/x64/jit_uni_reorder_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -273,24 +273,21 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, p.src_scale_type = scale_type_t::NONE; int src_mask = 0; - bool is_src_set = false; - CHECK(attr->scales_.get(DNNL_ARG_SRC, &src_mask, &is_src_set)); - if (is_src_set) { + if (!attr->scales_.has_default_values(DNNL_ARG_SRC)) { + src_mask = attr->scales_.get_mask(DNNL_ARG_SRC); p.src_scale_type = src_mask == 0 ? scale_type_t::COMMON : scale_type_t::MANY; } p.dst_scale_type = scale_type_t::NONE; int dst_mask = 0; - bool is_dst_set = false; - CHECK(attr->scales_.get(DNNL_ARG_DST, &dst_mask, &is_dst_set)); - if (is_dst_set) { + if (!attr->scales_.has_default_values(DNNL_ARG_DST)) { + dst_mask = attr->scales_.get_mask(DNNL_ARG_DST); p.dst_scale_type = dst_mask == 0 ? scale_type_t::COMMON : scale_type_t::MANY; } - if (is_src_set && is_dst_set && src_mask != dst_mask) - return status::unimplemented; + if (src_mask != dst_mask) return status::unimplemented; p.scale_adjust = (om_d.extra().flags & memory_extra_flags::scale_adjust) ? om_d.extra().scale_adjust diff --git a/src/cpu/x64/jit_uni_softmax.cpp b/src/cpu/x64/jit_uni_softmax.cpp index 1ccd7f0cb08..89837058eb5 100644 --- a/src/cpu/x64/jit_uni_softmax.cpp +++ b/src/cpu/x64/jit_uni_softmax.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -969,9 +969,9 @@ struct jit_softmax_dense_kernel_t : jit_softmax_kernel_base_t, const auto &attr_scales = pd_->attr()->scales_; with_src_scales_ = is_superset(isa, avx2) - && !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + && !attr_scales.has_default_values(DNNL_ARG_SRC); with_dst_scales_ = is_superset(isa, avx2) - && !attr_scales.get(DNNL_ARG_DST).has_default_values(); + && !attr_scales.has_default_values(DNNL_ARG_DST); io::io_conf_t io_conf; io::io_tail_conf_t io_tail_conf(simd_w_, axis_simd_tail_, @@ -1529,9 +1529,9 @@ struct jit_softmax_strided_kernel_t : jit_softmax_kernel_base_t, const auto &attr_scales = pd_->attr()->scales_; with_src_scales_ = is_superset(isa, avx2) - && !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + && !attr_scales.has_default_values(DNNL_ARG_SRC); with_dst_scales_ = is_superset(isa, avx2) - && !attr_scales.get(DNNL_ARG_DST).has_default_values(); + && !attr_scales.has_default_values(DNNL_ARG_DST); io::io_conf_t io_conf; io::io_tail_conf_t io_tail_conf(simd_w_, axis_simd_tail_, diff --git a/src/cpu/x64/jit_uni_x8s8s32x_1x1_conv_kernel.cpp b/src/cpu/x64/jit_uni_x8s8s32x_1x1_conv_kernel.cpp index 8186789adc4..d2ff20d8db4 100644 --- a/src/cpu/x64/jit_uni_x8s8s32x_1x1_conv_kernel.cpp +++ b/src/cpu/x64/jit_uni_x8s8s32x_1x1_conv_kernel.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
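`prb_init()` above now derives the reorder scale types from the default-values query plus the mask, and bails out when the src and dst masks disagree. The classification it performs reduces to this sketch:

    enum class scale_type_t { NONE, COMMON, MANY };

    // No scales attached -> NONE; mask 0 -> one COMMON value; any
    // per-dimension bit set -> MANY.
    scale_type_t classify_scales(bool has_scales, int mask) {
        if (!has_scales) return scale_type_t::NONE;
        return mask == 0 ? scale_type_t::COMMON : scale_type_t::MANY;
    }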
@@ -910,7 +910,7 @@ status_t jit_uni_x8s8s32x_1x1_conv_kernel::init_conf( const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); jcp.wei_adj_scale @@ -927,7 +927,7 @@ void jit_uni_x8s8s32x_1x1_conv_kernel::init_scratchpad( const jit_1x1_conv_conf_t &jcp, const primitive_attr_t &attr) { using namespace dnnl::impl::memory_tracking::names; - const int wei_mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = wei_mask == 0 ? 1 : static_cast(jcp.oc) * jcp.ngroups; const dim_t count = nstl::max(scales_count, 8); diff --git a/src/cpu/x64/jit_uni_x8s8s32x_1x1_convolution.cpp b/src/cpu/x64/jit_uni_x8s8s32x_1x1_convolution.cpp index 6b0e7e9d9e9..8dadfd91d06 100644 --- a/src/cpu/x64/jit_uni_x8s8s32x_1x1_convolution.cpp +++ b/src/cpu/x64/jit_uni_x8s8s32x_1x1_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2023 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,7 +76,7 @@ status_t jit_uni_x8s8s32x_1x1_convolution_fwd_t::execute_forward( const float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 1.f / pd()->jcp_.wei_adj_scale : 1.0f; - int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); if (wei_mask == 0) { utils::array_set( local_scales, src_scales[0] * wei_scales[0] * factor, 8); @@ -94,7 +94,7 @@ status_t jit_uni_x8s8s32x_1x1_convolution_fwd_t::execute_forward( auto dw_local_scales = dw_scratchpad.template get(key_conv_adjusted_scales); - int wei_mask = attr_dw->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = attr_dw->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = 1.f / jcp_dw->wei_adj_scale; if (wei_mask == 0) { utils::array_set(dw_local_scales, diff --git a/src/cpu/x64/jit_uni_x8s8s32x_conv_kernel.cpp b/src/cpu/x64/jit_uni_x8s8s32x_conv_kernel.cpp index d4512765ee3..24275ba63cf 100644 --- a/src/cpu/x64/jit_uni_x8s8s32x_conv_kernel.cpp +++ b/src/cpu/x64/jit_uni_x8s8s32x_conv_kernel.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1315,7 +1315,7 @@ status_t jit_uni_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp, const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); const auto zp = attr.zero_points_; @@ -1614,7 +1614,7 @@ void jit_uni_x8s8s32x_fwd_kernel::init_scratchpad( memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp, const primitive_attr_t &attr) { - const int mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = mask == 0 ? 1 : static_cast(jcp.oc) * jcp.ngroups; dim_t count = scales_count == 1 ? 
(dim_t)8 : scales_count; diff --git a/src/cpu/x64/jit_uni_x8s8s32x_convolution.cpp b/src/cpu/x64/jit_uni_x8s8s32x_convolution.cpp index bfa20f0d33e..8fc730b1cec 100644 --- a/src/cpu/x64/jit_uni_x8s8s32x_convolution.cpp +++ b/src/cpu/x64/jit_uni_x8s8s32x_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2023 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ const float *jit_uni_x8s8s32x_convolution_fwd_t::adjust_oscales( const memory_tracking::grantor_t &scratchpad, const float *src_scales, const float *wei_scales) const { auto loc_scales = scratchpad.template get(key_conv_adjusted_scales); - int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 1.f / pd()->jcp_.wei_adj_scale : 1.0f; diff --git a/src/cpu/x64/jit_uni_x8s8s32x_deconvolution.cpp b/src/cpu/x64/jit_uni_x8s8s32x_deconvolution.cpp index 9b03681d900..d8596136576 100644 --- a/src/cpu/x64/jit_uni_x8s8s32x_deconvolution.cpp +++ b/src/cpu/x64/jit_uni_x8s8s32x_deconvolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -272,7 +272,7 @@ status_t jit_uni_x8s8s32x_deconv_fwd_kernel::init_conf( const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); jcp.post_ops = p; @@ -386,7 +386,7 @@ template void jit_uni_x8s8s32x_deconv_fwd_kernel::init_scratchpad( memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp, const primitive_attr_t &attr) { - const int mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = mask == 0 ? 1 : static_cast(jcp.oc) * jcp.ngroups; dim_t count = nstl::max(scales_count, 8); @@ -1451,7 +1451,7 @@ const float *jit_uni_x8s8s32x_deconvolution_fwd_t::adjust_oscales( const memory_tracking::grantor_t &scratchpad, const float *src_scales, const float *wei_scales) const { auto loc_scales = scratchpad.template get(key_conv_adjusted_scales); - int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 
1.f / pd()->jcp_.wei_adj_scale : 1.0f; diff --git a/src/cpu/x64/matmul/brgemm_matmul.cpp b/src/cpu/x64/matmul/brgemm_matmul.cpp index 0301b525d11..bd41e8af008 100644 --- a/src/cpu/x64/matmul/brgemm_matmul.cpp +++ b/src/cpu/x64/matmul/brgemm_matmul.cpp @@ -105,9 +105,9 @@ status_t brgemm_matmul_t::pd_t::init(engine_t *engine) { = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}; bool ok = attr_scales_ok(supported_args); const auto &asc = attr()->scales_; - if (!asc.get(DNNL_ARG_SRC).has_default_values() - && !asc.get(DNNL_ARG_WEIGHTS).has_default_values() - && asc.get(DNNL_ARG_WEIGHTS).mask_ != 0) { + if (!asc.has_default_values(DNNL_ARG_SRC) + && !asc.has_default_values(DNNL_ARG_WEIGHTS) + && asc.get_mask(DNNL_ARG_WEIGHTS) > 0) { // This case requires scratchpad if (N() == DNNL_RUNTIME_DIM_VAL) ok = false; } @@ -118,14 +118,14 @@ status_t brgemm_matmul_t::pd_t::init(engine_t *engine) { ok = ok && one_of(asc.get_data_type(DNNL_ARG_DST), undef, f32); } // Implementation has limited support w.r.t. scales groups. - if (!asc.get(DNNL_ARG_WEIGHTS).has_default_values()) { + if (!asc.has_default_values(DNNL_ARG_WEIGHTS)) { if (!asc.get(DNNL_ARG_WEIGHTS).has_default_groups()) { // Only grouping over K is supported. - ok = ok && asc.get(DNNL_ARG_WEIGHTS).group_dims_[1] == 1; + ok = ok && asc.get_group(DNNL_ARG_WEIGHTS, 1) == 1; // Only 'per_ocic' mask is supported, but not 'per_tensor' in // benchdnn terms. In numbers, '12' is supported while for // 4D '15' is required. - const int mask = asc.get(DNNL_ARG_WEIGHTS).mask_; + const int mask = asc.get_mask(DNNL_ARG_WEIGHTS); const int ndims = weights_md_.ndims; const int last_dim = (1 << (ndims - 1)); const int prelast_dim = (1 << (ndims - 2)); @@ -357,8 +357,8 @@ status_t brgemm_matmul_t::init(engine_t *engine) { if (is_jit_supported && wei_scale_count > 1 && req_copy_scales(attr) && !bgmmc.req_transpose_scales) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); @@ -383,10 +383,13 @@ status_t brgemm_matmul_t::execute_body(const exec_ctx_t &ctx) const { matmul_helper_t helper(src_d, weights_d, dst_d); const auto &bgmmc = pd()->get_brgemm_matmul_conf(); - const int wei_scale_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; - const bool wei_scale_per_k = wei_scale_mask & pd()->wei_qmask_K(); - const bool wei_scale_per_n = wei_scale_mask & pd()->wei_qmask_N(); + const bool has_wei_scales = !pd()->attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS); + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); + const bool wei_scale_per_k = has_wei_scales && (wei_scale_mask & pd()->wei_qmask_K()); + const bool wei_scale_per_n = has_wei_scales && (wei_scale_mask & pd()->wei_qmask_N()); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->K(), pd()->N(), wei_scale_per_k, wei_scale_per_n, pd()->attr(), diff --git a/src/cpu/x64/matmul/brgemm_matmul_utils.cpp b/src/cpu/x64/matmul/brgemm_matmul_utils.cpp index 26cf27ef5c5..9e5b6021c01 100644 --- a/src/cpu/x64/matmul/brgemm_matmul_utils.cpp +++ b/src/cpu/x64/matmul/brgemm_matmul_utils.cpp @@ -1369,19 +1369,19 @@ status_t init_brgemm_matmul_conf(cpu_isa_t isa, brgemm_matmul_conf_t &bgmmc, const auto
&src_scales = attr.scales_.get(DNNL_ARG_SRC); const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); - bgmmc.with_scales = !src_scales.has_default_values() - || !wei_scales.has_default_values(); - if (bgmmc.with_scales) { + const bool has_wei_scales = !wei_scales.has_default_values(); + bgmmc.with_scales = !src_scales.has_default_values() || has_wei_scales; + if (has_wei_scales) { const auto wei_qmask_N = 1 << (bgmmc.ndims - 1); const auto wei_qmask_K = 1 << (bgmmc.ndims - 2); - bgmmc.is_oscale_per_k = wei_scales.mask_ & wei_qmask_K; - bgmmc.is_oscale_per_n = wei_scales.mask_ & wei_qmask_N; + bgmmc.is_oscale_per_k = wei_scales.get_mask() & wei_qmask_K; + bgmmc.is_oscale_per_n = wei_scales.get_mask() & wei_qmask_N; bgmmc.apply_scales_in_buffer_b = bgmmc.is_oscale_per_k && bgmmc.with_wei_decompression && bgmmc.N * bgmmc.K != 1; // only common and per-oc-channel scales are supported // only per-ic-channel scales are supported with weight decompression - VCONDCHECK_BG(wei_scales.mask_ == 0 || bgmmc.is_oscale_per_n + VCONDCHECK_BG(wei_scales.get_mask() == 0 || bgmmc.is_oscale_per_n || IMPLICATION(bgmmc.is_oscale_per_k, bgmmc.with_wei_decompression), VERBOSE_UNSUPPORTED_SCALES_CFG); @@ -1390,8 +1390,8 @@ status_t init_brgemm_matmul_conf(cpu_isa_t isa, brgemm_matmul_conf_t &bgmmc, const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); bgmmc.with_dst_scales = !dst_scales.has_default_values(); // only common scales are supported - VCONDCHECK_BG(!(bgmmc.with_dst_scales && dst_scales.mask_ != 0), - VERBOSE_UNSUPPORTED_SCALES_CFG) + VCONDCHECK_BG(!(bgmmc.with_dst_scales && dst_scales.get_mask() > 0), + VERBOSE_UNSUPPORTED_SCALES_CFG); const auto &p = attr.post_ops_; bgmmc.with_sum = p.find(primitive_kind::sum) != -1; diff --git a/src/cpu/x64/matmul_inner_product.cpp b/src/cpu/x64/matmul_inner_product.cpp index 9a6e547cb24..48e0cf81f9f 100644 --- a/src/cpu/x64/matmul_inner_product.cpp +++ b/src/cpu/x64/matmul_inner_product.cpp @@ -244,13 +244,16 @@ status_t matmul_inner_product_fwd_t::pd_t::init_matmul_params( attr_.set_default_formats(dst_md(0)), VERBOSE_UNSUPPORTED_POSTOP); primitive_attr_t matmul_attr = *attr(); - const auto wei_mask = matmul_attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_mask == 1) - VDISPATCH_INNER_PRODUCT_SC(matmul_attr.scales_.set(DNNL_ARG_WEIGHTS, - 1 << (mm_wei_md.ndims - 1)), - VERBOSE_UNSUPPORTED_ATTR); - else if (wei_mask != 0) - VDISPATCH_INNER_PRODUCT(false, VERBOSE_UNSUPPORTED_SCALES_CFG); + if (!matmul_attr.scales_.has_default_values(DNNL_ARG_WEIGHTS)) { + const auto wei_mask = matmul_attr.scales_.get_mask(DNNL_ARG_WEIGHTS); + if (wei_mask == 1) { + VDISPATCH_INNER_PRODUCT_SC(matmul_attr.scales_.set(DNNL_ARG_WEIGHTS, + 1 << (mm_wei_md.ndims - 1)), + VERBOSE_UNSUPPORTED_ATTR); + } else if (wei_mask > 0) { + VDISPATCH_INNER_PRODUCT(false, VERBOSE_UNSUPPORTED_SCALES_CFG); + } + } memory_desc_t mm_bia_md {}; // Inner Product bias is always a vector while MatMul requires bias to have diff --git a/src/gpu/generic/ref_concat.hpp b/src/gpu/generic/ref_concat.hpp index 5c230cc634d..cb4b510b1ed 100644 --- a/src/gpu/generic/ref_concat.hpp +++ b/src/gpu/generic/ref_concat.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
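The hunks above all follow the same new contract: call sites guard get_mask(arg) behind has_default_values(arg) instead of reading a raw mask_ field, and the switch from `mask_ != 0` to `get_mask() > 0` suggests an unset entry may report a non-positive sentinel (an assumption here, not stated in the patch). A minimal standalone sketch of that contract and of the mask-to-count arithmetic used by the init_scratchpad hunks, with hypothetical stand-in types rather than the real oneDNN classes:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for quant_entry_t; the mask is only meaningful
    // once the entry has been explicitly set.
    struct quant_entry {
        bool set = false;
        int mask = 0;
        bool has_default_values() const { return !set; }
        int get_mask() const { return set ? mask : -1; } // assumed sentinel
    };

    // Scale count implied by a mask: bit d set means the scale varies along
    // dimension d, mirroring `mask == 0 ? 1 : oc * ngroups` above.
    std::int64_t scales_count(int mask, const std::vector<std::int64_t> &dims) {
        std::int64_t count = 1;
        for (std::size_t d = 0; d < dims.size(); ++d)
            if (mask & (1 << d)) count *= dims[d];
        return count;
    }

    int main() {
        quant_entry wei_scales {/*set=*/true, /*mask=*/1}; // per-OC weights
        std::vector<std::int64_t> oihw = {64, 32, 3, 3};
        if (!wei_scales.has_default_values()) // guard first, as above
            assert(scales_count(wei_scales.get_mask(), oihw) == 64);
        return 0;
    }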
@@ -60,12 +60,8 @@ struct ref_concat_t : public gpu::primitive_t { reorder_pds_.resize(n_ + use_tent_dst()); for (int i = 0; i < n_; ++i) { primitive_attr_t r_attr; - int mask = 0; - bool is_set = false; - VDISPATCH_CONCAT_SC( - sc.get(DNNL_ARG_MULTIPLE_SRC + i, &mask, &is_set), - VERBOSE_UNSUPPORTED_SCALES_CFG); - if (is_set) { + if (!sc.get(DNNL_ARG_MULTIPLE_SRC + i).has_default_values()) { + int mask = sc.get_mask(DNNL_ARG_MULTIPLE_SRC + i); VDISPATCH_CONCAT(mask == 0, "non-zero mask"); VDISPATCH_CONCAT_SC(r_attr.scales_.set(DNNL_ARG_SRC, mask), VERBOSE_UNSUPPORTED_SCALES_CFG); diff --git a/src/gpu/generic/sycl/layer_normalizations_kernels.hpp b/src/gpu/generic/sycl/layer_normalizations_kernels.hpp index 1c09075158a..8e1a4b5e53e 100644 --- a/src/gpu/generic/sycl/layer_normalizations_kernels.hpp +++ b/src/gpu/generic/sycl/layer_normalizations_kernels.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -79,12 +79,20 @@ struct layer_normalization_fwd_kernel_vec_t { memory_tensor_t data_mem(data_, conf_.data_md); memory_tensor_t scale_mem(scale_, conf_.data_scaleshift_md); memory_tensor_t shift_mem(shift_, conf_.data_scaleshift_md); - memory_plain_t rt_scale_mem(rt_scale_, conf_.scales_src_dt); - memory_plain_t dst_scale_mem(dst_scale_, conf_.scales_dst_dt); memory_tensor_t stat_mem(stat_, conf_.stat_md); memory_plain_t var_mem(var_, conf_.var_dt); memory_tensor_t dst_mem(dst_, conf_.dst_md); + float sr = 1.f; + if (!conf_.src_def) { + memory_plain_t rt_scale_mem(rt_scale_, conf_.scales_src_dt); + sr = rt_scale_mem.load(0); + } + float ds = 1.f; + if (!conf_.dst_def) { + memory_plain_t dst_scale_mem(dst_scale_, conf_.scales_dst_dt); + ds = dst_scale_mem.load(0); + } float eps = epsilon(); const size_t s_off = conf_.stat_md.off_l(idx); auto v_mean = stat_mem.load(s_off); @@ -104,8 +112,6 @@ struct layer_normalization_fwd_kernel_vec_t { float s = data_mem.load(src_off); float d = sm * (s - v_mean) + sv; - float sr = conf_.src_def ? 1.f : rt_scale_mem.load(0); - float ds = conf_.dst_def ? 
1.f : dst_scale_mem.load(0); d = (d * sr * (1.f / ds)); dst_mem.store(d, d_off); } @@ -171,8 +177,6 @@ struct layer_normalization_fwd_kernel_vec1_t { memory_tensor_t data_mem(data_, conf_.data_md); memory_tensor_t scale_mem(scale_, conf_.data_scaleshift_md); memory_tensor_t shift_mem(shift_, conf_.data_scaleshift_md); - memory_plain_t rt_scale_mem(rt_scale_, conf_.scales_src_dt); - memory_plain_t dst_scale_mem(dst_scale_, conf_.scales_dst_dt); memory_tensor_t stat_out_mem(mean_out_, conf_.stat_md); memory_plain_t var_out_mem(var_out_, conf_.var_dt); memory_tensor_t dst_mem(dst_, conf_.dst_md); @@ -181,6 +185,17 @@ struct layer_normalization_fwd_kernel_vec1_t { stat_out_mem.store(0, idx); var_out_mem.store(0, idx); } + float sr = 1.f; + if (!conf_.src_def) { + memory_plain_t rt_scale_mem(rt_scale_, conf_.scales_src_dt); + sr = rt_scale_mem.load(0); + } + float ds = 1.f; + if (!conf_.dst_def) { + memory_plain_t dst_scale_mem(dst_scale_, conf_.scales_dst_dt); + ds = dst_scale_mem.load(0); + } + float eps = epsilon(); const size_t s_off = conf_.stat_md.off_l(idx); float v_mean = 0.f; @@ -217,9 +232,6 @@ struct layer_normalization_fwd_kernel_vec1_t { const auto d_off = dst_md().off_l(index); float s = data_mem.load(src_off); float d = sm * (s - v_mean) + sv; - - float sr = conf_.src_def ? 1.f : rt_scale_mem.load(0); - float ds = conf_.dst_def ? 1.f : dst_scale_mem.load(0); d = (d * sr * (1.f / ds)); dst_mem.store(d, d_off); diff --git a/src/gpu/generic/sycl/ref_binary.hpp b/src/gpu/generic/sycl/ref_binary.hpp index e6b8ec6f2ce..69c5af70b52 100644 --- a/src/gpu/generic/sycl/ref_binary.hpp +++ b/src/gpu/generic/sycl/ref_binary.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -87,8 +87,7 @@ struct ref_binary_t : public gpu::generic::sycl::primitive_t { const auto &scales = attr()->scales_; bool dt_ok = true; for (auto arg : supported_args) { - auto &s = scales.get(arg); - dt_ok = dt_ok && is_supported_type(s.data_type_); + dt_ok = dt_ok && is_supported_type(scales.get_data_type(arg)); } return dt_ok && attr_scales_ok(supported_args); } diff --git a/src/gpu/generic/sycl/ref_convolution.cpp b/src/gpu/generic/sycl/ref_convolution.cpp index b2f0458eba3..c448a32cbcf 100644 --- a/src/gpu/generic/sycl/ref_convolution.cpp +++ b/src/gpu/generic/sycl/ref_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
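The layer normalization kernel hunks above hoist the runtime-scale loads out of the per-element loop: the scale memory views are constructed, and the common scale loaded, once per work item and only when the corresponding scales are non-default. A simplified standalone model of that hoisting, with plain pointers standing in for the SYCL memory wrappers:

    #include <cstdio>

    // Load src/dst scales once, outside the element loop, and only when set;
    // a null pointer models conf_.src_def / conf_.dst_def being true.
    void apply_scales(const float *src_scale, const float *dst_scale,
            float *data, int n) {
        float sr = src_scale ? *src_scale : 1.f;
        float ds = dst_scale ? *dst_scale : 1.f;
        for (int i = 0; i < n; ++i)
            data[i] = data[i] * sr * (1.f / ds);
    }

    int main() {
        float data[4] = {1.f, 2.f, 3.f, 4.f};
        float sr = 2.f, ds = 4.f;
        apply_scales(&sr, &ds, data, 4);
        std::printf("%g\n", data[3]); // 4 * 2 * (1/4) = 2
        return 0;
    }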
@@ -41,11 +41,9 @@ status_t ref_convolution_fwd_t::pd_t::init_conf() { conf_.do_scale_data = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); conf_.do_scale_weights - = !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values(); - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); - conf_.single_weight_scale - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0; + = !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); + conf_.single_weight_scale = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0; conf_.use_data_zeropoints = !attr()->zero_points_.has_default_values(DNNL_ARG_SRC_0); @@ -106,11 +104,9 @@ status_t ref_convolution_bwd_data_t::pd_t::init_conf() { conf_.do_scale_data = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); conf_.do_scale_weights - = !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values(); - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); - conf_.single_weight_scale - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0; + = !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); + conf_.single_weight_scale = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0; conf_.use_data_zeropoints = !attr()->zero_points_.has_default_values(DNNL_ARG_SRC_0); @@ -172,11 +168,9 @@ status_t ref_convolution_bwd_weights_t::pd_t::init_conf() { conf_.do_scale_data = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); conf_.do_scale_weights - = !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values(); - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); - conf_.single_weight_scale - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0; + = !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); + conf_.single_weight_scale = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0; conf_.use_data_zeropoints = !attr()->zero_points_.has_default_values(DNNL_ARG_SRC_0); diff --git a/src/gpu/generic/sycl/ref_convolution.hpp b/src/gpu/generic/sycl/ref_convolution.hpp index 55faae92c08..738455ddcba 100644 --- a/src/gpu/generic/sycl/ref_convolution.hpp +++ b/src/gpu/generic/sycl/ref_convolution.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -66,7 +66,7 @@ inline bool check_convolution_scales_types(const primitive_attr_t *attr) { const auto &scales = attr->scales_; for (auto arg : supported_args) { - auto dt = scales.get(arg).data_type_; + const auto dt = scales.get_data_type(arg); if (!is_supported_type(dt)) { return false; } } return true; diff --git a/src/gpu/generic/sycl/ref_layer_normalizations.cpp b/src/gpu/generic/sycl/ref_layer_normalizations.cpp index 69763aa2568..f5c7b487594 100644 --- a/src/gpu/generic/sycl/ref_layer_normalizations.cpp +++ b/src/gpu/generic/sycl/ref_layer_normalizations.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -41,15 +41,11 @@ status_t ref_layer_normalization_fwd_t::pd_t::init_conf() { conf_.block_size = 16; conf_.rt_scaling = !attr()->scales_.has_default_values(); - conf_.src_def = attr()->scales_.get(DNNL_ARG_SRC).has_default_values(); - conf_.dst_def = attr()->scales_.get(DNNL_ARG_DST).has_default_values(); - - conf_.scales_src_dt = conf_.src_def - ? data_type_t::dnnl_f32 - : attr()->scales_.get(DNNL_ARG_SRC).data_type_; - conf_.scales_dst_dt = conf_.dst_def - ? data_type_t::dnnl_f32 - : attr()->scales_.get(DNNL_ARG_DST).data_type_; + conf_.src_def = attr()->scales_.has_default_values(DNNL_ARG_SRC); + conf_.dst_def = attr()->scales_.has_default_values(DNNL_ARG_DST); + + conf_.scales_src_dt = attr()->scales_.get_data_type(DNNL_ARG_SRC); + conf_.scales_dst_dt = attr()->scales_.get_data_type(DNNL_ARG_DST); conf_.use_scale = use_scale(); conf_.use_shift = use_shift(); diff --git a/src/gpu/generic/sycl/ref_layer_normalizations.hpp b/src/gpu/generic/sycl/ref_layer_normalizations.hpp index 52bd8e6f16f..1f5a23fd238 100644 --- a/src/gpu/generic/sycl/ref_layer_normalizations.hpp +++ b/src/gpu/generic/sycl/ref_layer_normalizations.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -81,7 +81,7 @@ struct ref_layer_normalization_fwd_t : public gpu::generic::sycl::primitive_t { const auto &scales = attr()->scales_; for (auto arg : supported_args) { - auto dt = scales.get(arg).data_type_; + const auto dt = scales.get_data_type(arg); if (!is_supported_type(dt)) { return false; } } return true; diff --git a/src/gpu/generic/sycl/ref_matmul.cpp b/src/gpu/generic/sycl/ref_matmul.cpp index d79f80b3cf0..2aaaa9d7a91 100644 --- a/src/gpu/generic/sycl/ref_matmul.cpp +++ b/src/gpu/generic/sycl/ref_matmul.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,11 +29,10 @@ void ref_matmul_t::pd_t::init_conf() { conf_.do_scale_data = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); conf_.do_scale_weights - = !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values(); - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); + = !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); conf_.single_weights_scale - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0; + = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0; conf_.use_data_zeropoints = !attr()->zero_points_.has_default_values(DNNL_ARG_SRC_0); diff --git a/src/gpu/generic/sycl/ref_matmul.hpp b/src/gpu/generic/sycl/ref_matmul.hpp index 2fa308b419f..06eba1d4215 100644 --- a/src/gpu/generic/sycl/ref_matmul.hpp +++ b/src/gpu/generic/sycl/ref_matmul.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
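The init_conf simplification above only holds if the new accessor reports f32 for an entry the user never set; that is what lets the `src_def ? f32 : data_type_` ternaries disappear. A tiny model of that assumed default-on-query behavior (hypothetical types, not the real quant_entry_t):

    #include <cassert>

    enum data_type_t { dnnl_f32, dnnl_f16 };

    // Assumption from the hunk above: an unset scales entry reports f32
    // rather than undef, so callers need no default-path special case.
    struct quant_entry {
        bool set = false;
        data_type_t dt = dnnl_f32;
        data_type_t get_data_type() const { return set ? dt : dnnl_f32; }
    };

    int main() {
        quant_entry unset;
        quant_entry f16_scales {/*set=*/true, dnnl_f16};
        assert(unset.get_data_type() == dnnl_f32);
        assert(f16_scales.get_data_type() == dnnl_f16);
        return 0;
    }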
@@ -122,8 +122,7 @@ struct ref_matmul_t : public gpu::generic::sycl::primitive_t { const auto &scales = attr()->scales_; bool dt_ok = true; for (auto arg : supported_args) { - auto &s = scales.get(arg); - dt_ok = dt_ok && is_supported_type(s.data_type_); + dt_ok = dt_ok && is_supported_type(scales.get_data_type(arg)); } return dt_ok && attr_scales_ok(supported_args); } diff --git a/src/gpu/generic/sycl/ref_reorder.cpp b/src/gpu/generic/sycl/ref_reorder.cpp index c7c8c25bfdc..135bc5b77dd 100644 --- a/src/gpu/generic/sycl/ref_reorder.cpp +++ b/src/gpu/generic/sycl/ref_reorder.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,10 +34,9 @@ status_t ref_reorder_t::pd_t::init_conf() { conf_.do_scale_src = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); - conf_.scale_src_mask = attr()->scales_.get(DNNL_ARG_SRC_0).mask_; - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); - conf_.scale_dst_mask = attr()->scales_.get(DNNL_ARG_DST).mask_; + conf_.scale_src_mask = attr()->scales_.get_mask(DNNL_ARG_SRC_0); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); + conf_.scale_dst_mask = attr()->scales_.get_mask(DNNL_ARG_DST); conf_.post_ops = sycl_post_ops_t(attr(), dst_md()); return status::success; diff --git a/src/gpu/generic/sycl/ref_reorder.hpp b/src/gpu/generic/sycl/ref_reorder.hpp index 3155d8d1486..91f77e8ac6c 100644 --- a/src/gpu/generic/sycl/ref_reorder.hpp +++ b/src/gpu/generic/sycl/ref_reorder.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -103,7 +103,7 @@ struct ref_reorder_t : public gpu::generic::sycl::primitive_t { const auto &scales = attr()->scales_; for (auto arg : supported_args) { - auto dt = scales.get(arg).data_type_; + const auto dt = scales.get_data_type(arg); if (!is_supported_type(dt)) { return false; } } return true; diff --git a/src/gpu/generic/sycl/ref_softmax.cpp b/src/gpu/generic/sycl/ref_softmax.cpp index 96ab1b4f79a..01a9cbedc54 100644 --- a/src/gpu/generic/sycl/ref_softmax.cpp +++ b/src/gpu/generic/sycl/ref_softmax.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -36,10 +36,8 @@ status_t ref_sycl_softmax_fwd_t::pd_t::init_conf() { conf_.channels = axis_size(); conf_.wk_size = inner_size() * outer_size(); - conf_.do_scale_src - = !attr()->scales_.get(DNNL_ARG_SRC).has_default_values(); - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); + conf_.do_scale_src = !attr()->scales_.has_default_values(DNNL_ARG_SRC); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); conf_.post_ops = sycl_post_ops_t(attr(), dst_md()); diff --git a/src/gpu/generic/sycl/ref_softmax.hpp b/src/gpu/generic/sycl/ref_softmax.hpp index 8143eb6bc51..c4049fa56b2 100644 --- a/src/gpu/generic/sycl/ref_softmax.hpp +++ b/src/gpu/generic/sycl/ref_softmax.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,7 +50,7 @@ struct ref_sycl_softmax_fwd_t : public gpu::generic::sycl::primitive_t { VDISPATCH_SOFTMAX(attr()->has_default_values( sm::scales_runtime | sm::post_ops), VERBOSE_UNSUPPORTED_ATTR); - VDISPATCH_SOFTMAX(attr_oscale_ok(), VERBOSE_UNSUPPORTED_SCALES_CFG); + VDISPATCH_SOFTMAX(attr_scales_ok(), VERBOSE_UNSUPPORTED_SCALES_CFG); VDISPATCH_SOFTMAX(sycl_post_ops_t::post_ops_ok(attr(), true, false), VERBOSE_UNSUPPORTED_POSTOP); VDISPATCH_SOFTMAX_SC( @@ -67,15 +67,6 @@ struct ref_sycl_softmax_fwd_t : public gpu::generic::sycl::primitive_t { sycl_softmax_conf_t conf_; status_t init_conf(); - bool attr_oscale_ok() const { - const auto &scales = attr()->scales_; - bool ok = true; - for (const auto &e : scales.scales_) { - ok = ok && e.second.mask_ == 0; - } - return ok; - } - bool check_data_types(data_type_t src) { return utils::one_of(src, data_type::f32, data_type::bf16, data_type::f16, data_type::s8, data_type::u8); diff --git a/src/gpu/generic/sycl/ref_sum.hpp b/src/gpu/generic/sycl/ref_sum.hpp index 0bec1af1845..d289e461e6d 100644 --- a/src/gpu/generic/sycl/ref_sum.hpp +++ b/src/gpu/generic/sycl/ref_sum.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
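Dropping softmax's private attr_oscale_ok() in favor of the shared attr_scales_ok() removes the last direct iteration over scales_.scales_. Roughly the equivalent check under the new query API, sketched with stand-in types (the real helper lives in the common primitive-descriptor code):

    #include <cassert>
    #include <initializer_list>
    #include <map>

    // Stand-in for the per-arg scales container: an arg is present in the
    // map only if the user set scales for it.
    struct scales_map {
        std::map<int, int> masks; // arg -> mask
        bool has_default_values(int arg) const { return !masks.count(arg); }
        int get_mask(int arg) const { return masks.at(arg); }
    };

    // Only common (mask == 0) scales pass and defaults are skipped -- the
    // same shape as the removed attr_oscale_ok() loop.
    bool attr_scales_ok(const scales_map &sc, std::initializer_list<int> args) {
        for (int arg : args) {
            if (sc.has_default_values(arg)) continue;
            if (sc.get_mask(arg) != 0) return false;
        }
        return true;
    }

    int main() {
        const int ARG_SRC = 1, ARG_DST = 2; // hypothetical arg ids
        scales_map sc;
        sc.masks[ARG_SRC] = 0;
        assert(attr_scales_ok(sc, {ARG_SRC, ARG_DST}));
        sc.masks[ARG_DST] = 2; // per-channel dst scales -> rejected
        assert(!attr_scales_ok(sc, {ARG_SRC, ARG_DST}));
        return 0;
    }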
@@ -60,15 +60,13 @@ struct ref_sum_t : public gpu::generic::sycl::primitive_t { // Block formats are not yet supported // Dimensions can not be > 6 - VDISPATCH_SUM( - !(!src_d.is_plain() - || src_d.ndims() > xpu::sycl::md_t::max_dims), + VDISPATCH_SUM(src_d.is_plain() + && src_d.ndims() <= xpu::sycl::md_t::max_dims, VERBOSE_UNSUPPORTED_TENSOR_LAYOUT, "src"); - VDISPATCH_SUM(!(!attr()->scales_.has_default_values() - && !is_supported_type( - scales.get(DNNL_ARG_SRC + i) - .data_type_)), + VDISPATCH_SUM(attr()->scales_.has_default_values() + || is_supported_type( + scales.get_data_type(DNNL_ARG_SRC + i)), VERBOSE_UNSUPPORTED_ATTR); } diff --git a/src/gpu/generic/sycl/reorder_kernels.hpp b/src/gpu/generic/sycl/reorder_kernels.hpp index e9e317fcd99..2a3ff1d6281 100644 --- a/src/gpu/generic/sycl/reorder_kernels.hpp +++ b/src/gpu/generic/sycl/reorder_kernels.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,14 +73,14 @@ struct reorder_kernel_t { = (i < src_md().ndims()) ? src_md().strides()[i] : INT_MAX; } dims_t dims_scales_src; - if (conf_.scale_src_mask != 0) { + if (conf_.scale_src_mask > 0) { for (int i = 0; i < max_supported_ndims; i++) { dims_scales_src[i] = conf_.scale_src_mask >> i & 1 ? dims[i] : 1; } } dims_t dims_scales_dst; - if (conf_.scale_dst_mask != 0) { + if (conf_.scale_dst_mask > 0) { for (int i = 0; i < max_supported_ndims; i++) { dims_scales_dst[i] = conf_.scale_dst_mask >> i & 1 ? dims[i] : 1; @@ -97,7 +97,7 @@ struct reorder_kernel_t { auto src = src_mem.load(idx); if (conf_.do_scale_src) { - if (conf_.scale_src_mask != 0) { + if (conf_.scale_src_mask > 0) { int scale_idx = 0; for (int i = 0; i < max_supported_ndims; i++) { if (i < src_md().ndims()) { @@ -116,7 +116,7 @@ struct reorder_kernel_t { auto acc = src; acc = conf_.post_ops.apply(acc, dst_, dst_idx); if (conf_.do_scale_dst) { - if (conf_.scale_dst_mask != 0) { + if (conf_.scale_dst_mask > 0) { int scale_idx = 0; for (int i = 0; i < max_supported_ndims; i++) { if (i < src_md().ndims()) { diff --git a/src/gpu/gpu_utils.hpp b/src/gpu/gpu_utils.hpp index fe56ccaba41..66a73f4f673 100644 --- a/src/gpu/gpu_utils.hpp +++ b/src/gpu/gpu_utils.hpp @@ -32,7 +32,7 @@ namespace gpu { inline dim_t get_attr_oscales_count(int mask, const memory_desc_wrapper &md) { dim_t count = 1; - if (mask == 0) return count; + if (mask <= 0) return count; for (int d = 0; d < md.ndims(); d++) { const int dim_mask = 1 << d; @@ -45,13 +45,14 @@ inline dim_t get_attr_oscales_count(int mask, const memory_desc_wrapper &md) { class scales_query_t { public: bool has_default_values() const { return scales_.has_default_values(); } - int get_mask() const { return scales_.mask_; } + int get_mask() const { return scales_.get_mask(); } size_t get_count() const { return count_; } - data_type_t get_data_type() const { return scales_.data_type_; } + data_type_t get_data_type() const { return scales_.get_data_type(); } dim_t get_group() const { - if (scales_.ndims_ < 2) return 1; - const auto g0 = scales_.group_dims_[0]; - const auto g1 = scales_.group_dims_[1]; + if (scales_.has_default_groups()) return 1; + + const auto g0 = scales_.get_group(0); + const auto g1 = scales_.get_group(1); assert(utils::one_of(1, g0, g1)); return g0 > 1 ? 
g0 : g1; } @@ -59,13 +60,16 @@ class scales_query_t { int get_group_dim() const { // If groups are not identified, they should be set to `1`, and // it shouldn't hurt to divide by 1 any dim. Just use 0th for that. - if (scales_.ndims_ < 2) return 0; - const auto g0 = scales_.group_dims_[0]; - const auto g1 = scales_.group_dims_[1]; + if (scales_.has_default_groups()) return 0; + + const auto g0 = scales_.get_group(0); + const auto g1 = scales_.get_group(1); assert(utils::one_of(1, g0, g1)); UNUSED(g1); const int g_dim = g0 > 1 ? 0 : 1; - return ndims_ - scales_.ndims_ + g_dim; + // Note: hardcoded value so far. + // TODO: replace with some API when ndims can be different from 2. + return ndims_ - /* scales_.get_groups_ndims() = */ 2 + g_dim; } memory_storage_t &get_scales(const exec_ctx_t &ctx) const { @@ -77,11 +81,11 @@ class scales_query_t { int arg) : arg_(arg), ndims_(mdw.ndims()) { scales_ = attr->scales_.get(arg); - count_ = get_attr_oscales_count(scales_.mask_, mdw); + count_ = get_attr_oscales_count(scales_.get_mask(), mdw); } private: - runtime_scales_t scales_; + quant_entry_t scales_; dim_t count_ = 0; int arg_ = 0; int ndims_ = 0; diff --git a/src/gpu/intel/jit/conv/config.cpp b/src/gpu/intel/jit/conv/config.cpp index 711cc8845f7..64f3113ea36 100644 --- a/src/gpu/intel/jit/conv/config.cpp +++ b/src/gpu/intel/jit/conv/config.cpp @@ -974,7 +974,9 @@ bool post_ops_ok(const conv_problem_t &prb, const hw_t &hw) { scales[i] = scale_args[i].second; if (!attr->scales_.has_default_values(scales)) return false; for (int arg : scales) { - int mask = attr->scales_.get(arg).mask_; + if (attr->scales_.has_default_values(arg)) continue; + + int mask = attr->scales_.get(arg).get_mask(); // XXX: per_oc for BWD_D is treated as per_ic assuming it's called from // deconvolution. if (arg == DNNL_ARG_WEIGHTS) { diff --git a/src/gpu/intel/jit/gemm/gen_gemm.hpp b/src/gpu/intel/jit/gemm/gen_gemm.hpp index ae7b6df8a25..20c2155a38b 100644 --- a/src/gpu/intel/jit/gemm/gen_gemm.hpp +++ b/src/gpu/intel/jit/gemm/gen_gemm.hpp @@ -307,11 +307,15 @@ struct gen_gemm_t : public gpu_gemm_t { auto &wei_scales = attr()->scales_.get(DNNL_ARG_WEIGHTS); auto &src_scales = attr()->scales_.get(DNNL_ARG_SRC); - if (quant_enabled_ && wei_scales.ndims_ > 1) wei_scales_2d_ = true; - if (quant_enabled_ && src_scales.ndims_ > 1) src_scales_2d_ = true; + if (quant_enabled_ && !wei_scales.has_default_groups()) + wei_scales_2d_ = true; + if (quant_enabled_ && !src_scales.has_default_groups()) + src_scales_2d_ = true; for (auto s : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) { - auto mask = attr()->scales_.get(s).mask_; + if (attr()->scales_.has_default_values(s)) continue; + + auto mask = attr()->scales_.get_mask(s); VDISPATCH_GEMM(utils::one_of(mask, 0, mask_scalar, mask_per_oc, mask_per_ic) || (s == DNNL_ARG_WEIGHTS && wei_scales_2d_ @@ -322,12 +326,11 @@ struct gen_gemm_t : public gpu_gemm_t { } if (wei_scales_2d_) { - auto scales_group_k - = wei_scales.ndims_ > 0 ? wei_scales.group_dims_[0] : 1; + auto scales_group_k = wei_scales.get_group(0); if (scales_group_k >= d->k()) { wei_scales_2d_ = false; } else { - wei_scales_type = wei_scales.data_type_; + wei_scales_type = wei_scales.get_data_type(); if (!wei_zp_2d) wei_q2d_group_k = scales_group_k; else { @@ -336,14 +339,13 @@ struct gen_gemm_t : public gpu_gemm_t { } } // Non-trivial N group unsupported. 
- VDISPATCH_GEMM(wei_scales.group_dims_[1] == 1, + VDISPATCH_GEMM(wei_scales.get_group(1) == 1, VERBOSE_UNSUPPORTED_SCALES_CFG); } if (src_scales_2d_) { - src_scales_type = src_scales.data_type_; - src_po_sc_ = src_scales.mask_ == 2; - auto scales_group_k - = src_scales.ndims_ > 0 ? src_scales.group_dims_[1] : 1; + src_scales_type = src_scales.get_data_type(); + src_po_sc_ = src_scales.get_mask() == 2; + auto scales_group_k = src_scales.get_group(1); if (scales_group_k >= d->k()) src_scales_2d_ = false; else { diff --git a/src/gpu/intel/jit/gemm/jit_gemm_pd.cpp b/src/gpu/intel/jit/gemm/jit_gemm_pd.cpp index 20ca4105a33..a31bb52d744 100644 --- a/src/gpu/intel/jit/gemm/jit_gemm_pd.cpp +++ b/src/gpu/intel/jit/gemm/jit_gemm_pd.cpp @@ -92,71 +92,67 @@ status_t jit_gemm_pd_t::init_post_ops() { } if (!wei_scales->has_default_values()) { - const auto &mask = wei_scales->mask_; + const auto &mask = wei_scales->get_mask(); bool convert = (mask == 0 || math::is_pow2(mask)); - if (wei_scales->ndims_ > 1) - convert |= (wei_scales->group_dims_[0] >= d->k()); + if (!wei_scales->has_default_groups()) + convert |= (wei_scales->get_group(0) >= d->k()); if (convert) { - ok = ok && (mask == 0 || mask == (1 << (d->c_desc.ndims - 1))); - dim_t dims = {(mask > 0) ? d->m() : 1}; CHECK(memory_desc_init_by_tag(wei_scales_md, 1, &dims, - wei_scales->data_type_, format_tag::a)); + wei_scales->get_data_type(), format_tag::a)); - auto status = post_ops_.prepend_binary(binary_mul, &wei_scales_md); - if (status != status::success) return status; + CHECK(post_ops_.prepend_binary(binary_mul, &wei_scales_md)); binary_srcs_.insert(binary_srcs_.begin(), binary_src_t {binary_src_t::scales, DNNL_ARG_WEIGHTS}); } } if (!src_scales->has_default_values()) { - const auto &mask = src_scales->mask_; + const auto &mask = src_scales->get_mask(); bool convert = (mask == 0); - if (src_scales->ndims_ > 1) { - convert |= (src_scales->group_dims_[1] >= d->k()); - convert |= (src_scales->group_dims_[0] >= d->n()); + if (!src_scales->has_default_groups()) { + convert |= (src_scales->get_group(1) >= d->k()); + convert |= (src_scales->get_group(0) >= d->n()); } if (convert) { if (mask == 0) { dim_t dims = 1; CHECK(memory_desc_init_by_tag(src_scales_md, 1, &dims, - src_scales->data_type_, format_tag::a)); - } else if (src_scales->ndims_ > 1) { - int n_group = src_scales->group_dims_[0]; - int k_group = src_scales->group_dims_[1]; + src_scales->get_data_type(), format_tag::a)); + } else if (!src_scales->has_default_groups()) { + // TODO: is it inverted? + int n_group = src_scales->get_group(0); + int k_group = src_scales->get_group(1); dim_t dims[] = {(mask & (d->batch() > 1 ? 2 : 1)) ? 
d->n() / n_group : 1, d->k() / k_group}; CHECK(memory_desc_init_by_tag(src_scales_md, 2, dims, - src_scales->data_type_, format_tag::ab)); + src_scales->get_data_type(), format_tag::ab)); } else { dim_t dims[] = {d->n(), 1}; CHECK(memory_desc_init_by_tag(src_scales_md, 2, dims, - src_scales->data_type_, format_tag::ab)); + src_scales->get_data_type(), format_tag::ab)); } - auto status = post_ops_.prepend_binary(binary_mul, &src_scales_md); - if (status != status::success) return status; + CHECK(post_ops_.prepend_binary(binary_mul, &src_scales_md)); binary_srcs_.insert(binary_srcs_.begin(), binary_src_t {binary_src_t::scales, DNNL_ARG_SRC}); } } if (!c_scales->has_default_values()) { - const auto &mask = c_scales->mask_; + const auto &mask = c_scales->get_mask(); bool convert = (mask == 0 || math::is_pow2(mask)); - if (c_scales->ndims_ > 1) - convert |= (c_scales->group_dims_[0] >= d->m()); + if (!c_scales->has_default_groups()) + convert |= (c_scales->get_group(0) >= d->m()); if (convert) { ok = ok && (mask == 0 || mask == (1 << (d->c_desc.ndims - 1))); dim_t dims = {(mask > 0) ? d->m() : 1}; CHECK(memory_desc_init_by_tag(c_scales_md, 1, &dims, - c_scales->data_type_, format_tag::a)); + c_scales->get_data_type(), format_tag::a)); - auto status = post_ops_.append_binary(binary_div, &c_scales_md); - if (status != status::success) return status; + CHECK(post_ops_.append_binary(binary_div, &c_scales_md)); binary_srcs_.push_back( binary_src_t {binary_src_t::scales, DNNL_ARG_DST}); diff --git a/src/gpu/intel/jit/ir/post_ops.cpp b/src/gpu/intel/jit/ir/post_ops.cpp index 4ca7837763a..c8b2e1544af 100644 --- a/src/gpu/intel/jit/ir/post_ops.cpp +++ b/src/gpu/intel/jit/ir/post_ops.cpp @@ -50,12 +50,10 @@ post_op_context_t::post_op_context_t(const primitive_attr_t &attr, if (buf.is_empty()) continue; int key = kernel_info.key(scale_args[i].first) & ~DNNL_ARG_ATTR_SCALES; - auto scales = attr.scales_.get(key); - if (scales.has_default_values()) continue; - int mask = scales.mask_; - auto sc_type = scales.data_type_ == data_type::undef - ? type_t::f32() - : scales.data_type_; + if (attr.scales_.has_default_values(key)) continue; + + int mask = attr.scales_.get_mask(key); + auto sc_type = attr.scales_.get_data_type(key); view_t view; switch (key) { case DNNL_ARG_SRC: @@ -67,8 +65,8 @@ post_op_context_t::post_op_context_t(const primitive_attr_t &attr, break; case DNNL_ARG_WEIGHTS: // Convert o/i weights mask to src/dst. - // XXX: per_oc for BWD_D is treated as per_ic assuming it's - // called from deconvolution. + // XXX: per_oc for BWD_D is treated as per_ic assuming + // it's called from deconvolution. ir_assert(utils::one_of(mask, 0, 1, 3)); wei_scales_type = sc_type; view = po_vm_.create_view(sc_type, (mask) ? 1 << 1 : 0); @@ -76,7 +74,7 @@ post_op_context_t::post_op_context_t(const primitive_attr_t &attr, wei_scales_mask = mask; break; case DNNL_ARG_DST: // Invert dst scales right after load. 
- ir_assert(utils::one_of(mask, 0, 2)); + ir_assert(mask == 0); dst_scales_type = sc_type; view = po_vm_.create_view(sc_type, mask); dst_scales = add_input_tensor(view, buf); @@ -297,7 +295,8 @@ bool post_op_context_t::init_need_to_restore_zero_padding( && out_md.dims[1] != out_md.padded_dims[1]) return true; auto dst_scales = attr.scales_.get(DNNL_ARG_DST); - if (!dst_scales.has_default_values() && dst_scales.mask_ != 0) return true; + if (!dst_scales.has_default_values() && dst_scales.get_mask() != 0) + return true; return false; } diff --git a/src/gpu/intel/jit/ir/tensor_config.cpp b/src/gpu/intel/jit/ir/tensor_config.cpp index a7c5e4f7c8d..c21a198650d 100644 --- a/src/gpu/intel/jit/ir/tensor_config.cpp +++ b/src/gpu/intel/jit/ir/tensor_config.cpp @@ -59,9 +59,8 @@ void init_extra_tensors(const zero_points_config_t &zp_cfg, auto scale_args = get_scale_args(); for (int i = 0; i < (int)scale_args.size(); i++) { int arg = scale_args[i].second; - auto &s = attr.scales_.get(arg); - if (s.has_default_values()) continue; - std::vector dims = {(s.mask_ == 0) ? 1 : oc}; + if (attr.scales_.has_default_values(arg)) continue; + std::vector dims = {(attr.scales_.get_mask(arg) == 0) ? 1 : oc}; layout_t layout(type_t::f32(), 0, dims); int arg_key = DNNL_ARG_ATTR_SCALES | arg; tensor_cfg.add_tensor(scale_args[i].first, arg_key, /*is_input=*/true, diff --git a/src/gpu/intel/jit/reorder/gen_reorder.cpp b/src/gpu/intel/jit/reorder/gen_reorder.cpp index 974b35210a8..fe34e153b02 100644 --- a/src/gpu/intel/jit/reorder/gen_reorder.cpp +++ b/src/gpu/intel/jit/reorder/gen_reorder.cpp @@ -58,8 +58,13 @@ status_t gen_reorder_t::pd_t::init(impl::engine_t *engine, return true; }; auto scales_ok = [&]() { - return (attr()->scales_.get(DNNL_ARG_SRC).mask_ == 0) - && (attr()->scales_.get(DNNL_ARG_DST).mask_ == 0); + const bool src_scale_ok + = attr()->scales_.has_default_values(DNNL_ARG_SRC) + || attr()->scales_.get_mask(DNNL_ARG_SRC) == 0; + const bool dst_scale_ok + = attr()->scales_.has_default_values(DNNL_ARG_DST) + || attr()->scales_.get_mask(DNNL_ARG_DST) == 0; + return src_scale_ok && dst_scale_ok; }; auto is_bf16_or_f32_or_f8 = [](data_type_t dt) { return utils::one_of(dt, bf16, f32, f8_e5m2, f8_e4m3); diff --git a/src/gpu/intel/ocl/convolution_inner_product.cpp b/src/gpu/intel/ocl/convolution_inner_product.cpp index b86c67e17c2..a103c85320a 100644 --- a/src/gpu/intel/ocl/convolution_inner_product.cpp +++ b/src/gpu/intel/ocl/convolution_inner_product.cpp @@ -198,7 +198,7 @@ status_t convolution_inner_product_fwd_t::execute_forward( const auto &args = ctx.args(); for (const int arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) { - if (pd()->attr()->scales_.get(arg).has_default_values()) continue; + if (pd()->attr()->scales_.has_default_values(arg)) continue; c_args[DNNL_ARG_ATTR_SCALES | arg] = args.at(DNNL_ARG_ATTR_SCALES | arg); diff --git a/src/gpu/intel/ocl/gemm/gemm_with_post_ops.cpp b/src/gpu/intel/ocl/gemm/gemm_with_post_ops.cpp index f3542d69934..3b9361eee05 100644 --- a/src/gpu/intel/ocl/gemm/gemm_with_post_ops.cpp +++ b/src/gpu/intel/ocl/gemm/gemm_with_post_ops.cpp @@ -46,7 +46,9 @@ status_t gemm_with_post_ops_t::pd_t::init(impl::engine_t *engine) { const primitive_attr_t *attributes_with_po = attr(); for (int arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) { - const auto &mask = attr()->scales_.get(arg).mask_; + if (attr()->scales_.has_default_values(arg)) continue; + + const auto &mask = attr()->scales_.get_mask(arg); if (arg == DNNL_ARG_WEIGHTS && !wei_decomp) VDISPATCH_GEMM((mask == 0 || 
mask == (1 << (dst_md()->ndims - 1))), VERBOSE_UNSUPPORTED_SCALES_CFG); @@ -88,7 +90,7 @@ status_t gemm_with_post_ops_t::pd_t::init(impl::engine_t *engine) { // Setup empty attributes but keep zero points for gemm. primitive_attr_t attributes_without_po = *attr(); attributes_without_po.set_post_ops(post_ops_t()); - attributes_without_po.scales_ = arg_scales_t(); + attributes_without_po.scales_ = scales_t(); attributes_without_po.zero_points_ = zero_points_t(); int src_mask, wei_mask; auto zp = attributes_with_po->zero_points_; @@ -158,12 +160,10 @@ status_t gemm_with_post_ops_t::pd_t::init_kernel_ctx( kernel_ctx.set_data_type(c_type); const auto &attr_scales = attr()->scales_; - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); const bool with_wei_scales - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const bool with_dst_scales - = !attr_scales.get(DNNL_ARG_DST).has_default_values(); + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); + const bool with_dst_scales = !attr_scales.has_default_values(DNNL_ARG_DST); auto is_int_type = [](data_type_t t) { return utils::one_of(t, data_type::s8, data_type::u8, data_type::s32); }; @@ -250,7 +250,7 @@ status_t gemm_with_post_ops_t::execute(const gemm_exec_ctx_t &ctx) const { arg_list.set(idx++, GEMM_CTX_ARG_STORAGE(a_scales)); arg_list.set(idx++, GEMM_CTX_ARG_STORAGE(c_scales)); arg_list.set(idx++, - pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0 ? 1 : 0); + pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0 ? 1 : 0); arg_list.set(idx, GEMM_CTX_ARG_STORAGE(c_zero_point)); auto nd_range = pd()->dispatch_.nd_range(); exec_status = parallel_for(ctx, nd_range, post_process_kernel_, arg_list); diff --git a/src/gpu/intel/ocl/gemm/ref_gemm.hpp b/src/gpu/intel/ocl/gemm/ref_gemm.hpp index b3452437ea7..efb6f412fd4 100644 --- a/src/gpu/intel/ocl/gemm/ref_gemm.hpp +++ b/src/gpu/intel/ocl/gemm/ref_gemm.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -136,9 +136,14 @@ struct ref_gemm_t : public gpu_gemm_t { bool attr_oscale_ok() const { const auto &scales = attr()->scales_; - return scales.get(DNNL_ARG_SRC).mask_ == 0 - && scales.get(DNNL_ARG_WEIGHTS).mask_ == 0 - && scales.get(DNNL_ARG_DST).mask_ == 0; + const bool src_scale_ok = scales.has_default_values(DNNL_ARG_SRC) + || scales.get_mask(DNNL_ARG_SRC) == 0; + const bool wei_scale_ok + = scales.has_default_values(DNNL_ARG_WEIGHTS) + || scales.get_mask(DNNL_ARG_WEIGHTS) == 0; + const bool dst_scale_ok = scales.has_default_values(DNNL_ARG_DST) + || scales.get_mask(DNNL_ARG_DST) == 0; + return src_scale_ok && wei_scale_ok && dst_scale_ok; } bool attr_zp_ok() const { diff --git a/src/gpu/intel/ocl/gemm_inner_product.hpp b/src/gpu/intel/ocl/gemm_inner_product.hpp index 6e8d36b80b0..d7e8810595a 100644 --- a/src/gpu/intel/ocl/gemm_inner_product.hpp +++ b/src/gpu/intel/ocl/gemm_inner_product.hpp @@ -95,14 +95,19 @@ struct gemm_inner_product_fwd_t : public gpu_primitive_t { "memory_desc_reshape()"); } primitive_attr_t gemm_attr = *attr(); - auto wei_mask = gemm_attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_mask == 1) //transpose mask for gemm - VDISPATCH_INNER_PRODUCT_SC( - gemm_attr.scales_.set( - DNNL_ARG_WEIGHTS, 1 << (b_md.ndims - 1)), - VERBOSE_UNSUPPORTED_ATTR); - else if (wei_mask != 0) - return status::unimplemented; + if (!gemm_attr.scales_.has_default_values(DNNL_ARG_WEIGHTS)) { + auto wei_mask = gemm_attr.scales_.get_mask(DNNL_ARG_WEIGHTS); + if (wei_mask == 1) { + // Transpose the mask for gemm. + VDISPATCH_INNER_PRODUCT_SC( + gemm_attr.scales_.set( + DNNL_ARG_WEIGHTS, 1 << (b_md.ndims - 1)), + VERBOSE_UNSUPPORTED_SCALES_CFG); + } else { + VDISPATCH_INNER_PRODUCT( + wei_mask == 0, VERBOSE_UNSUPPORTED_ATTR); + } + } VDISPATCH_INNER_PRODUCT_SC( create_gemm_pd(gemm_pd_, engine, &a_md, &b_md, &c_md, &bias_md, desc()->accum_data_type, &gemm_attr, diff --git a/src/gpu/intel/ocl/gemm_matmul.hpp b/src/gpu/intel/ocl/gemm_matmul.hpp index 5b1e19cbaf3..24e717e56b0 100644 --- a/src/gpu/intel/ocl/gemm_matmul.hpp +++ b/src/gpu/intel/ocl/gemm_matmul.hpp @@ -70,17 +70,25 @@ struct gemm_matmul_t : public gpu_primitive_t { return status::success; }; - auto adjust_scales_mask = [&](arg_scales_t &scales, int arg, - int diff_dims) { - int mask = 0, nd = 0; - bool is_set = false; - data_type_t dt = dnnl_data_type_undef; - dims_t dims = {}; - CHECK(attr()->scales_.get(arg, &mask, &is_set, &nd, dims, &dt)); - mask = mask >> diff_dims; - if (is_set) { CHECK(scales.set(arg, mask, nd, dims, dt)); } - return status::success; - }; + // The function shrinks the mask for scales and updates it in + // `scales` object. + auto adjust_scales_mask + = [&](scales_t &scales, int arg, int diff_dims) { + if (attr()->scales_.has_default_values(arg)) + return status::success; + + int mask = attr()->scales_.get_mask(arg) >> diff_dims; + data_type_t dt = attr()->scales_.get_data_type(arg); + int nd = 0; + dims_t dims {}; + if (!attr()->scales_.get(arg).has_default_groups()) { + nd = 2; // Note: hardcoded so far. 
+ dims[0] = attr()->scales_.get_group(arg, 0); + dims[1] = attr()->scales_.get_group(arg, 1); + } + CHECK(scales.set(arg, mask, dt, nd, dims)); + return status::success; + }; if (!attr()->zero_points_.has_default_values()) { CHECK(map_gemm_zp(DNNL_ARG_SRC, DNNL_ARG_B)); CHECK(map_gemm_zp( diff --git a/src/gpu/intel/ocl/gen9_binary.hpp b/src/gpu/intel/ocl/gen9_binary.hpp index 2ef8df1c52e..40b256f274f 100644 --- a/src/gpu/intel/ocl/gen9_binary.hpp +++ b/src/gpu/intel/ocl/gen9_binary.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,13 +63,17 @@ struct gen9_binary_t : public gpu_primitive_t { VDISPATCH_BINARY(!is_ternary_op(), VERBOSE_BAD_ALGORITHM); VDISPATCH_BINARY( IMPLICATION(!attr()->scales_.has_default_values(), - utils::one_of(dst_md()->data_type, s8, u8) - && utils::everyone_is( - attr()->scales_.get(DNNL_ARG_SRC_0) - .mask_, - attr()->scales_.get(DNNL_ARG_SRC_1) - .mask_, - 0)), + utils::one_of(dst_md()->data_type, s8, u8)), + VERBOSE_UNSUPPORTED_SCALES_CFG); + VDISPATCH_BINARY( + IMPLICATION(!attr()->scales_.get(DNNL_ARG_SRC_0) + .has_default_values(), + attr()->scales_.get_mask(DNNL_ARG_SRC_0) == 0), + VERBOSE_UNSUPPORTED_SCALES_CFG); + VDISPATCH_BINARY( + IMPLICATION(!attr()->scales_.get(DNNL_ARG_SRC_1) + .has_default_values(), + attr()->scales_.get_mask(DNNL_ARG_SRC_1) == 0), VERBOSE_UNSUPPORTED_SCALES_CFG); VDISPATCH_BINARY(attr()->has_default_values(attr_skip_mask), VERBOSE_UNSUPPORTED_ATTR); diff --git a/src/gpu/intel/ocl/gen9_softmax.hpp b/src/gpu/intel/ocl/gen9_softmax.hpp index f7a1880e1bd..0d2a6dd0913 100644 --- a/src/gpu/intel/ocl/gen9_softmax.hpp +++ b/src/gpu/intel/ocl/gen9_softmax.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -215,9 +215,9 @@ struct gen9_softmax_fwd_t : public gpu_primitive_t { kernel_ctx.add_option("-cl-std=CL2.0"); kernel_ctx.define_int("LOGSOFTMAX", pd()->is_logsoftmax()); kernel_ctx.define_int("WITH_SRC_SCALES", - !pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values()); + !pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC)); kernel_ctx.define_int("WITH_DST_SCALES", - !pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values()); + !pd()->attr()->scales_.has_default_values(DNNL_ARG_DST)); const memory_desc_wrapper dst_mdw(pd()->dst_md()); const memory_desc_wrapper src_mdw(pd()->src_md()); diff --git a/src/gpu/intel/ocl/generic_reorder.cpp b/src/gpu/intel/ocl/generic_reorder.cpp index 3094556cef4..cdd592889dc 100644 --- a/src/gpu/intel/ocl/generic_reorder.cpp +++ b/src/gpu/intel/ocl/generic_reorder.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021-2024 Intel Corporation +* Copyright 2021-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
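The adjust_scales_mask lambda above shrinks a matmul-rank mask to gemm rank by shifting off the leading (batch) dimensions: mask bit d corresponds to logical dimension d counted from the outermost, so dropping diff_dims leading dims is a right shift. A worked example of that bit arithmetic (values hypothetical):

    #include <cassert>

    int main() {
        // 4D matmul weights (b0, b1, K, N): the per-K-and-N ("per_ocic")
        // mask sets bits 2 and 3, i.e. (1 << 2) | (1 << 3) == 12.
        int mask_4d = 12;
        int diff_dims = 2; // the 2D gemm view drops the two batch dims
        int mask_2d = mask_4d >> diff_dims;
        assert(mask_2d == 3); // bits 0 and 1: per-K-and-N on the (K, N) view
        return 0;
    }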
@@ -791,8 +791,8 @@ status_t generic_reorder_t::pd_t::init_conf(impl::engine_t *engine) { memcpy(&new_a, src_md(), sizeof(new_a)); memcpy(&new_b, dst_md(), sizeof(new_b)); compress(new_a, new_b, src_mask, dst_mask); - if (src_mask) CHECK(attr_copy.scales_.set(DNNL_ARG_SRC, src_mask)); - if (dst_mask) CHECK(attr_copy.scales_.set(DNNL_ARG_DST, dst_mask)); + if (src_mask >= 0) { CHECK(attr_copy.scales_.set(DNNL_ARG_SRC, src_mask)); } + if (dst_mask >= 0) { CHECK(attr_copy.scales_.set(DNNL_ARG_DST, dst_mask)); } if (!is_generic_faster_than_ref(new_a, new_b)) return status::unimplemented; diff --git a/src/gpu/intel/ocl/micro_sdpa.cpp b/src/gpu/intel/ocl/micro_sdpa.cpp index 41f007f3acb..75851ffbf06 100644 --- a/src/gpu/intel/ocl/micro_sdpa.cpp +++ b/src/gpu/intel/ocl/micro_sdpa.cpp @@ -203,10 +203,11 @@ sdpa_config_t *choose_config_xehpc( /// | 3 (1100) | true | /// | 1 (1000) | true | /// | 8 (0001) | false | -bool with_quantize_common(const runtime_scales_t &scales) { - return !scales.has_default_values() - && (((scales.mask_ & 3) != 0 && (scales.mask_ & 12) == 0) - || scales.mask_ == 0); +bool with_quantize_common(const quant_entry_t &scale_entry) { + return !scale_entry.has_default_values() + && (((scale_entry.get_mask() & 3) != 0 + && (scale_entry.get_mask() & 12) == 0) + || scale_entry.get_mask() == 0); } /// Returns true if a common zero points value is used for each slice of the diff --git a/src/gpu/intel/ocl/micro_sdpa.hpp b/src/gpu/intel/ocl/micro_sdpa.hpp index 5942b4ea2a9..0e2f748ed8a 100644 --- a/src/gpu/intel/ocl/micro_sdpa.hpp +++ b/src/gpu/intel/ocl/micro_sdpa.hpp @@ -92,7 +92,7 @@ struct micro_sdpa_t : public gpu_primitive_t { "tensors", qry_md()->dims[1], key_md()->dims[1], val_md()->dims[1]); - int kq_scales_mask = desc()->kq_scales.mask_; + int kq_scales_mask = desc()->kq_scales.get_mask(); int kq_zp_mask = desc()->kq_zero_points.get(DNNL_ARG_WEIGHTS); if (!desc()->kq_scales.has_default_values() && !desc()->kq_zero_points.has_default_values()) @@ -119,7 +119,7 @@ struct micro_sdpa_t : public gpu_primitive_t { key_group_size()); } - int vs_scales_mask = desc()->vs_scales.mask_; + int vs_scales_mask = desc()->vs_scales.get_mask(); int vs_zp_mask = desc()->vs_zero_points.get(DNNL_ARG_WEIGHTS); if (!desc()->vs_scales.has_default_values() && !desc()->vs_zero_points.has_default_values()) diff --git a/src/gpu/intel/ocl/multi_po_reorder_binary.hpp b/src/gpu/intel/ocl/multi_po_reorder_binary.hpp index e10b1f4e961..4b9c73a0444 100644 --- a/src/gpu/intel/ocl/multi_po_reorder_binary.hpp +++ b/src/gpu/intel/ocl/multi_po_reorder_binary.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
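with_quantize_common() above now takes the quant_entry_t directly, but its mask test is unchanged: quantization is "common" when the mask is 0, or touches only the two leading dimensions (mask & 3) while leaving the two trailing ones alone ((mask & 12) == 0). A standalone restatement reproducing the truth table from the doc comment:

    #include <cassert>

    // `set` models !scale_entry.has_default_values().
    static bool with_quantize_common(bool set, int mask) {
        return set && (((mask & 3) != 0 && (mask & 12) == 0) || mask == 0);
    }

    int main() {
        assert(with_quantize_common(true, 0));   // common scale
        assert(with_quantize_common(true, 1));   // "1000" row of the table
        assert(with_quantize_common(true, 3));   // "1100" row
        assert(!with_quantize_common(true, 8));  // "0001" row: trailing dim
        assert(!with_quantize_common(false, 0)); // default values: no quant
        return 0;
    }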
@@ -41,8 +41,8 @@ struct multi_po_reorder_binary : public gpu_primitive_t { DECLARE_COMMON_PD_T("multi_po_reorder_binary", multi_po_reorder_binary); status_t init(impl::engine_t *engine) { - if (attr()->scales_.get(DNNL_ARG_SRC_0).is_set_ - || attr()->scales_.get(DNNL_ARG_SRC_1).is_set_ + if (!attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values() + || !attr()->scales_.get(DNNL_ARG_SRC_1).has_default_values() || attr()->post_ops_.len() >= 1) { VDISPATCH_BINARY(false, VERBOSE_UNSUPPORTED_ATTR); } diff --git a/src/gpu/intel/ocl/ref_group_normalization.cpp b/src/gpu/intel/ocl/ref_group_normalization.cpp index a5e5f14121d..4cd8dded952 100644 --- a/src/gpu/intel/ocl/ref_group_normalization.cpp +++ b/src/gpu/intel/ocl/ref_group_normalization.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -108,9 +108,9 @@ status_t ref_group_normalization_fwd_t::pd_t::init_kernel_ctx( compute::kernel_ctx_t &kernel_ctx) const { kernel_ctx.define_int("WITH_SRC_SCALES", - !attr()->scales_.get(DNNL_ARG_SRC).has_default_values()); + !attr()->scales_.has_default_values(DNNL_ARG_SRC)); kernel_ctx.define_int("WITH_DST_SCALES", - !attr()->scales_.get(DNNL_ARG_DST).has_default_values()); + !attr()->scales_.has_default_values(DNNL_ARG_DST)); init_kernel_ctx_common(kernel_ctx, this); // promote macros defined by parameters to OpenCL command line diff --git a/src/gpu/intel/ocl/ref_layer_normalization.hpp b/src/gpu/intel/ocl/ref_layer_normalization.hpp index 45fb291964a..13d50f23d2d 100644 --- a/src/gpu/intel/ocl/ref_layer_normalization.hpp +++ b/src/gpu/intel/ocl/ref_layer_normalization.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -93,9 +93,9 @@ struct ref_layer_normalization_fwd_t : public gpu_primitive_t { CHECK(status); kernel_ctx.define_int("WITH_SRC_SCALES", - !pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values()); + !pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC)); kernel_ctx.define_int("WITH_DST_SCALES", - !pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values()); + !pd()->attr()->scales_.has_default_values(DNNL_ARG_DST)); CHECK(create_kernel(engine, &kernel_, "ref_lnorm_fwd", kernel_ctx)); if (!kernel_) return status::runtime_error; diff --git a/src/gpu/intel/ocl/ref_matmul.cpp b/src/gpu/intel/ocl/ref_matmul.cpp index 8fa152073d5..62ab19de3cf 100644 --- a/src/gpu/intel/ocl/ref_matmul.cpp +++ b/src/gpu/intel/ocl/ref_matmul.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
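The group/layer normalization hunks above (and the softmax ones further down) all derive the WITH_SRC_SCALES / WITH_DST_SCALES kernel macros from the arg-level has_default_values() query. Presumably define_int ends up as a -D option on the OpenCL build line; a minimal model of that host-side pattern, with the option syntax assumed:

    #include <cassert>
    #include <string>

    std::string make_options(bool src_scales_set, bool dst_scales_set) {
        std::string opts = "-DWITH_SRC_SCALES=";
        opts += std::to_string(src_scales_set ? 1 : 0);
        opts += " -DWITH_DST_SCALES=";
        opts += std::to_string(dst_scales_set ? 1 : 0);
        return opts;
    }

    int main() {
        assert(make_options(true, false)
                == "-DWITH_SRC_SCALES=1 -DWITH_DST_SCALES=0");
        return 0;
    }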
@@ -84,15 +84,13 @@ status_t ref_matmul_t::execute_ref(const exec_ctx_t &ctx) const { const dim_t K = a_d.dims()[last]; const auto &attr_scales = pd()->attr()->scales_; - const int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); const bool wei_scale_per_k = wei_scale_mask & pd()->wei_qmask_K(); - const auto wei_scale_group_ndim = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; - const auto wei_scale_group_k = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[0] + const auto wei_scale_group_k + = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_groups() + ? attr_scales.get_group(DNNL_ARG_WEIGHTS, 0) : (wei_scale_per_k ? 1 : K); - const auto wei_scale_group_n = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[1] - : 1; + const auto wei_scale_group_n = attr_scales.get_group(DNNL_ARG_WEIGHTS, 1); const auto wei_scale_ngroups_k = K / wei_scale_group_k; // Identify wei_scales dimensions as user may not pass them. dims_t wei_scale_dims {}; @@ -120,11 +118,11 @@ status_t ref_matmul_t::execute_ref(const exec_ctx_t &ctx) const { const dim_t wei_scale_stride_b1 = b_d.ndims() > 3 ? wei_scale_strides[b_d.ndims() - 4] : 0; - const int src_scale_mask = attr_scales.get(DNNL_ARG_SRC).mask_; + const int src_scale_mask = attr_scales.get_mask(DNNL_ARG_SRC); const bool src_scale_per_k = src_scale_mask & pd()->src_qmask_K(); - const auto src_scale_group_ndim = attr_scales.get(DNNL_ARG_SRC).ndims_; - const auto src_scale_group_k = src_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_SRC).group_dims_[1] + const auto src_scale_group_k + = !attr_scales.get(DNNL_ARG_SRC).has_default_groups() + ? attr_scales.get_group(DNNL_ARG_SRC, 1) : (src_scale_per_k ? 1 : K); const auto src_scale_ngroups_k = K / src_scale_group_k; // Identify src_scales dimensions as user may not pass them. diff --git a/src/gpu/intel/ocl/ref_matmul.hpp b/src/gpu/intel/ocl/ref_matmul.hpp index 428f0289c46..b20ac69a940 100644 --- a/src/gpu/intel/ocl/ref_matmul.hpp +++ b/src/gpu/intel/ocl/ref_matmul.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
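The ref_matmul hunk above derives the K-group count from the new group query: with grouped weight scales, one scale value covers get_group(DNNL_ARG_WEIGHTS, 0) consecutive K elements. A worked sketch of that arithmetic, assuming K is divisible by the group size as the implementation does:

    #include <cassert>

    int main() {
        const int K = 256;
        const int wei_scale_group_k = 32; // get_group(DNNL_ARG_WEIGHTS, 0)
        const int ngroups_k = K / wei_scale_group_k;
        assert(ngroups_k == 8); // scales tensor has K/group_k entries along K
        // The scale for element k sits at group index k / group_k.
        const int k = 100;
        assert(k / wei_scale_group_k == 3);
        return 0;
    }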
diff --git a/src/gpu/intel/ocl/ref_matmul.hpp b/src/gpu/intel/ocl/ref_matmul.hpp
index 428f0289c46..b20ac69a940 100644
--- a/src/gpu/intel/ocl/ref_matmul.hpp
+++ b/src/gpu/intel/ocl/ref_matmul.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -230,19 +230,19 @@ struct ref_matmul_t : public gpu_primitive_t {
         def_data_type(kernel_ctx, pd()->bia_dt_, "BIA");
         def_data_type(kernel_ctx, pd()->desc()->accum_data_type, "ACC");
         def_data_type(kernel_ctx,
-                pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).data_type_,
+                pd()->attr()->scales_.get_data_type(DNNL_ARG_WEIGHTS),
                 "WEI_SCALES");
         def_data_type(kernel_ctx,
                 pd()->attr()->zero_points_.get_data_type(DNNL_ARG_WEIGHTS),
                 "WEI_ZP");
         def_data_type(kernel_ctx,
-                pd()->attr()->scales_.get(DNNL_ARG_SRC).data_type_,
+                pd()->attr()->scales_.get_data_type(DNNL_ARG_SRC),
                 "SRC_SCALES");
         def_data_type(kernel_ctx,
                 pd()->attr()->zero_points_.get_data_type(DNNL_ARG_SRC),
                 "SRC_ZP");
         def_data_type(kernel_ctx,
-                pd()->attr()->scales_.get(DNNL_ARG_DST).data_type_,
+                pd()->attr()->scales_.get_data_type(DNNL_ARG_DST),
                 "DST_SCALES");
         kernels_.resize(2);
         CHECK(create_kernel(engine, &kernels_[0], "ref_matmul", kernel_ctx));
diff --git a/src/gpu/intel/ocl/reusable_lnorm.cpp b/src/gpu/intel/ocl/reusable_lnorm.cpp
index 77b2c65d9cf..35fe364e7e9 100644
--- a/src/gpu/intel/ocl/reusable_lnorm.cpp
+++ b/src/gpu/intel/ocl/reusable_lnorm.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023-2024 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -49,8 +49,8 @@ static status_t init_conf_common(const layer_normalization_pd_t *pd,
     conf->dst_dt = dst_buf.data_type;
 
     auto scales = pd->attr()->scales_;
-    conf->with_src_scale = !scales.get(DNNL_ARG_SRC).has_default_values();
-    conf->with_dst_scale = !scales.get(DNNL_ARG_DST).has_default_values();
+    conf->with_src_scale = !scales.has_default_values(DNNL_ARG_SRC);
+    conf->with_dst_scale = !scales.has_default_values(DNNL_ARG_DST);
 
     // We require that the lnorm axis is a single dense block, so that it can
     // be represented by a stride + size alone.
diff --git a/src/gpu/intel/ocl/simple_binary.hpp b/src/gpu/intel/ocl/simple_binary.hpp
index 013517a71f7..a166c558437 100644
--- a/src/gpu/intel/ocl/simple_binary.hpp
+++ b/src/gpu/intel/ocl/simple_binary.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -97,7 +97,7 @@ struct simple_binary_t : public gpu_primitive_t {
         status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
 
         bool with_scales(int position) const {
-            return !attr()->scales_.get(position).has_default_values();
+            return !attr()->scales_.has_default_values(position);
         }
 
         bool with_scales() const {
diff --git a/src/gpu/intel/ocl/simple_layer_normalization.hpp b/src/gpu/intel/ocl/simple_layer_normalization.hpp
index 2719bd4491d..f225b19f30a 100644
--- a/src/gpu/intel/ocl/simple_layer_normalization.hpp
+++ b/src/gpu/intel/ocl/simple_layer_normalization.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2024 Intel Corporation
+* Copyright 2024-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -92,9 +92,9 @@ struct simple_layer_normalization_fwd_t : public gpu_primitive_t {
             CHECK(status);
 
             kernel_ctx.define_int("WITH_SRC_SCALES",
-                    !pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values());
+                    !pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC));
             kernel_ctx.define_int("WITH_DST_SCALES",
-                    !pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values());
+                    !pd()->attr()->scales_.has_default_values(DNNL_ARG_DST));
 
             CHECK(create_kernel(engine, &kernel_, "simple_lnorm_fwd", kernel_ctx));
             if (!kernel_) return status::runtime_error;
diff --git a/src/gpu/intel/ocl/simple_softmax.hpp b/src/gpu/intel/ocl/simple_softmax.hpp
index 49ed31d9087..e80fcabf5ce 100644
--- a/src/gpu/intel/ocl/simple_softmax.hpp
+++ b/src/gpu/intel/ocl/simple_softmax.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -140,9 +140,9 @@ struct simple_softmax_fwd_t : public gpu_primitive_t {
         kernel_ctx.add_option("-cl-std=CL2.0");
         kernel_ctx.define_int("LOGSOFTMAX", pd()->is_logsoftmax());
         kernel_ctx.define_int("WITH_SRC_SCALES",
-                !pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values());
+                !pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC));
         kernel_ctx.define_int("WITH_DST_SCALES",
-                !pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values());
+                !pd()->attr()->scales_.has_default_values(DNNL_ARG_DST));
 
         const memory_desc_wrapper dst_mdw(pd()->dst_md());
         const memory_desc_wrapper src_mdw(pd()->src_md());
diff --git a/src/gpu/intel/ocl/vectorized_lnorm.hpp b/src/gpu/intel/ocl/vectorized_lnorm.hpp
index b7afb6c3bbf..20278a030b4 100644
--- a/src/gpu/intel/ocl/vectorized_lnorm.hpp
+++ b/src/gpu/intel/ocl/vectorized_lnorm.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -89,9 +89,9 @@ struct vectorized_lnorm_fwd_t : public gpu_primitive_t {
             CHECK(status);
 
             kernel_ctx.define_int("WITH_SRC_SCALES",
-                    !pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values());
+                    !pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC));
             kernel_ctx.define_int("WITH_DST_SCALES",
-                    !pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values());
+                    !pd()->attr()->scales_.has_default_values(DNNL_ARG_DST));
 
             CHECK(create_kernel(
                     engine, &kernel_, "vectorized_lnorm_fwd", kernel_ctx));
diff --git a/src/gpu/intel/primitive_conf.cpp b/src/gpu/intel/primitive_conf.cpp
index 0523dc0fbbe..cd5540ae406 100644
--- a/src/gpu/intel/primitive_conf.cpp
+++ b/src/gpu/intel/primitive_conf.cpp
@@ -104,22 +104,23 @@ attr_info_t attr_info_t::create(const primitive_attr_t *attr) {
     const auto &src_scales = attr->scales_.get(DNNL_ARG_SRC);
     attr_info.with_src_scales = !src_scales.has_default_values();
     attr_info.with_src0_scale = !src_scales.has_default_values();
-    attr_info.src_scales_mask = src_scales.mask_;
-    attr_info.src_scales_data_type = src_scales.data_type_;
+    attr_info.src_scales_data_type = src_scales.get_data_type();
 
     const auto &src1_scales = attr->scales_.get(DNNL_ARG_SRC_1);
     attr_info.with_src1_scale = !src1_scales.has_default_values();
-    gpu_assert(src1_scales.mask_ == 0);
+    if (attr_info.with_src1_scale) { gpu_assert(src1_scales.get_mask() == 0); }
 
     const auto &wei_scales = attr->scales_.get(DNNL_ARG_WEIGHTS);
     attr_info.with_wei_scales = !wei_scales.has_default_values();
-    attr_info.wei_scales_mask = wei_scales.mask_;
-    attr_info.wei_scales_data_type = wei_scales.data_type_;
+    // TODO: remove the default `0` value.
+    attr_info.wei_scales_mask
+            = attr_info.with_wei_scales ? wei_scales.get_mask() : 0;
+    attr_info.wei_scales_data_type = wei_scales.get_data_type();
 
     const auto &dst_scales = attr->scales_.get(DNNL_ARG_DST);
     attr_info.with_dst_scales = !dst_scales.has_default_values();
-    attr_info.dst_scales_mask = dst_scales.mask_;
-    attr_info.dst_scales_data_type = dst_scales.data_type_;
+    attr_info.dst_scales_mask = dst_scales.get_mask();
+    attr_info.dst_scales_data_type = dst_scales.get_data_type();
 
     // zero points
     const auto &zp = attr->zero_points_;
@@ -827,7 +828,6 @@ status_t def_attr_info_impl(compute::kernel_ctx_t &kernel_ctx,
     kernel_ctx.define_int("WITH_SRC_SCALES", attr_info.with_src_scales);
     kernel_ctx.define_int("WITH_WEI_SCALES", attr_info.with_wei_scales);
     kernel_ctx.define_int("WITH_DST_SCALES", attr_info.with_dst_scales);
-    kernel_ctx.define_int("SRC_SCALES_MASK", attr_info.src_scales_mask);
     kernel_ctx.define_int("WEI_SCALES_MASK", attr_info.wei_scales_mask);
     kernel_ctx.define_int("DST_SCALES_MASK", attr_info.dst_scales_mask);
     def_data_type(kernel_ctx, attr_info.src_scales_data_type, "SRC_SCALES",
diff --git a/src/gpu/intel/primitive_conf.hpp b/src/gpu/intel/primitive_conf.hpp
index b4684dc274e..2fcf821a27b 100644
--- a/src/gpu/intel/primitive_conf.hpp
+++ b/src/gpu/intel/primitive_conf.hpp
@@ -83,9 +83,8 @@ struct attr_info_t {
     bool with_src_scales;
     bool with_wei_scales;
     bool with_dst_scales;
-    bool src_scales_mask;
-    bool wei_scales_mask;
-    bool dst_scales_mask;
+    int wei_scales_mask;
+    int dst_scales_mask;
     data_type_t src_scales_data_type;
     data_type_t wei_scales_data_type;
     data_type_t dst_scales_data_type;
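Note the pattern adopted in attr_info_t::create() above: the mask is only read once the corresponding with_*_scales flag confirms the entry is non-default, with an explicit fallback (the TODO marks the temporary `0` default). A standalone sketch of that guarded query, with mock types rather than the library's:

    #include <cassert>

    struct entry_t {
        bool set = false;
        int mask = 0;
    };

    // Mirrors: with_wei_scales ? wei_scales.get_mask() : 0
    int query_mask_or(const entry_t &e, int fallback) {
        return e.set ? e.mask : fallback;
    }

    int main() {
        entry_t dflt; // default entry: its mask must not be consumed directly
        entry_t per_oc;
        per_oc.set = true;
        per_oc.mask = 1 << 1;
        assert(query_mask_or(dflt, 0) == 0);
        assert(query_mask_or(per_oc, 0) == (1 << 1));
        return 0;
    }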
diff --git a/src/gpu/nvidia/cudnn_binary.hpp b/src/gpu/nvidia/cudnn_binary.hpp
index 4bba9c0ba6b..8226b6832cc 100644
--- a/src/gpu/nvidia/cudnn_binary.hpp
+++ b/src/gpu/nvidia/cudnn_binary.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -66,12 +66,7 @@ struct cudnn_binary_t : public gpu::primitive_t {
                     || has_zero_dims(dst_md()->dims, dst_md()->ndims);
         }
 
-        bool check_scales_mask() const {
-            for (const auto &s : attr()->scales_.scales_) {
-                if (s.second.mask_ != 0) return false;
-            }
-            return true;
-        }
+        bool check_scales_mask() const { return attr_scales_ok(); }
 
         bool check_no_blocking() const {
             // Blocking is not supported by cudnnOpTensor, return false if any
diff --git a/src/gpu/nvidia/cudnn_convolution.hpp b/src/gpu/nvidia/cudnn_convolution.hpp
index 329081c4633..07154104e86 100644
--- a/src/gpu/nvidia/cudnn_convolution.hpp
+++ b/src/gpu/nvidia/cudnn_convolution.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -155,15 +155,17 @@ struct cudnn_convolution_fwd_t : public gpu::primitive_t {
                     && ndims() < 5;
         }
 
-        bool attr_scales_ok() const {
-            const auto &scales = attr()->scales_;
-            const auto &supported_args
-                    = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST};
-            if (!scales.has_default_values(supported_args)) return false;
-            // cuDNN does not support scaling per dimension.
-            for (auto arg : supported_args)
-                if (scales.get(arg).mask_ != 0) return false;
-            return true;
+        bool attr_scales_ok(const std::vector<int> &supported_args
+                = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
+            bool ok = attr()->scales_.has_default_values(supported_args);
+            for (int arg : supported_args) {
+                if (attr()->scales_.has_default_values(arg)) continue;
+
+                const auto &mask = attr()->scales_.get_mask(arg);
+                // cuDNN does not support scaling per dimension.
+                ok = ok && (mask == 0);
+            }
+            return ok;
         }
     };
diff --git a/src/gpu/nvidia/cudnn_convolution_impl.hpp b/src/gpu/nvidia/cudnn_convolution_impl.hpp
index c6b04245c3a..e0fb47e796b 100644
--- a/src/gpu/nvidia/cudnn_convolution_impl.hpp
+++ b/src/gpu/nvidia/cudnn_convolution_impl.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -143,13 +143,10 @@ struct cudnn_convolution_impl_base_t
         with_bias = pd->with_bias();
         beta = 0.0f;
         do_scaling = !pd->attr()->scales_.has_default_values();
-        do_dst_scaling
-                = !pd->attr()->scales_.get(DNNL_ARG_DST).has_default_values();
-        do_src_scaling
-                = !pd->attr()->scales_.get(DNNL_ARG_SRC).has_default_values();
-        do_wei_scaling = !pd->attr()
-                                  ->scales_.get(DNNL_ARG_WEIGHTS)
-                                  .has_default_values();
+        do_dst_scaling = !pd->attr()->scales_.has_default_values(DNNL_ARG_DST);
+        do_src_scaling = !pd->attr()->scales_.has_default_values(DNNL_ARG_SRC);
+        do_wei_scaling
+                = !pd->attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS);
         dnnl_descs[x] = *pd->invariant_src_md();
         dnnl_descs[weights] = *pd->invariant_wei_md();
         dnnl_descs[y] = *pd->invariant_dst_md();
diff --git a/src/gpu/nvidia/cudnn_inner_product.hpp b/src/gpu/nvidia/cudnn_inner_product.hpp
index b4d5c7aa438..a6ec15d578d 100644
--- a/src/gpu/nvidia/cudnn_inner_product.hpp
+++ b/src/gpu/nvidia/cudnn_inner_product.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -39,15 +39,17 @@ struct cudnn_inner_product_fwd_t : public gpu::primitive_t {
     struct pd_t : public inner_product_fwd_pd_t {
        using inner_product_fwd_pd_t::inner_product_fwd_pd_t;
 
-        bool attr_scales_ok() const {
-            const auto &scales = attr()->scales_;
-            const auto &supported_args
-                    = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST};
-            if (!scales.has_default_values(supported_args)) return false;
-            // cuDNN does not support scaling per dimension.
-            for (auto arg : supported_args)
-                if (scales.get(arg).mask_ != 0) return false;
-            return true;
+        bool attr_scales_ok(const std::vector<int> &supported_args
+                = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
+            bool ok = attr()->scales_.has_default_values(supported_args);
+            for (int arg : supported_args) {
+                if (attr()->scales_.has_default_values(arg)) continue;
+
+                const auto &mask = attr()->scales_.get_mask(arg);
+                // cuDNN does not support scaling per dimension.
+                ok = ok && (mask == 0);
+            }
+            return ok;
         }
 
         std::shared_ptr<cudnn_inner_product_impl_base_t> inner_product_impl_;
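The cuDNN pd_t checks above all share one validation shape: first reject non-default scales on any unsupported argument, then require a zero (common) mask on every supported argument that actually carries scales. A standalone sketch of that logic under simplified types (a plain arg-to-mask map stands in for arg_scales_t, and the DNNL_ARG_* values are illustrative ints):

    #include <cassert>
    #include <map>
    #include <vector>

    using scales_map_t = std::map<int, int>; // arg -> mask; absent == default

    bool attr_scales_ok(
            const scales_map_t &scales, const std::vector<int> &supported_args) {
        // Non-default scales on any other argument are unsupported.
        for (const auto &kv : scales) {
            bool supported = false;
            for (int arg : supported_args) supported |= (arg == kv.first);
            if (!supported) return false;
        }
        bool ok = true;
        for (int arg : supported_args) {
            auto it = scales.find(arg);
            if (it == scales.end()) continue; // default values: nothing to check
            ok = ok && (it->second == 0); // backend supports common scales only
        }
        return ok;
    }

    int main() {
        const int SRC = 1, WEI = 2, DST = 3, BIAS = 4;
        assert(attr_scales_ok({{SRC, 0}}, {SRC, WEI, DST}));
        assert(!attr_scales_ok({{WEI, 1 << 1}}, {SRC, WEI, DST})); // per-dim mask
        assert(!attr_scales_ok({{BIAS, 0}}, {SRC, WEI, DST})); // unsupported arg
        return 0;
    }

Skipping default-valued arguments before querying the mask is what lets get_mask() stay meaningful only for entries that were actually set.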
diff --git a/src/gpu/nvidia/cudnn_matmul.hpp b/src/gpu/nvidia/cudnn_matmul.hpp
index f1674c860fe..d24470b5336 100644
--- a/src/gpu/nvidia/cudnn_matmul.hpp
+++ b/src/gpu/nvidia/cudnn_matmul.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -101,10 +101,11 @@ struct cudnn_matmul_t : public gpu::primitive_t {
             if (!scales.has_default_values(supported_args)) return false;
             // cuDNN does not support scaling per dimension.
             for (auto arg : supported_args) {
-                auto &s = scales.get(arg);
-                if (scales.get(arg).mask_ != 0
-                        || !utils::one_of(
-                                s.data_type_, s8, s32, f32, f16, bf16))
+                if (scales.has_default_values(arg)) continue;
+
+                if (scales.get_mask(arg) > 0) return false;
+                if (!utils::one_of(
+                            scales.get_data_type(arg), s8, s32, f32, f16, bf16))
                     return false;
             }
             return true;
diff --git a/src/gpu/nvidia/cudnn_matmul_impl.hpp b/src/gpu/nvidia/cudnn_matmul_impl.hpp
index bdbb25358c2..c77a26d47e4 100644
--- a/src/gpu/nvidia/cudnn_matmul_impl.hpp
+++ b/src/gpu/nvidia/cudnn_matmul_impl.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -50,7 +50,7 @@ struct cublas_params : cublas_base_params {
             return status::unimplemented;
         }
 
-        with_dst_scale_ = !attr->scales_.get(DNNL_ARG_DST).has_default_values();
+        with_dst_scale_ = !attr->scales_.has_default_values(DNNL_ARG_DST);
 
         with_separate_bias_ = with_bias;
         if ((with_separate_bias_)
                 && (bias_md->data_type != dst_md->data_type)) {
diff --git a/src/gpu/nvidia/cudnn_matmul_lt.hpp b/src/gpu/nvidia/cudnn_matmul_lt.hpp
index 0b6907d094c..d482a17e3c7 100644
--- a/src/gpu/nvidia/cudnn_matmul_lt.hpp
+++ b/src/gpu/nvidia/cudnn_matmul_lt.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2024 Intel Corporation
+* Copyright 2024-2025 Intel Corporation
 * Copyright 2024 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -191,7 +191,7 @@ struct cudnn_matmul_lt_t : public gpu::primitive_t {
         memory_desc_t s32_dst_md_;
 
         bool default_scale(int ARG) const {
-            return attr()->scales_.get(ARG).has_default_values();
+            return attr()->scales_.has_default_values(ARG);
         }
 
     private:
@@ -215,10 +215,9 @@ struct cudnn_matmul_lt_t : public gpu::primitive_t {
         }
 
         status_t create_scale_binary_pd(impl::engine_t *engine, int ARG) {
-            auto scale_md = dnnl_memory_desc();
-            scale_md.ndims = attr()->scales_.get(ARG).ndims_;
-            scale_md.data_type = attr()->scales_.get(ARG).data_type_;
-            scale_md.format_kind = dnnl_blocked;
+            memory_desc_t scale_md {};
+            scale_md.data_type = attr()->scales_.get_data_type(ARG);
+            scale_md.format_kind = format_kind::blocked;
 
             auto format_desc = create_scaling_format_desc(ARG, scale_md);
             scale_md.format_desc = {format_desc};
@@ -246,7 +245,7 @@ struct cudnn_matmul_lt_t : public gpu::primitive_t {
         }
 
         blocking_desc_t create_scaling_format_desc(
-                int ARG, dnnl_memory_desc &scale_md) {
+                int ARG, memory_desc_t &scale_md) {
             blocking_desc_t format_desc;
             memory_desc_t md;
             if (ARG == DNNL_ARG_SRC) {
@@ -255,11 +254,13 @@ struct cudnn_matmul_lt_t : public gpu::primitive_t {
                 md = *weights_md(0);
             } else if (ARG == DNNL_ARG_DST) {
                 md = *dst_md();
+            } else {
+                assert(!"unexpected arg");
             }
             scale_md.ndims = md.ndims;
 
             for (int i = 0; i < md.ndims; i++) {
-                if (attr()->scales_.get(1).mask_ & (1 << i)) {
+                if (attr()->scales_.get_mask(ARG) & (1 << i)) {
                     scale_md.dims[i] = md.dims[i];
                 } else {
                     scale_md.dims[i] = 1;
@@ -303,20 +304,17 @@ struct cudnn_matmul_lt_t : public gpu::primitive_t {
         bool single_scale(int ARG) const {
             const auto &scales = attr()->scales_;
-            return scales.get(ARG).mask_ == 0;
+            return scales.get_mask(ARG) == 0;
         }
 
-        bool scales_ok() {
-            data_type_t src_scale_dt
-                    = attr()->scales_.get(DNNL_ARG_SRC).data_type_;
-            data_type_t wei_scale_dt
-                    = attr()->scales_.get(DNNL_ARG_WEIGHTS).data_type_;
-            bool src_scales_ok = default_scale(DNNL_ARG_SRC)
-                    || utils::one_of(
-                            src_scale_dt, data_type::s8, data_type::s32);
-            bool wei_scales_ok = default_scale(DNNL_ARG_WEIGHTS)
-                    || utils::one_of(
-                            wei_scale_dt, data_type::s8, data_type::s32);
+        bool scales_ok() const {
+            bool src_scales_ok = IMPLICATION(!default_scale(DNNL_ARG_SRC),
+                    utils::one_of(attr()->scales_.get_data_type(DNNL_ARG_SRC),
+                            data_type::s8, data_type::s32));
+            bool wei_scales_ok = IMPLICATION(!default_scale(DNNL_ARG_WEIGHTS),
+                    utils::one_of(
+                            attr()->scales_.get_data_type(DNNL_ARG_WEIGHTS),
+                            data_type::s8, data_type::s32));
             return src_scales_ok && wei_scales_ok;
         }
diff --git a/src/gpu/nvidia/cudnn_matmul_lt_impl.hpp b/src/gpu/nvidia/cudnn_matmul_lt_impl.hpp
index f529cd67000..e97879d0b07 100644
--- a/src/gpu/nvidia/cudnn_matmul_lt_impl.hpp
+++ b/src/gpu/nvidia/cudnn_matmul_lt_impl.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2024 Intel Corporation
+* Copyright 2024-2025 Intel Corporation
 * Copyright 2024 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -55,20 +55,23 @@ struct cublas_lt_params : cublas_base_params {
                 || dst_d.has_runtime_dims_or_strides()
                 || weights_d.has_runtime_dims_or_strides();
 
-        if (attr->scales_.get(DNNL_ARG_SRC).has_default_values()) {
-            auto src_scale = attr->scales_.get(DNNL_ARG_SRC);
-            if (src_scale.mask_ != 0) { multi_src_scale_ = true; }
+        if (!attr->scales_.has_default_values(DNNL_ARG_SRC)) {
+            if (attr->scales_.get_mask(DNNL_ARG_SRC) > 0) {
+                multi_src_scale_ = true;
+            }
         }
 
-        if (attr->scales_.get(DNNL_ARG_WEIGHTS).has_default_values()) {
-            auto wei_scale = attr->scales_.get(DNNL_ARG_WEIGHTS);
-            if (wei_scale.mask_ != 0) { multi_wei_scale_ = true; }
+        if (!attr->scales_.has_default_values(DNNL_ARG_WEIGHTS)) {
+            if (attr->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0) {
+                multi_wei_scale_ = true;
+            }
         }
 
-        with_dst_scale_ = !attr->scales_.get(DNNL_ARG_DST).has_default_values();
+        with_dst_scale_ = !attr->scales_.has_default_values(DNNL_ARG_DST);
         if (with_dst_scale_) {
-            auto dst_scale = attr->scales_.get(DNNL_ARG_DST);
-            if (dst_scale.mask_ != 0) { multi_dst_scale_ = true; }
+            if (attr->scales_.get_mask(DNNL_ARG_DST) > 0) {
+                multi_dst_scale_ = true;
+            }
         }
 
         // Initialise flags and variables for the imma case (E.g. imma_case_ flag).
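Two details in the hunks above are worth calling out. First, the src/weights conditions in cublas_lt_params now read `!attr->scales_.has_default_values(...)`; without the negation the multi-scale branches are dead code, since a default entry always has a zero mask (the dst branch already used the negated form). Second, scales_ok() in cudnn_matmul_lt.hpp switches to the IMPLICATION idiom: a data-type constraint only has to hold when scales are actually set. A self-contained sketch of that idiom, with a local macro mimicking oneDNN's utils, and the data-type check reduced to a bool for brevity:

    #include <cassert>

    #define IMPLICATION(cause, effect) (!(cause) || (effect))

    // "If scales are non-default, their data type must be supported."
    bool scales_ok(bool scales_set, bool dt_supported) {
        return IMPLICATION(scales_set, dt_supported);
    }

    int main() {
        assert(scales_ok(false, false)); // default scales: dt is irrelevant
        assert(scales_ok(true, true));
        assert(!scales_ok(true, false));
        return 0;
    }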
diff --git a/src/gpu/nvidia/cudnn_reorder.hpp b/src/gpu/nvidia/cudnn_reorder.hpp
index 30c7bc185dd..76c4b4a5120 100644
--- a/src/gpu/nvidia/cudnn_reorder.hpp
+++ b/src/gpu/nvidia/cudnn_reorder.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -83,14 +83,17 @@ struct cudnn_reorder_t : public gpu::primitive_t {
             return ok;
         }
 
-        bool scales_ok() const {
-            const auto &scales = attr()->scales_;
-            const auto &supported_args = {DNNL_ARG_FROM, DNNL_ARG_TO};
-            if (!scales.has_default_values(supported_args)) return false;
-            // cuDNN does not support scaling per dimension.
-            for (auto arg : supported_args)
-                if (scales.get(arg).mask_ != 0) return false;
-            return true;
+        bool scales_ok(const std::vector<int> &supported_args
+                = {DNNL_ARG_FROM, DNNL_ARG_TO}) const {
+            bool ok = attr()->scales_.has_default_values(supported_args);
+            for (int arg : supported_args) {
+                if (attr()->scales_.has_default_values(arg)) continue;
+
+                const auto &mask = attr()->scales_.get_mask(arg);
+                // cuDNN does not support scaling per dimension.
+                ok = ok && (mask == 0);
+            }
+            return ok;
         }
 
         bool post_ops_ok() const {
diff --git a/src/gpu/nvidia/cudnn_reorder_lt.hpp b/src/gpu/nvidia/cudnn_reorder_lt.hpp
index b8e1d2afc7f..f5343a361fd 100644
--- a/src/gpu/nvidia/cudnn_reorder_lt.hpp
+++ b/src/gpu/nvidia/cudnn_reorder_lt.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -117,14 +117,17 @@ struct cudnn_reorder_lt_t : public gpu::primitive_t {
             return ok;
         }
 
-        bool scales_ok() const {
-            const auto &scales = attr()->scales_;
-            const auto &supported_args = {DNNL_ARG_FROM, DNNL_ARG_TO};
-            if (!scales.has_default_values(supported_args)) return false;
-            // cuDNN does not support scaling per dimension.
-            for (auto arg : supported_args)
-                if (scales.get(arg).mask_ != 0) return false;
-            return true;
+        bool scales_ok(const std::vector<int> &supported_args
+                = {DNNL_ARG_FROM, DNNL_ARG_TO}) const {
+            bool ok = attr()->scales_.has_default_values(supported_args);
+            for (int arg : supported_args) {
+                if (attr()->scales_.has_default_values(arg)) continue;
+
+                const auto &mask = attr()->scales_.get_mask(arg);
+                // cuDNN does not support scaling per dimension.
+                ok = ok && (mask == 0);
+            }
+            return ok;
         }
 
         bool post_ops_ok() const {
@@ -144,11 +147,6 @@ struct cudnn_reorder_lt_t : public gpu::primitive_t {
                     && post_ops_ok();
             if (!ok) return status::unimplemented;
 
-            primitive_attr_t r_attr;
-            int mask = 0;
-            bool is_set = false;
-            auto src = DNNL_ARG_DST;
-            auto dst = DNNL_ARG_SRC;
             if (src_float_) {
                 src_scratch_md_ = *src_md();
                 dst_scratch_md_ = create_temp_md(src_scratch_md_);
@@ -157,21 +155,23 @@ struct cudnn_reorder_lt_t : public gpu::primitive_t {
                 src_scratch_md_ = create_temp_md(dst_scratch_md_);
                 dst_scratch_md_ = *dst_md();
             }
-            attr()->scales_.get(src, &mask, &is_set);
-            if (is_set) { r_attr.scales_.set(src, mask); }
-            attr()->scales_.get(dst, &mask, &is_set);
-            if (is_set) { r_attr.scales_.set(dst, mask); }
+            primitive_attr_t r_attr;
+            if (!attr()->scales_.has_default_values(DNNL_ARG_SRC)) {
+                const auto mask = attr()->scales_.get_mask(DNNL_ARG_SRC);
+                r_attr.scales_.set(DNNL_ARG_SRC, mask);
+            }
 
-            status_t generic_ok = reorder_primitive_desc_create(
-                    generic_reorder_desc_, engine, &src_scratch_md_,
-                    &dst_scratch_md_, &r_attr);
-            ok = ok && (generic_ok == status::success);
+            if (!attr()->scales_.has_default_values(DNNL_ARG_DST)) {
+                const auto mask = attr()->scales_.get_mask(DNNL_ARG_DST);
+                r_attr.scales_.set(DNNL_ARG_DST, mask);
+            }
 
-            if (!ok) return status::unimplemented;
+            CHECK(reorder_primitive_desc_create(generic_reorder_desc_, engine,
+                    &src_scratch_md_, &dst_scratch_md_, &r_attr));
 
             init_scratchpad();
-            return dnnl_success;
+            return status::success;
         }
 
         void init_scratchpad() {
diff --git a/src/gpu/nvidia/cudnn_softmax.cpp b/src/gpu/nvidia/cudnn_softmax.cpp
index 1a39656dc06..29f317df470 100644
--- a/src/gpu/nvidia/cudnn_softmax.cpp
+++ b/src/gpu/nvidia/cudnn_softmax.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020-2022 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -33,12 +33,12 @@ status_t cudnn_softmax_fwd_t::execute(const exec_ctx_t &ctx) const {
     nvidia::stream_t *cuda_stream
             = utils::downcast<nvidia::stream_t *>(ctx.stream());
 
-    if (!pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values())
+    if (!pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC))
         CHECK(stream_utils::copy_input_arg_to_host(ctx, cuda_stream,
                 &host_scales_[0], DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC,
                 sizeof(float)));
 
-    if (!pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values())
+    if (!pd()->attr()->scales_.has_default_values(DNNL_ARG_DST))
        CHECK(stream_utils::copy_input_arg_to_host(ctx, cuda_stream,
                 &host_scales_[1], DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST,
                 sizeof(float)));
diff --git a/src/gpu/nvidia/cudnn_softmax.hpp b/src/gpu/nvidia/cudnn_softmax.hpp
index 3ce67c4acaf..49736634516 100644
--- a/src/gpu/nvidia/cudnn_softmax.hpp
+++ b/src/gpu/nvidia/cudnn_softmax.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -56,7 +56,7 @@ struct cudnn_softmax_fwd_t : public gpu::primitive_t {
                     && set_default_formats() == status::success
                     && src_d.is_plain() && dst_d.is_plain() && dst_d == src_d
                     && IMPLICATION(!attr()->scales_.has_default_values(),
-                            check_scales_mask()
+                            attr_scales_ok()
                                     && dst_d.data_type() != data_type::s8);
             if (!ok) return status::unimplemented;
 
@@ -64,12 +64,6 @@ struct cudnn_softmax_fwd_t : public gpu::primitive_t {
             return softmax_impl_->init(this);
         }
 
-        bool check_scales_mask() const {
-            for (const auto &s : attr()->scales_.scales_) {
-                if (s.second.mask_ != 0) return false;
-            }
-            return true;
-        }
 
         std::shared_ptr<cudnn_softmax_impl_base_t> softmax_impl_;
     };
diff --git a/tests/gtests/test_iface_attr.cpp b/tests/gtests/test_iface_attr.cpp
index 720aed6eb7f..23e091b1c96 100644
--- a/tests/gtests/test_iface_attr.cpp
+++ b/tests/gtests/test_iface_attr.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 * Copyright 2020-2021 FUJITSU LIMITED
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -281,15 +281,17 @@ HANDLE_EXCEPTIONS_FOR_TEST_F(attr_test_t, TestScalesWithGroups) {
     for (auto arg : supported_args) {
         // single non-default scales for supported arg
         attr.set_scales(arg, 0, {});
-        // multiple scales with groups
+        // multiple scales with a single group dim
         attr.set_scales(arg, 1 << 0, {4});
+        // multiple scales with multiple group dims
+        attr.set_scales(arg, 1 << 0, {4, 1});
         // scales with groups and a data type
-        attr.set_scales(arg, 1 << 0, {4}, data_type::f32);
+        attr.set_scales(arg, 1 << 0, {4, 1}, data_type::f32);
     }
 
     for (auto arg : unsupported_args) {
         // multiple scales with groups for unsupported args
-        EXPECT_ANY_THROW(attr.set_scales(arg, 1 << 0, {4}));
+        EXPECT_ANY_THROW(attr.set_scales(arg, 1 << 0, {4, 1}));
         // multiple scales with non-default data type for unsupported args
         EXPECT_ANY_THROW(attr.set_scales(arg, 1 << 0, {}, data_type::bf16));
     }
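The test update above exercises the user-facing side of this refactor: group dims may now be passed for each grouped dimension explicitly ({4, 1} in addition to {4}). A hedged usage sketch with the public C++ API, assuming a oneDNN build where primitive_attr::set_scales accepts group dims (matmul-style weights, per-dim-0 mask, groups of 4 along dim 0, trivial group along dim 1):

    #include "dnnl.hpp"

    int main() {
        dnnl::primitive_attr attr;
        // One f32 scale per 4x1 block of the weights tensor.
        attr.set_scales(DNNL_ARG_WEIGHTS, /*mask=*/1 << 0, /*groups=*/{4, 1},
                dnnl::memory::data_type::f32);
        return 0;
    }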