From e61f59600b505e758b38c09d3c02abe19868793c Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin
Date: Wed, 15 Jan 2025 11:36:13 -0800
Subject: [PATCH] src: introduce quant_entry_t and refactor arg_scales_t to
 rely on it

---
 src/common/binary.cpp                         |  17 +-
 src/common/binary_pd.hpp                      |  13 +-
 src/common/concat.cpp                         |  22 +-
 src/common/convolution.cpp                    |  16 +-
 src/common/convolution_pd.hpp                 |   4 +-
 src/common/deconvolution.cpp                  |  19 +-
 src/common/deconvolution_pd.hpp               |   6 +-
 src/common/group_normalization.cpp            |  18 +-
 src/common/group_normalization_pd.hpp         |  12 +-
 src/common/inner_product.cpp                  |  17 +-
 src/common/inner_product_pd.hpp               |   6 +-
 src/common/layer_normalization.cpp            |  18 +-
 src/common/layer_normalization_pd.hpp         |  18 +-
 src/common/matmul.cpp                         | 105 ++++---
 src/common/matmul_pd.hpp                      |  53 ++--
 src/common/primitive_attr.cpp                 |  12 +-
 src/common/primitive_attr.hpp                 |   2 +-
 src/common/primitive_attr_quant.cpp           | 101 +++++++
 src/common/primitive_attr_quant.hpp           | 277 ++++++++++--------
 src/common/primitive_desc.hpp                 |   8 +-
 src/common/primitive_hashing.cpp              |  26 +-
 src/common/primitive_hashing.hpp              |   3 +-
 src/common/reorder.cpp                        |  29 +-
 src/common/sdpa_pd.hpp                        |  17 +-
 src/common/sdpa_types.hpp                     |   4 +-
 src/common/serialization.cpp                  |  23 +-
 src/common/serialization_stream.hpp           |   3 +-
 src/common/softmax.cpp                        |  20 +-
 src/common/softmax_pd.hpp                     |  17 +-
 src/common/verbose.cpp                        |  29 +-
 src/common/verbose.hpp                        |   1 +
 src/cpu/aarch64/acl_reorder.hpp               |  12 +-
 src/cpu/aarch64/brgemm/brgemm.cpp             |  17 +-
 src/cpu/aarch64/jit_brdgmm_dw_conv.cpp        |   2 +-
 src/cpu/aarch64/jit_brgemm_conv_utils.cpp     |   4 +-
 src/cpu/aarch64/jit_brgemm_post_ops.hpp       |   4 +-
 src/cpu/aarch64/jit_uni_reorder.cpp           |  11 +-
 src/cpu/aarch64/jit_uni_reorder_utils.cpp     |  13 +-
 src/cpu/aarch64/matmul/acl_lowp_matmul.cpp    |   6 +-
 src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp |  19 +-
 src/cpu/aarch64/matmul/brgemm_matmul.cpp      |   4 +-
 .../aarch64/matmul/brgemm_matmul_utils.cpp    |  15 +-
 src/cpu/cpu_primitive.hpp                     |   6 +-
 src/cpu/dw_convolution_utils.hpp              |  16 +-
 src/cpu/gemm_convolution_utils.cpp            |   4 +-
 src/cpu/gemm_inner_product_utils.cpp          |  12 +-
 src/cpu/gemm_x8s8s32x_convolution.cpp         |  14 +-
 src/cpu/gemm_x8s8s32x_inner_product.cpp       |   7 +-
 src/cpu/matmul/gemm_bf16_matmul.cpp           |  23 +-
 src/cpu/matmul/gemm_f32_matmul.cpp            |  23 +-
 src/cpu/matmul/gemm_x8s8s32x_matmul.cpp       |  13 +-
 src/cpu/matmul/matmul_utils.hpp               |   9 +-
 src/cpu/matmul/ref_matmul.cpp                 |  23 +-
 src/cpu/matmul/ref_matmul_int8.cpp            |  30 +-
 src/cpu/ref_concat.hpp                        |  11 +-
 src/cpu/ref_convolution_int8.cpp              |  10 +-
 src/cpu/ref_deconvolution.cpp                 |  18 +-
 src/cpu/ref_fused_convolution.hpp             |  16 +-
 src/cpu/ref_inner_product_int8.cpp            |   7 +-
 src/cpu/ref_sum.hpp                           |   4 +-
 src/cpu/reorder/cpu_reorder_pd.hpp            |  10 +-
 src/cpu/reorder/simple_reorder.hpp            |  63 ++--
 src/cpu/scale_utils.cpp                       |  56 ++--
 src/cpu/scale_utils.hpp                       |   4 +-
 src/cpu/x64/brgemm/brgemm.cpp                 |  23 +-
 src/cpu/x64/brgemm/capi/brgemm_api.cpp        |   9 +-
 .../jit_avx512_core_amx_1x1_conv_kernel.cpp   |   5 +-
 .../jit_avx512_core_amx_1x1_convolution.cpp   |   7 +-
 .../jit_avx512_core_amx_1x1_convolution.hpp   |   6 +-
 .../x64/jit_avx512_core_amx_conv_kernel.cpp   |   4 +-
 .../x64/jit_avx512_core_amx_convolution.cpp   |  17 +-
 .../x64/jit_avx512_core_amx_convolution.hpp   |  10 +-
 .../x64/jit_avx512_core_amx_deconvolution.cpp |   7 +-
 .../x64/jit_avx512_core_scale_precompute.cpp  |  20 +-
 .../x64/jit_avx512_core_scale_precompute.hpp  |  13 +-
 ...t_avx512_core_x8s8s32x_1x1_conv_kernel.cpp |   6 +-
 ...t_avx512_core_x8s8s32x_1x1_convolution.cpp |   6 +-
 .../jit_avx512_core_x8s8s32x_conv_kernel.cpp  |   6 +-
 .../jit_avx512_core_x8s8s32x_convolution.cpp  |   4 +-
 ...jit_avx512_core_x8s8s32x_deconvolution.cpp |   8 +-
 src/cpu/x64/jit_brdgmm_dw_conv.cpp            |  11 +-
 src/cpu/x64/jit_brgemm_1x1_conv.cpp           |   9 +-
 src/cpu/x64/jit_brgemm_conv.cpp               |   9 +-
 src/cpu/x64/jit_brgemm_conv_bwd_strided.cpp   |   9 +-
 src/cpu/x64/jit_brgemm_conv_bwd_utils.cpp     |   4 +-
 src/cpu/x64/jit_brgemm_conv_utils.cpp         |   4 +-
 src/cpu/x64/jit_brgemm_inner_product.cpp      |   5 +-
 src/cpu/x64/jit_brgemm_inner_product.hpp      |   4 +-
 .../x64/jit_brgemm_inner_product_utils.cpp    |   5 +-
 src/cpu/x64/jit_brgemm_post_ops.cpp           |   3 +-
 src/cpu/x64/jit_gemm_inner_product_utils.cpp  |   4 +-
 src/cpu/x64/jit_uni_binary.cpp                |   8 +-
 src/cpu/x64/jit_uni_group_normalization.cpp   |   6 +-
 .../x64/jit_uni_instance_normalization.cpp    |   6 +-
 src/cpu/x64/jit_uni_layer_normalization.cpp   |   6 +-
 src/cpu/x64/jit_uni_reorder.cpp               |  13 +-
 src/cpu/x64/jit_uni_reorder_utils.cpp         |  15 +-
 src/cpu/x64/jit_uni_softmax.cpp               |  10 +-
 .../x64/jit_uni_x8s8s32x_1x1_conv_kernel.cpp  |   6 +-
 .../x64/jit_uni_x8s8s32x_1x1_convolution.cpp  |   6 +-
 src/cpu/x64/jit_uni_x8s8s32x_conv_kernel.cpp  |   6 +-
 src/cpu/x64/jit_uni_x8s8s32x_convolution.cpp  |   4 +-
 .../x64/jit_uni_x8s8s32x_deconvolution.cpp    |   8 +-
 src/cpu/x64/matmul/brgemm_matmul.cpp          |  27 +-
 src/cpu/x64/matmul/brgemm_matmul_utils.cpp    |  16 +-
 src/cpu/x64/matmul_inner_product.cpp          |  17 +-
 src/gpu/generic/ref_concat.hpp                |  10 +-
 .../sycl/layer_normalizations_kernels.hpp     |  32 +-
 src/gpu/generic/sycl/ref_binary.hpp           |   5 +-
 src/gpu/generic/sycl/ref_convolution.cpp      |  26 +-
 src/gpu/generic/sycl/ref_convolution.hpp      |   4 +-
 .../generic/sycl/ref_layer_normalizations.cpp |  16 +-
 .../generic/sycl/ref_layer_normalizations.hpp |   4 +-
 src/gpu/generic/sycl/ref_matmul.cpp           |   9 +-
 src/gpu/generic/sycl/ref_matmul.hpp           |   5 +-
 src/gpu/generic/sycl/ref_reorder.cpp          |   9 +-
 src/gpu/generic/sycl/ref_reorder.hpp          |   4 +-
 src/gpu/generic/sycl/ref_softmax.cpp          |   8 +-
 src/gpu/generic/sycl/ref_softmax.hpp          |  13 +-
 src/gpu/generic/sycl/ref_sum.hpp              |  14 +-
 src/gpu/generic/sycl/reorder_kernels.hpp      |  10 +-
 src/gpu/gpu_utils.hpp                         |  28 +-
 src/gpu/intel/jit/conv/config.cpp             |   4 +-
 src/gpu/intel/jit/gemm/gen_gemm.hpp           |  24 +-
 src/gpu/intel/jit/gemm/jit_gemm_pd.cpp        |  48 ++-
 src/gpu/intel/jit/ir/post_ops.cpp             |  19 +-
 src/gpu/intel/jit/ir/tensor_config.cpp        |   5 +-
 src/gpu/intel/jit/reorder/gen_reorder.cpp     |   9 +-
 .../intel/ocl/convolution_inner_product.cpp   |   2 +-
 src/gpu/intel/ocl/gemm/gemm_with_post_ops.cpp |  16 +-
 src/gpu/intel/ocl/gemm/ref_gemm.hpp           |  13 +-
 src/gpu/intel/ocl/gemm_inner_product.hpp      |  21 +-
 src/gpu/intel/ocl/gemm_matmul.hpp             |  30 +-
 src/gpu/intel/ocl/gen9_binary.hpp             |  20 +-
 src/gpu/intel/ocl/gen9_softmax.hpp            |   6 +-
 src/gpu/intel/ocl/generic_reorder.cpp         |   6 +-
 src/gpu/intel/ocl/micro_sdpa.cpp              |   9 +-
 src/gpu/intel/ocl/micro_sdpa.hpp              |   4 +-
 src/gpu/intel/ocl/multi_po_reorder_binary.hpp |   6 +-
 src/gpu/intel/ocl/ref_group_normalization.cpp |   6 +-
 src/gpu/intel/ocl/ref_layer_normalization.hpp |   6 +-
 src/gpu/intel/ocl/ref_matmul.cpp              |  22 +-
 src/gpu/intel/ocl/ref_matmul.hpp              |   8 +-
 src/gpu/intel/ocl/reusable_lnorm.cpp          |   6 +-
 src/gpu/intel/ocl/simple_binary.hpp           |   4 +-
 .../intel/ocl/simple_layer_normalization.hpp  |   6 +-
 src/gpu/intel/ocl/simple_softmax.hpp          |   6 +-
 src/gpu/intel/ocl/vectorized_lnorm.hpp        |   6 +-
 src/gpu/intel/primitive_conf.cpp              |  16 +-
 src/gpu/intel/primitive_conf.hpp              |   5 +-
 src/gpu/nvidia/cudnn_binary.hpp               |   9 +-
 src/gpu/nvidia/cudnn_convolution.hpp          |  22 +-
 src/gpu/nvidia/cudnn_convolution_impl.hpp     |  13 +-
 src/gpu/nvidia/cudnn_inner_product.hpp        |  22 +-
 src/gpu/nvidia/cudnn_matmul.hpp               |  11 +-
 src/gpu/nvidia/cudnn_matmul_impl.hpp          |   4 +-
 src/gpu/nvidia/cudnn_matmul_lt.hpp            |  38 ++-
 src/gpu/nvidia/cudnn_matmul_lt_impl.hpp       |  23 +-
 src/gpu/nvidia/cudnn_reorder.hpp              |  21 +-
 src/gpu/nvidia/cudnn_reorder_lt.hpp           |  48 +--
 src/gpu/nvidia/cudnn_softmax.cpp              |   6 +-
 src/gpu/nvidia/cudnn_softmax.hpp              |  10 +-
 tests/gtests/test_iface_attr.cpp              |  10 +-
 163 files changed, 1342 insertions(+), 1199 deletions(-)
 create mode 100644 src/common/primitive_attr_quant.cpp
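The bulk of the change below is a mechanical migration: call sites stop reading runtime_scales_t members (mask_, ndims_, group_dims_, data_type_) directly and instead query the new quant_entry_t through scales_t accessors, skipping default-initialized arguments up front. A condensed before/after sketch distilled from the hunks that follow (attr is a hypothetical primitive_attr_t pointer; this illustration is not itself a hunk of the patch):

    // Before: direct member access, with no cheap notion of "argument not set".
    const auto &sc = attr->scales_.get(DNNL_ARG_WEIGHTS);
    const int mask = sc.mask_;
    const dim_t g0 = sc.ndims_ > 0 ? sc.group_dims_[0] : 1;

    // After: accessor-only interface; default entries are skipped first.
    if (!attr->scales_.has_default_values(DNNL_ARG_WEIGHTS)) {
        const int mask = attr->scales_.get_mask(DNNL_ARG_WEIGHTS);
        // get_group() returns 1 when no groups were requested.
        const dim_t g0 = attr->scales_.get_group(DNNL_ARG_WEIGHTS, 0);
    }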
diff --git a/src/common/binary.cpp b/src/common/binary.cpp
index a29aeb017d7..83cc51b1512 100644
--- a/src/common/binary.cpp
+++ b/src/common/binary.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -55,16 +55,17 @@ status_t binary_attr_check(const binary_desc_t &desc, const engine_t *engine,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
-        VCHECK_BINARY_UNIMPL(attr->scales_.has_default_values(
-                                     {DNNL_ARG_SRC_0, DNNL_ARG_SRC_1}),
+        static const std::vector<int> supported_args {
+                DNNL_ARG_SRC_0, DNNL_ARG_SRC_1};
+        VCHECK_BINARY_UNIMPL(attr->scales_.has_default_values(supported_args),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
 
-        const auto &sc = attr->scales_;
-        const int mask_src_0 = sc.get(DNNL_ARG_SRC_0).mask_;
-        const int mask_src_1 = sc.get(DNNL_ARG_SRC_1).mask_;
+        for (int arg : supported_args) {
+            if (attr->scales_.has_default_values(arg)) continue;
 
-        VCHECK_BINARY_UNIMPL(utils::everyone_is(0, mask_src_0, mask_src_1),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
+            const int mask = attr->scales_.get_mask(arg);
+            VCHECK_BINARY_UNIMPL(mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
     }
 
     // Check post-ops
diff --git a/src/common/binary_pd.hpp b/src/common/binary_pd.hpp
index b9c94e58475..5c19c907a78 100644
--- a/src/common/binary_pd.hpp
+++ b/src/common/binary_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -179,10 +179,13 @@ struct binary_pd_t : public primitive_desc_t {
 
     bool attr_scales_ok(const std::vector<int> &supported_args
             = {DNNL_ARG_SRC_0, DNNL_ARG_SRC_1, DNNL_ARG_DST}) const {
-        bool ok = attr()->scales_.has_default_values(supported_args);
-        for (int arg : supported_args) {
-            const auto &mask = attr()->scales_.get(arg).mask_;
-            ok = ok && (mask == 0);
+        const auto &scales = attr()->scales_;
+        bool ok = scales.has_default_values(supported_args);
+
+        for (const auto &arg : supported_args) {
+            if (scales.has_default_values(arg)) continue;
+
+            ok = ok && scales.get_mask(arg) == 0;
         }
         return ok;
     }
diff --git a/src/common/concat.cpp b/src/common/concat.cpp
index d686df416f8..089aadf9285 100644
--- a/src/common/concat.cpp
+++ b/src/common/concat.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018-2023 Intel Corporation
+* Copyright 2018-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -54,10 +54,22 @@ status_t concat_primitive_desc_create(std::shared_ptr<primitive_desc_t> &pd,
     VCHECK_CONCAT_UNIMPL(attr->has_default_values(smask_t::scales_runtime),
             VERBOSE_UNSUPPORTED_ATTR);
     const auto &scales = attr->scales_;
-    if (!scales.has_default_values())
-        for (const auto &s : scales.scales_)
-            VCHECK_CONCAT_UNIMPL(
-                    s.second.mask_ == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+    if (!scales.has_default_values()) {
+        std::vector<int> supported_args(n);
+        for (int i = 0; i < n; i++) {
+            supported_args[i] = DNNL_ARG_MULTIPLE_SRC + i;
+        }
+        VCHECK_CONCAT_UNIMPL(
+                attr->scales_.has_default_values(supported_args),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+        for (int arg : supported_args) {
+            if (scales.has_default_values(arg)) continue;
+
+            int mask = scales.get_mask(arg);
+            VCHECK_CONCAT_UNIMPL(mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
+    }
 }
 
 const int ndims = src_mds[0]->ndims;
diff --git a/src/common/convolution.cpp b/src/common/convolution.cpp
index 80e61134353..78643c884d9 100644
--- a/src/common/convolution.cpp
+++ b/src/common/convolution.cpp
@@ -186,13 +186,19 @@ status_t conv_attr_check(const convolution_desc_t &desc, const engine_t *engine,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
         const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_wei = sc.get(DNNL_ARG_WEIGHTS).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
         const bool with_groups = desc.src_desc.ndims != desc.weights_desc.ndims;
-        VCHECK_CONV_UNIMPL(utils::one_of(mask_wei, 0, with_groups ? 3 : 1)
-                        && utils::one_of(mask_dst, 0, 2) && mask_src == 0,
+        VCHECK_CONV_UNIMPL(IMPLICATION(!sc.has_default_values(DNNL_ARG_SRC),
+                                   sc.get_mask(DNNL_ARG_SRC) == 0),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_CONV_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_WEIGHTS),
+                        utils::one_of(sc.get_mask(DNNL_ARG_WEIGHTS), 0,
+                                with_groups ? 3 : 1)),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_CONV_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_DST),
+                        utils::one_of(sc.get_mask(DNNL_ARG_DST), 0, 2)),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
     }
 
diff --git a/src/common/convolution_pd.hpp b/src/common/convolution_pd.hpp
index 51afe19efd1..487fbbad2a8 100644
--- a/src/common/convolution_pd.hpp
+++ b/src/common/convolution_pd.hpp
@@ -242,7 +242,9 @@ struct convolution_pd_t : public primitive_desc_t {
             = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
         bool ok = attr()->scales_.has_default_values(supported_args);
         for (int arg : supported_args) {
-            const auto &mask = attr()->scales_.get(arg).mask_;
+            if (attr()->scales_.has_default_values(arg)) continue;
+
+            const auto &mask = attr()->scales_.get_mask(arg);
             if (arg == DNNL_ARG_WEIGHTS)
                 ok = ok && (mask == 0 || mask == (with_groups() ? 3 : 1));
             else if (arg == DNNL_ARG_DST)
diff --git a/src/common/deconvolution.cpp b/src/common/deconvolution.cpp
index 00f3f89d037..3b4695156ed 100644
--- a/src/common/deconvolution.cpp
+++ b/src/common/deconvolution.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018-2024 Intel Corporation
+* Copyright 2018-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -172,13 +172,20 @@ status_t deconv_attr_check(const deconvolution_desc_t &desc,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
         const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_wei = sc.get(DNNL_ARG_WEIGHTS).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
         const bool with_groups = desc.src_desc.ndims != desc.weights_desc.ndims;
-        VCHECK_DECONV_UNIMPL(utils::everyone_is(0, mask_src, mask_dst)
-                        && utils::one_of(mask_wei, 0, with_groups ? 3 : 1),
+        VCHECK_DECONV_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_SRC),
+                        sc.get_mask(DNNL_ARG_SRC) == 0),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_DECONV_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_WEIGHTS),
+                        utils::one_of(sc.get_mask(DNNL_ARG_WEIGHTS), 0,
+                                with_groups ? 3 : 1)),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_DECONV_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_DST),
+                        sc.get_mask(DNNL_ARG_DST) == 0),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
     }
 
diff --git a/src/common/deconvolution_pd.hpp b/src/common/deconvolution_pd.hpp
index 32dfb1a58ef..24a44153a98 100644
--- a/src/common/deconvolution_pd.hpp
+++ b/src/common/deconvolution_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018-2024 Intel Corporation
+* Copyright 2018-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -173,7 +173,9 @@ struct deconvolution_pd_t : public primitive_desc_t {
             = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
         bool ok = attr()->scales_.has_default_values(supported_args);
         for (int arg : supported_args) {
-            const auto &mask = attr()->scales_.get(arg).mask_;
+            if (attr()->scales_.has_default_values(arg)) continue;
+
+            const auto &mask = attr()->scales_.get_mask(arg);
             if (arg == DNNL_ARG_WEIGHTS)
                 ok = ok && (mask == 0 || mask == (with_groups() ? 3 : 1));
             else
diff --git a/src/common/group_normalization.cpp b/src/common/group_normalization.cpp
index 4e0abf3b6a2..c4f0e7cc5e7 100644
--- a/src/common/group_normalization.cpp
+++ b/src/common/group_normalization.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -154,12 +154,18 @@ status_t group_normalization_attr_check(const group_normalization_desc_t &desc,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
-        const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
-
-        VCHECK_GNORM_UNIMPL(utils::everyone_is(0, mask_src, mask_dst),
+        static const std::vector<int> supported_args {
+                DNNL_ARG_SRC, DNNL_ARG_DST};
+        VCHECK_GNORM_UNIMPL(
+                attr->scales_.has_default_values(supported_args),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+        for (int arg : supported_args) {
+            if (attr->scales_.has_default_values(arg)) continue;
+
+            const int mask = attr->scales_.get_mask(arg);
+            VCHECK_GNORM_UNIMPL(mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
     }
 
     // Check post-ops
diff --git a/src/common/group_normalization_pd.hpp b/src/common/group_normalization_pd.hpp
index a9a48ed29f0..14343554126 100644
--- a/src/common/group_normalization_pd.hpp
+++ b/src/common/group_normalization_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023-2024 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -190,17 +190,17 @@ struct group_normalization_fwd_pd_t : public group_normalization_pd_t {
         return IMPLICATION(use_scale() || use_shift(),
                 weights_md()->data_type == data_type::f32);
     }
 
-    bool attr_scales_ok() const {
+    bool attr_scales_ok(const std::vector<int> &supported_args
+            = {DNNL_ARG_SRC, DNNL_ARG_DST}) const {
         using namespace data_type;
         const auto &scales = attr()->scales_;
-        const std::vector<int> supported_args({DNNL_ARG_SRC, DNNL_ARG_DST});
         bool ok = scales.has_default_values(supported_args);
 
         for (const auto &arg : supported_args) {
-            const auto &sc = scales.get(arg);
-            if (!sc.has_default_values()) {
+            if (!scales.has_default_values(arg)) {
                 const data_type_t dt = arg_md(arg)->data_type;
-                ok = ok && utils::one_of(dt, s8, u8) && sc.mask_ == 0;
+                ok = ok && utils::one_of(dt, s8, u8);
+                ok = ok && scales.get_mask(arg) == 0;
             }
         }
         return ok;
diff --git a/src/common/inner_product.cpp b/src/common/inner_product.cpp
index 4c1943cf67f..ac24c0c02c3 100644
--- a/src/common/inner_product.cpp
+++ b/src/common/inner_product.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2024 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -133,12 +133,15 @@ status_t ip_attr_check(const inner_product_desc_t &desc, const engine_t *engine,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
         const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_wei = sc.get(DNNL_ARG_WEIGHTS).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
-
-        VCHECK_IP_UNIMPL(utils::everyone_is(0, mask_src, mask_dst)
-                        && utils::one_of(mask_wei, 0, 1),
+        VCHECK_IP_UNIMPL(IMPLICATION(!sc.has_default_values(DNNL_ARG_SRC),
+                                 sc.get_mask(DNNL_ARG_SRC) == 0),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_IP_UNIMPL(
+                IMPLICATION(!sc.has_default_values(DNNL_ARG_WEIGHTS),
+                        utils::one_of(sc.get_mask(DNNL_ARG_WEIGHTS), 0, 1)),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+        VCHECK_IP_UNIMPL(IMPLICATION(!sc.has_default_values(DNNL_ARG_DST),
+                                 sc.get_mask(DNNL_ARG_DST) == 0),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
     }
 
diff --git a/src/common/inner_product_pd.hpp b/src/common/inner_product_pd.hpp
index 5d7d2163ddf..ecde97f3687 100644
--- a/src/common/inner_product_pd.hpp
+++ b/src/common/inner_product_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2024 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -184,7 +184,9 @@ struct inner_product_pd_t : public primitive_desc_t {
             = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
         bool ok = attr()->scales_.has_default_values(supported_args);
         for (auto arg : supported_args) {
-            int mask = attr()->scales_.get(arg).mask_;
+            if (attr()->scales_.has_default_values(arg)) continue;
+
+            int mask = attr()->scales_.get_mask(arg);
             if (arg == DNNL_ARG_WEIGHTS)
                 ok = ok && (mask == 0 || mask == (1 << 0));
             else
diff --git a/src/common/layer_normalization.cpp b/src/common/layer_normalization.cpp
index 79ccc98c45c..ab8a3790a80 100644
--- a/src/common/layer_normalization.cpp
+++ b/src/common/layer_normalization.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -164,12 +164,18 @@ status_t layer_normalization_attr_check(const layer_normalization_desc_t &desc,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
-        const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
-
-        VCHECK_LNORM_UNIMPL(utils::everyone_is(0, mask_src, mask_dst),
+        static const std::vector<int> supported_args {
+                DNNL_ARG_SRC, DNNL_ARG_DST};
+        VCHECK_LNORM_UNIMPL(
+                attr->scales_.has_default_values(supported_args),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+        for (int arg : supported_args) {
+            if (attr->scales_.has_default_values(arg)) continue;
+
+            const int mask = attr->scales_.get_mask(arg);
+            VCHECK_LNORM_UNIMPL(mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
     }
 
     // Check post-ops
diff --git a/src/common/layer_normalization_pd.hpp b/src/common/layer_normalization_pd.hpp
index 0629be31180..7e901c64bf3 100644
--- a/src/common/layer_normalization_pd.hpp
+++ b/src/common/layer_normalization_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -248,11 +248,19 @@ struct layer_normalization_fwd_pd_t : public layer_normalization_pd_t {
         return false;
     }
 
-    bool attr_scales_ok() const {
+    bool attr_scales_ok(const std::vector<int> &supported_args
+            = {DNNL_ARG_SRC, DNNL_ARG_DST}) const {
+        using namespace data_type;
         const auto &scales = attr()->scales_;
-        bool ok = true;
-        for (const auto &e : scales.scales_) {
-            ok = ok && e.second.mask_ == 0;
+        bool ok = scales.has_default_values(supported_args);
+
+        for (const auto &arg : supported_args) {
+            if (!scales.has_default_values(arg)) {
+                // TODO: disallow non-int8 scales?
+                // const data_type_t dt = arg_md(arg)->data_type;
+                // ok = ok && utils::one_of(dt, s8, u8);
+                ok = ok && scales.get_mask(arg) == 0;
+            }
         }
         return ok;
     }
diff --git a/src/common/matmul.cpp b/src/common/matmul.cpp
index 8dd32b8055a..8d05e147aaf 100644
--- a/src/common/matmul.cpp
+++ b/src/common/matmul.cpp
@@ -95,36 +95,73 @@ status_t matmul_attr_check(const matmul_desc_t &desc, const engine_t *engine,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
         const auto &sc = attr->scales_;
-        const auto &sc_src = sc.get(DNNL_ARG_SRC);
-        const auto &sc_wei = sc.get(DNNL_ARG_WEIGHTS);
-        const int mask_src = sc_src.mask_;
-        const int mask_wei = sc_wei.mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
+        dim_t src_scale_group_k = 1;
+        if (!sc.has_default_values(DNNL_ARG_SRC)) {
+            const int mask_src = sc.get_mask(DNNL_ARG_SRC);
 
-        VCHECK_MATMUL_UNIMPL(utils::one_of(mask_src, 0, src_qmask_K,
-                                     src_qmask_M + src_qmask_K),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
-        // Masks for weights scales can be any - skipping them.
-        if (engine->kind() == engine_kind::gpu) {
-            VCHECK_MATMUL_UNIMPL(
-                    utils::one_of(mask_dst, 0, dst_qmask_N, dst_qmask_M,
-                            dst_qmask_N + dst_qmask_M),
+            VCHECK_MATMUL_UNIMPL(utils::one_of(mask_src, 0, src_qmask_K,
+                                         src_qmask_M + src_qmask_K),
+                    VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+            if (!sc.get(DNNL_ARG_SRC).has_default_groups()) {
+                if (mask_src & src_qmask_K)
+                    src_scale_group_k = sc.get_group(DNNL_ARG_SRC, 1);
+            }
+
+            // Due to hardware specifics, groups should be multiple of 32.
+            VCHECK_MATMUL_UNIMPL(IMPLICATION(src_scale_group_k > 1,
+                                         src_scale_group_k % 32 == 0),
                     VERBOSE_UNSUPPORTED_SCALES_CFG);
-        } else {
-            VCHECK_MATMUL_UNIMPL(mask_dst == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
         }
+
+        dim_t wei_scale_group_k = 1;
+        dim_t wei_scale_group_n = 1;
+        if (!sc.has_default_values(DNNL_ARG_WEIGHTS)) {
+            const int mask_wei = sc.get_mask(DNNL_ARG_WEIGHTS);
+
+            // Masks for weights scales can be any - skipping them.
+
+            if (!sc.get(DNNL_ARG_WEIGHTS).has_default_groups()) {
+                if (mask_wei & wei_qmask_K)
+                    wei_scale_group_k = sc.get_group(DNNL_ARG_WEIGHTS, 0);
+                if (mask_wei & wei_qmask_N)
+                    wei_scale_group_n = sc.get_group(DNNL_ARG_WEIGHTS, 1);
+            }
+
+            // Groups per N are solely for weights decompression as it's
+            // impossible to get performant kernel for a single `k` element in
+            // chain for regular quantized case.
+            VCHECK_MATMUL_UNIMPL(IMPLICATION(wei_scale_group_n > 1,
+                                         attr->fpmath_.apply_to_int_),
+                    VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+            // Due to hardware specifics, groups should be multiple of 32.
+            VCHECK_MATMUL_UNIMPL(IMPLICATION(wei_scale_group_k > 1,
+                                         wei_scale_group_k % 32 == 0),
+                    VERBOSE_UNSUPPORTED_SCALES_CFG);
+            VCHECK_MATMUL_UNIMPL(IMPLICATION(wei_scale_group_n > 1,
+                                         wei_scale_group_n % 32 == 0),
+                    VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
+
+        if (!sc.has_default_values(DNNL_ARG_DST)) {
+            const int mask_dst = sc.get_mask(DNNL_ARG_DST);
+
+            if (engine->kind() == engine_kind::gpu) {
+                VCHECK_MATMUL_UNIMPL(
+                        utils::one_of(mask_dst, 0, dst_qmask_N, dst_qmask_M,
+                                dst_qmask_N + dst_qmask_M),
+                        VERBOSE_UNSUPPORTED_SCALES_CFG);
+            } else {
+                VCHECK_MATMUL_UNIMPL(
+                        mask_dst == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+            }
+        }
+
         // Check dependency between scales.
         // Source scales groups are supported for int8 source and must divide
         // or be divided by weights groups when both are greater than 1.
-        const auto src_scale_group_k
-                = (mask_src & src_qmask_K) && sc_src.ndims_ > 0
-                ? sc_src.group_dims_[1]
-                : 1;
-        const auto wei_scale_group_k
-                = (mask_wei & wei_qmask_K) && sc_wei.ndims_ > 0
-                ? sc_wei.group_dims_[0]
-                : 1;
         const bool groups_are_divisible = IMPLICATION(
                 src_scale_group_k > 1 && wei_scale_group_k > 1,
                 (src_scale_group_k % wei_scale_group_k == 0)
@@ -133,28 +170,6 @@ status_t matmul_attr_check(const matmul_desc_t &desc, const engine_t *engine,
                 IMPLICATION(src_scale_group_k > 1,
                         (src_is_int8 || src_is_fp8) && groups_are_divisible),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
-
-        // Groups per N are solely for weights decompression as it's impossible
-        // to get performant kernel for a single `k` element in chain for
-        // regular quantized case.
-        const auto wei_scale_group_n
-                = (mask_wei & wei_qmask_N) && sc_wei.ndims_ > 0
-                ? sc_wei.group_dims_[1]
-                : 1;
-        VCHECK_MATMUL_UNIMPL(
-                IMPLICATION(wei_scale_group_n > 1, attr->fpmath_.apply_to_int_),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
-
-        // Due to hardware specifics, groups should be multiple of 32.
-        VCHECK_MATMUL_UNIMPL(
-                IMPLICATION(src_scale_group_k > 1, src_scale_group_k % 32 == 0),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
-        VCHECK_MATMUL_UNIMPL(
-                IMPLICATION(wei_scale_group_k > 1, wei_scale_group_k % 32 == 0),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
-        VCHECK_MATMUL_UNIMPL(
-                IMPLICATION(wei_scale_group_n > 1, wei_scale_group_n % 32 == 0),
-                VERBOSE_UNSUPPORTED_SCALES_CFG);
     }
 
     // Check zero points
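For a concrete reading of the constraints above, here is a hedged sketch using the public attribute API this patch reroutes (dnnl_primitive_attr_set_scales); the shapes, mask, and attr handle are invented for illustration:

    // Hypothetical int8 weights-decompression matmul with K = 256, N = 512.
    // Scales vary along both K and N of a 2D weights tensor -> mask = 3, with
    // a group of 32 along K: it is a multiple of 32 as required above, and
    // 256 % 32 == 0 satisfies the divisibility check in matmul_pd.hpp. Groups
    // along N stay at 1, so the fpmath_.apply_to_int_ restriction is not hit.
    dnnl_dims_t groups = {32, 1};
    dnnl_primitive_attr_set_scales(attr, DNNL_ARG_WEIGHTS,
            /* mask = */ (1 << 0) | (1 << 1), /* ndims = */ 2, groups,
            dnnl_f32);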
diff --git a/src/common/matmul_pd.hpp b/src/common/matmul_pd.hpp
index 94828f4c082..ece3d0a5fc9 100644
--- a/src/common/matmul_pd.hpp
+++ b/src/common/matmul_pd.hpp
@@ -182,29 +182,26 @@ struct matmul_pd_t : public primitive_desc_t {
 
     virtual bool attr_scales_ok(const std::vector<int> &supported_args
             = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
-        if (attr()->scales_.has_default_values()) return true;
+        const auto &scales = attr()->scales_;
+        if (scales.has_default_values()) return true;
 
-        bool ok = attr()->scales_.has_default_values(supported_args);
+        bool ok = scales.has_default_values(supported_args);
         for (int arg : supported_args) {
-            const auto &sc = attr()->scales_.get(arg);
-            const auto &mask = sc.mask_;
-            if (sc.has_default_values()) { continue; }
+            if (scales.has_default_values(arg)) { continue; }
 
+            const auto &mask = scales.get_mask(arg);
             if (arg == DNNL_ARG_WEIGHTS) {
-                const bool wei_k_group_ok
-                        = IMPLICATION(sc.ndims_ == 2 && sc.group_dims_[0] > 1,
-                                K() % sc.group_dims_[0] == 0);
-                const bool wei_n_group_ok
-                        = IMPLICATION(sc.ndims_ == 2 && sc.group_dims_[1] > 1,
-                                N() % sc.group_dims_[1] == 0);
+                const auto &g0 = scales.get_group(arg, 0);
+                const auto &g1 = scales.get_group(arg, 1);
+                const bool wei_k_group_ok = IMPLICATION(g0 > 1, K() % g0 == 0);
+                const bool wei_n_group_ok = IMPLICATION(g1 > 1, N() % g1 == 0);
 
                 // Any group is allowed to be greater than 1 but only one at a
                 // time, not both.
-                ok = ok && utils::one_of(sc.ndims_, 0, 2)
-                        && IMPLICATION(sc.ndims_ == 2,
-                                utils::one_of(
-                                        1, sc.group_dims_[0], sc.group_dims_[1])
-                                        && wei_k_group_ok && wei_n_group_ok);
+                ok = ok
+                        && IMPLICATION(!scales.get(arg).has_default_groups(),
+                                utils::one_of(1, g0, g1) && wei_k_group_ok
+                                        && wei_n_group_ok);
 
                 // Mask over K dim is allowed for decompression feature only.
                 const bool is_decompression_or_dynquant
@@ -220,20 +217,24 @@ struct matmul_pd_t : public primitive_desc_t {
                 ok = ok
                         && utils::one_of(mask, 0, src_qmask_K(),
                                 src_qmask_M() + src_qmask_K());
-                ok = ok && utils::one_of(sc.ndims_, 0, 2);
-                ok = ok && IMPLICATION((mask & src_qmask_K()), sc.ndims_ == 2);
                 ok = ok
-                        && IMPLICATION(sc.ndims_ == 2,
-                                sc.group_dims_[0] == 1
-                                        && K() % sc.group_dims_[1] == 0);
-            } else {
+                        && IMPLICATION((mask & src_qmask_K()),
+                                !scales.get(arg).has_default_groups());
+                ok = ok
+                        && IMPLICATION(!scales.get(arg).has_default_groups(),
+                                scales.get_group(arg, 0) == 1
+                                        && K() % scales.get_group(arg, 1) == 0);
+            } else if (arg == DNNL_ARG_DST) {
                 ok = ok
                         && utils::one_of(mask, 0, dst_qmask_N(),
                                 dst_qmask_M() + dst_qmask_N());
-                ok = ok && utils::one_of(sc.ndims_, 0, 2)
-                        && IMPLICATION(sc.ndims_ == 2,
-                                sc.group_dims_[1] == 1
-                                        && M() % sc.group_dims_[0] == 0);
+                ok = ok
+                        && IMPLICATION(!scales.get(arg).has_default_groups(),
+                                scales.get_group(arg, 1) == 1
+                                        && (M() % scales.get_group(arg, 0))
+                                                == 0);
+            } else {
+                assert(!"Unsupported arg");
             }
         }
         return ok;
diff --git a/src/common/primitive_attr.cpp b/src/common/primitive_attr.cpp
index 68b27f201a5..f846585ff31 100644
--- a/src/common/primitive_attr.cpp
+++ b/src/common/primitive_attr.cpp
@@ -35,11 +35,6 @@ const primitive_attr_t &default_attr() {
     return default_attr_instance;
 }
 
-const runtime_scales_t &default_runtime_scale() {
-    static const runtime_scales_t default_runtime_scale_instance;
-    return default_runtime_scale_instance;
-}
-
 void rnn_create_time_scales_t::set_single_scale(float scale) {
     count_ = 1;
     mask_ = 0;
@@ -543,8 +538,9 @@ status_t dnnl_primitive_attr_set_scratchpad_mode(
 
 status_t dnnl_primitive_attr_set_scales_mask(
         primitive_attr_t *attr, int arg, int mask) {
-    bool ok = attr && mask >= 0 && arg >= 0;
-    if (!ok) return invalid_arguments;
+    VCHECK_ATTR(attr, VERBOSE_NULL_ARG);
+    VCHECK_ATTR(mask >= 0, VERBOSE_BAD_PARAM, "mask");
+    VCHECK_ATTR(arg >= 0, VERBOSE_BAD_PARAM, "arg");
     return attr->scales_.set(arg, mask);
 }
 
@@ -560,7 +556,7 @@ status_t dnnl_primitive_attr_set_scales(primitive_attr_t *attr, int arg,
             VERBOSE_INVALID_DATATYPE, "scales");
     VCHECK_ATTR(IMPLICATION(ndims, validate_dims(ndims, group_dims)),
             VERBOSE_BAD_PARAM, "group_dims");
-    return attr->scales_.set(arg, mask, ndims, group_dims, data_type);
+    return attr->scales_.set(arg, mask, data_type, ndims, group_dims);
 }
 
 status_t dnnl_primitive_attr_set_zero_points_mask(
diff --git a/src/common/primitive_attr.hpp b/src/common/primitive_attr.hpp
index 4c61aef817a..bafaddc12f3 100644
--- a/src/common/primitive_attr.hpp
+++ b/src/common/primitive_attr.hpp
@@ -669,7 +669,7 @@ struct dnnl_primitive_attr : public dnnl::impl::c_compatible {
     }
 
     // NOTE: make sure that the types below have overloaded comparison operator
-    dnnl::impl::arg_scales_t scales_;
+    dnnl::impl::scales_t scales_;
     dnnl::impl::zero_points_t zero_points_;
     dnnl::impl::scratchpad_mode_t scratchpad_mode_;
     dnnl::impl::fpmath_t fpmath_;
diff --git a/src/common/primitive_attr_quant.cpp b/src/common/primitive_attr_quant.cpp
new file mode 100644
index 00000000000..172225154e7
--- /dev/null
+++ b/src/common/primitive_attr_quant.cpp
@@ -0,0 +1,101 @@
+/*******************************************************************************
+* Copyright 2024-2025 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "common/primitive_attr_quant.hpp"
+#include "common/primitive_hashing.hpp"
+#include "common/verbose.hpp"
+
+namespace dnnl {
+namespace impl {
+
+const quant_entry_t &default_quant_entry() {
+    static const quant_entry_t default_quant_entry;
+    return default_quant_entry;
+}
+
+size_t quant_entry_t::get_hash() const {
+    size_t seed = 0;
+    seed = hash_combine(seed, mask_);
+    seed = hash_combine(seed, static_cast<size_t>(data_type_));
+    seed = hash_combine(seed, group_ndims_);
+    if (group_ndims_ > 0)
+        seed = primitive_hashing::get_array_hash(
+                seed, group_dims_, group_ndims_);
+    return seed;
+}
+
+void quant_entry_t::serialize(serialization_stream_t &sstream) const {
+    sstream.write(&mask_);
+    sstream.write(&data_type_);
+    sstream.write(&group_ndims_);
+    if (group_ndims_ > 0) sstream.write(group_dims_, group_ndims_);
+}
+
+std::string quant_entry_t::get_verbose() const {
+    std::string s;
+    s.append(std::to_string(mask_));
+    s.append(":").append(dnnl_dt2str(data_type_));
+    if (group_ndims_ > 0) {
+        s.append(":")
+                .append(std::to_string(group_dims_[0]))
+                .append("x")
+                .append(std::to_string(group_dims_[1]));
+    }
+    return s;
+}
+
+std::ostream &operator<<(std::ostream &ss, const quant_entry_t &e) {
+    ss << e.get_verbose();
+    return ss;
+}
+
+size_t scales_t::get_hash() const {
+    size_t seed = 0;
+    // Go through scales for all arguments.
+    for (const auto &e : scales_) {
+        seed = hash_combine(seed, e.first);
+        seed = hash_combine(seed, e.second.get_hash());
+    }
+    return seed;
+}
+
+void scales_t::serialize(serialization_stream_t &sstream) const {
+    for (const auto &e : scales_) {
+        sstream.write(&e.first);
+        e.second.serialize(sstream);
+    }
+}
+
+std::string scales_t::get_verbose() const {
+    std::string s;
+    std::string empty_delim, attr_delim = "+";
+    std::string delim = empty_delim;
+    for (const auto &scale : scales_) {
+        const auto &q = scale.second;
+        if (q.has_default_values()) continue;
+
+        int arg = scale.first;
+        s.append(delim)
+                .append(arg2str(arg))
+                .append(":")
+                .append(q.get_verbose());
+        delim = attr_delim;
+    }
+    return s;
+}
+
+} // namespace impl
+} // namespace dnnl
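For reference, get_verbose() above renders each non-default entry as mask:data_type[:g0xg1], and scales_t joins entries with '+'. A weights scale with mask 3, f32 data type, and 32x1 groups next to a plain dst scale would therefore print roughly as follows (an illustrative line constructed from the code above, not captured output):

    attr-scales:wei:3:f32:32x1+dst:0:f32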
-#include "utils.hpp" +#include "common/serialization_stream.hpp" +#include "common/utils.hpp" #include -#include +#include #include +#include namespace dnnl { namespace impl { -struct runtime_scales_t; -const runtime_scales_t &default_runtime_scale(); +struct quant_entry_t; +const quant_entry_t &default_quant_entry(); -struct runtime_scales_t : public c_compatible { - runtime_scales_t() = default; +struct quant_entry_t : public c_compatible { + quant_entry_t() = default; - runtime_scales_t &operator=(const runtime_scales_t &rhs) { - mask_ = rhs.mask_; - is_set_ = rhs.is_set_; - ndims_ = rhs.ndims_; - if (ndims_ > 0) utils::array_copy(group_dims_, rhs.group_dims_, ndims_); - data_type_ = rhs.data_type_; - return *this; + // `set(...)` approach is taken over constructors as the usage model assumes + // the change of state of this object but it doesn't require its destruction + // which would come with some performance price which prevails in this case. + status_t set(int mask, data_type_t data_type) { + return set(mask, data_type, 0, {}); } - - status_t set(int mask) { return set(0, mask, {}, data_type::f32); } - - status_t set(int ndims, int mask, const dims_t group_dims, - data_type_t data_type = data_type::f32) { + status_t set(int mask, data_type_t data_type, int group_ndims, + const dims_t group_dims) { mask_ = mask; - is_set_ = true; - ndims_ = ndims; - if (ndims > 0) utils::array_copy(group_dims_, group_dims, ndims); data_type_ = data_type; + group_ndims_ = group_ndims; + if (group_ndims_ > 0) { + utils::array_copy(group_dims_, group_dims, group_ndims_); + } return status::success; } + status_t set(const quant_entry_t &other) { + return set(other.mask_, other.data_type_, other.group_ndims_, + other.group_dims_); + } + + quant_entry_t &operator=(const quant_entry_t &rhs) { + auto st = this->set(rhs); + assert(st == status::success); + UNUSED(st); + return *this; + } + + bool has_default_values() const { return *this == default_quant_entry(); } + bool has_default_groups() const { + return this->group_ndims_ == default_quant_entry().group_ndims_; + } - bool operator==(const runtime_scales_t &rhs) const { - return mask_ == rhs.mask_ && is_set_ == rhs.is_set_ - && ndims_ == rhs.ndims_ - && IMPLICATION(ndims_ > 0, - utils::array_cmp(group_dims_, rhs.group_dims_, ndims_)) - && data_type_ == rhs.data_type_; + int get_mask() const { return mask_; } + data_type_t get_data_type() const { return data_type_; } + dim_t get_group(int d) const { + // If groups were not requested, return `1` for convenience. + if (group_ndims_ == default_quant_entry().group_ndims_) return 1; + // But if they were, any out of bound access would return `0` and likely + // lead to a division by zero which is fast to catch. + if (d >= group_ndims_) return 0; + return group_dims_[d]; } - bool has_default_values() const { return *this == default_runtime_scale(); } + // Note: keep the definition here to satisfy the + // `gtests/internals/test_comparison_operators` linking requirements which + // mandates bodies to be in the header file. + bool operator==(const quant_entry_t &rhs) const { + return mask_ == rhs.mask_ && data_type_ == rhs.data_type_ + && group_ndims_ == rhs.group_ndims_ + && IMPLICATION(group_ndims_ > 0, + utils::array_cmp( + group_dims_, rhs.group_dims_, group_ndims_)); + } - bool has_default_groups() const { return 0 == ndims_; } - bool has_default_data_type() const { return data_type_ == data_type::f32; } + size_t get_hash() const; - // TODO: replace with `-1` to remove `is_set_`. 
-    // Hide `mask_` under `private:` to force interface usage.
-    int mask_ = 0;
-    bool is_set_ = false;
-    int ndims_ = 0;
-    dims_t group_dims_ = {};
-    data_type_t data_type_ = data_type::f32;
+    void serialize(serialization_stream_t &sstream) const;
+
+    std::string get_verbose() const;
+
+private:
+    // Note: INT_MIN is used on purpose to avoid potential issues when a
+    // `(mask & bit)` expression would return `true`. `INT_MIN` is represented
+    // as `10...0` in bits and avoids such situations.
+    int mask_ = INT_MIN;
+    data_type_t data_type_ = data_type::undef;
+    int group_ndims_ = 0;
+    dims_t group_dims_ {};
 };
 
-struct arg_scales_t : public c_compatible {
-    arg_scales_t() = default;
+std::ostream &operator<<(std::ostream &ss, const quant_entry_t &e);
 
-    const runtime_scales_t &get(int arg) const {
-        static const runtime_scales_t default_scales;
+struct scales_t : public c_compatible {
+    scales_t() = default;
+
+    const quant_entry_t &get(int arg) const {
         const auto it = scales_.find(arg);
-        if (it == scales_.end()) return default_scales;
+        if (it == scales_.end()) return default_quant_entry();
         return it->second;
     }
 
-    status_t set(int arg, const runtime_scales_t &scale) {
+    // See the `set(...)` comment for `quant_entry_t` for a design choice
+    // explanation.
+    status_t set(int arg, int mask) {
+        return set(arg, mask, default_data_type, 0, {});
+    }
+    status_t set(int arg, int mask, data_type_t data_type, int group_ndims,
+            const dims_t group_dims) {
         if (!check_arg(arg)) return status::invalid_arguments;
-        scales_[arg] = scale;
+        CHECK(scales_[arg].set(mask, data_type, group_ndims, group_dims));
         return status::success;
     }
-
-    bool operator==(const arg_scales_t &rhs) const {
-        return scales_ == rhs.scales_;
+    // Use this interface with `default_quant_entry` when a specific scale
+    // needs to be removed.
+    status_t set(int arg, const quant_entry_t &other) {
+        return scales_[arg].set(other);
     }
 
-    bool has_default_values(const std::vector<int> &skip_args = {}) const {
-        auto predicate = [](const runtime_scales_t &s) {
-            return s.has_default_values();
-        };
-        return has_default_property(skip_args, predicate);
+    // This interface is different from the one below and is just a shortcut.
+    bool has_default_values(int arg) const {
+        return get(arg).has_default_values();
     }
 
-    bool has_default_data_type(const std::vector<int> &skip_args = {}) const {
-        auto predicate = [](const runtime_scales_t &s) {
-            return s.has_default_data_type();
-        };
-        return has_default_property(skip_args, predicate);
+    // This interface is used to make sure that arguments other than
+    // `supported_args` have default values, i.e., that non-allowed arguments
+    // were not passed to the library.
+    bool has_default_values(const std::vector<int> &supported_args = {}) const {
+        auto predicate
+                = [](const quant_entry_t &s) { return s.has_default_values(); };
+        return has_default_property(supported_args, predicate);
    }
 
+    // This interface is used to make sure that arguments other than
+    // `supported_args` have default data types, i.e., that non-allowed
+    // arguments were not passed to the library.
+    bool has_default_data_type(
+            const std::vector<int> &supported_args = {}) const {
+        auto predicate = [](const quant_entry_t &s) {
+            // Note: `data_type::undef` represents `default_quant_entry`.
+            return utils::one_of(
+                    s.get_data_type(), default_data_type, data_type::undef);
         };
-        return has_default_property(skip_args, predicate);
+        return has_default_property(supported_args, predicate);
     }
+    // This interface checks a specific argument. It exists because
+    // quant_entry_t doesn't have a notion of a default data_type, only scales
+    // do.
+    // Note: can be removed once the library unconditionally supports scales
+    // data types in every implementation, since making a proper load requires
+    // querying the data type anyway.
+    bool has_default_data_type(int arg) const {
+        // Note: `data_type::undef` represents `default_quant_entry`.
+        return utils::one_of(
+                get(arg).get_data_type(), default_data_type, data_type::undef);
+    }
 
-    bool has_default_groups(const std::vector<int> &skip_args = {}) const {
-        auto predicate = [](const runtime_scales_t &s) {
-            return s.has_default_groups();
-        };
-        return has_default_property(skip_args, predicate);
+    // This interface is different from the one below and is just a shortcut.
+    bool has_default_groups(int arg) const {
+        return get(arg).has_default_groups();
     }
 
-    status_t set(int arg, int mask) {
-        return set(arg, mask, 0, {}, data_type::f32);
+    // This interface is used to make sure that arguments other than
+    // `supported_args` have default groups, i.e., that non-allowed arguments
+    // were not passed to the library.
+    bool has_default_groups(const std::vector<int> &supported_args = {}) const {
+        auto predicate
+                = [](const quant_entry_t &s) { return s.has_default_groups(); };
+        return has_default_property(supported_args, predicate);
     }
 
-    status_t set(int arg, int mask, int ndims, const dims_t group_dims,
-            data_type_t data_type) {
-        if (!check_arg(arg)) return status::invalid_arguments;
-        return scales_[arg].set(ndims, mask, group_dims, data_type);
-    }
-
-    // TODO: move to `private` and keep a single interface per entry.
-    status_t get(int arg, int *mask, bool *is_set, int *ndims = nullptr,
-            dims_t group_dims = nullptr,
-            data_type_t *data_type = nullptr) const {
-        if (!check_arg(arg)) return status::invalid_arguments;
-        const auto &s = get(arg);
-        if (mask) *mask = s.mask_;
-        if (is_set) *is_set = s.is_set_;
-        if (ndims) *ndims = s.ndims_;
-        if (group_dims && s.ndims_ > 0)
-            utils::array_copy(group_dims, s.group_dims_, s.ndims_);
-        if (data_type) *data_type = s.data_type_;
-        return status::success;
-    }
-
+    int get_mask(int arg) const { return get(arg).get_mask(); }
     data_type_t get_data_type(int arg) const {
-        data_type_t data_type;
-        auto st = get(arg, nullptr, nullptr, nullptr, nullptr, &data_type);
-        if (st != status::success) return data_type::undef;
-        return data_type;
+        return get(arg).get_data_type();
     }
+    dim_t get_group(int arg, int d) const { return get(arg).get_group(d); }
 
-    status_t reset(int arg) {
-        if (!check_arg(arg)) return status::invalid_arguments;
-        const auto it = scales_.find(arg);
-        if (it != scales_.end()) scales_.erase(it);
-        return status::success;
+    bool operator==(const scales_t &rhs) const {
+        return scales_ == rhs.scales_;
     }
 
-    status_t copy_from(const arg_scales_t &other) {
-        for (auto it = other.scales_.begin(); it != other.scales_.end(); ++it) {
-            // Find an entry that can match the arguments without constructing a
-            // new object.
-            if (scales_.count(it->first) == 1) {
-                auto &entry = scales_[it->first];
-                if (entry == it->second) continue;
-            }
+    size_t get_hash() const;
 
-            CHECK(set(it->first, it->second));
-        }
-        return status::success;
-    }
+    void serialize(serialization_stream_t &sstream) const;
 
-    std::map<int, runtime_scales_t> scales_;
+    std::string get_verbose() const;
 
 private:
+    // Sorted property of `std::map` is used for hashing.
+    std::map<int, quant_entry_t> scales_;
+    static constexpr data_type_t default_data_type = data_type::f32;
+
     bool check_arg(int arg) const {
+        // regular
+        for (const auto &sa : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) {
+            if (arg == sa) return true;
+        }
         // binary
-        for (const auto &sa : {DNNL_ARG_SRC_0, DNNL_ARG_SRC_1}) {
+        for (const auto &sa : {DNNL_ARG_SRC_1}) {
             if (arg == sa) return true;
         }
         // concat
         if (arg & DNNL_ARG_MULTIPLE_SRC) return true;
-        // convolution
-        for (const auto &sa : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) {
-            if (arg == sa) return true;
-        }
         // depth-wise convolution post op
         for (const auto &sa : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) {
             if (arg == (DNNL_ARG_ATTR_POST_OP_DW | sa)) return true;
@@ -203,19 +236,23 @@ struct arg_scales_t : public c_compatible {
         return false;
     }
 
-    bool has_default_property(const std::vector<int> &skip_args,
-            bool (*predicate)(const runtime_scales_t &)) const {
+    // The function makes sure that if any argument was specified by the user,
+    // only `supported_args` may have customized values; the rest must keep
+    // their default values.
+    bool has_default_property(const std::vector<int> &supported_args,
+            bool (*predicate)(const quant_entry_t &)) const {
         for (const auto &s : scales_) {
-            if (!predicate(s.second)) {
-                bool skip = false;
-                for (const auto &skip_a : skip_args)
-                    if (s.first == skip_a) {
-                        skip = true;
-                        break;
-                    }
-                if (skip) continue;
-                return false;
-            }
+            // Arg passed the condition, check the next one.
+            if (predicate(s.second)) continue;
+
+            bool allow_non_default = false;
+            for (const auto &supported_arg : supported_args)
+                if (s.first == supported_arg) {
+                    allow_non_default = true;
+                    break;
+                }
+            if (allow_non_default) continue;
+            return false;
         }
         return true;
     }
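Taken together, the new scales_t surface reduces a consumer-side validation to the shape used by the attr_scales_ok() helpers rewritten throughout this patch. A minimal sketch, assuming a hypothetical primitive that supports only a per-tensor (mask 0, ungrouped) SRC scale:

    bool attr_scales_ok(const primitive_attr_t *attr) {
        const auto &scales = attr->scales_;
        // Reject non-default entries on any argument outside the supported set.
        if (!scales.has_default_values({DNNL_ARG_SRC})) return false;
        // A default entry needs no further validation.
        if (scales.has_default_values(DNNL_ARG_SRC)) return true;
        // Per-tensor only: zero mask and no groups.
        return scales.get_mask(DNNL_ARG_SRC) == 0
                && scales.has_default_groups(DNNL_ARG_SRC);
    }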
diff --git a/src/common/primitive_desc.hpp b/src/common/primitive_desc.hpp
index 288c9339117..86f1e3740c5 100644
--- a/src/common/primitive_desc.hpp
+++ b/src/common/primitive_desc.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2024 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -157,14 +157,14 @@ struct primitive_desc_t : public c_compatible {
         }
         if (arg & DNNL_ARG_ATTR_SCALES) {
             int scale_arg = arg & ~DNNL_ARG_ATTR_SCALES;
-            if (!attr()->scales_.get(scale_arg).has_default_values())
+            if (!attr()->scales_.has_default_values(scale_arg))
                 return arg_usage_t::input;
         }
         if ((arg == (DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0))
-                && !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values())
+                && !attr()->scales_.has_default_values(DNNL_ARG_SRC_0))
             return arg_usage_t::input;
         if ((arg == (DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_1))
-                && !attr()->scales_.get(DNNL_ARG_SRC_1).has_default_values())
+                && !attr()->scales_.has_default_values(DNNL_ARG_SRC_1))
             return arg_usage_t::input;
         if (arg == DNNL_ARG_SCRATCHPAD && !is_zero_md(scratchpad_md()))
             return arg_usage_t::output;
diff --git a/src/common/primitive_hashing.cpp b/src/common/primitive_hashing.cpp
index b3037c0c67c..28dbdfabedd 100644
--- a/src/common/primitive_hashing.cpp
+++ b/src/common/primitive_hashing.cpp
@@ -15,6 +15,7 @@
 *******************************************************************************/
 
 #include
+#include "primitive_attr.hpp"
 #include "primitive_desc.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
@@ -218,19 +219,6 @@ size_t get_md_hash(const memory_desc_t &md) {
     return seed;
 }
 
-// Generate a hash for runtime scales
-size_t get_runtime_scale_hash(const runtime_scales_t &scales) {
-    size_t seed = 0;
-    seed = hash_combine(seed, scales.mask_);
-    // scales: groups
-    const int ndims = scales.ndims_;
-    seed = hash_combine(seed, ndims);
-    if (ndims > 0) seed = get_array_hash(seed, scales.group_dims_, ndims);
-    // scales: data type
-    seed = hash_combine(seed, static_cast<size_t>(scales.data_type_));
-    return seed;
-}
-
 // Generate a hash for zero points
 size_t get_zero_points_hash(const zero_points_t &zps) {
     size_t seed = 0;
@@ -276,13 +264,7 @@ size_t get_attr_hash(const primitive_attr_t &attr) {
     }
 
     if (!attr.scales_.has_default_values()) {
-        // go through scales for all arguments
-        for (const auto &p : attr.scales_.scales_) {
-            // scales: arg
-            seed = hash_combine(seed, p.first);
-            // scales: mask
-            seed = hash_combine(seed, get_runtime_scale_hash(p.second));
-        }
+        seed = hash_combine(seed, attr.scales_.get_hash());
     }
 
     seed = hash_combine(seed, get_zero_points_hash(attr.zero_points_));
@@ -776,9 +758,9 @@ size_t get_desc_hash(const sdpa_desc_t &desc) {
     seed = hash_combine(seed, get_md_hash(desc.q_desc));
     seed = hash_combine(seed, get_md_hash(desc.k_desc));
     seed = hash_combine(seed, get_md_hash(desc.v_desc));
-    seed = hash_combine(seed, get_runtime_scale_hash(desc.kq_scales));
+    seed = hash_combine(seed, desc.kq_scales.get_hash());
     seed = hash_combine(seed, get_zero_points_hash(desc.kq_zero_points));
-    seed = hash_combine(seed, get_runtime_scale_hash(desc.vs_scales));
+    seed = hash_combine(seed, desc.vs_scales.get_hash());
     seed = hash_combine(seed, get_zero_points_hash(desc.vs_zero_points));
     seed = hash_combine(seed, get_md_hash(desc.dst_desc));
     seed = hash_combine(seed, get_md_hash(desc.attn_mask_desc));
diff --git a/src/common/primitive_hashing.hpp b/src/common/primitive_hashing.hpp
index e43bfcf9669..2b4e7e8d22f 100644
--- a/src/common/primitive_hashing.hpp
+++ b/src/common/primitive_hashing.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -23,7 +23,6 @@
 
 #include "common/c_types_map.hpp"
 #include "common/engine_id.hpp"
-#include "common/primitive_attr.hpp"
 #include "common/type_helpers.hpp"
 #include "common/verbose.hpp"
 
diff --git a/src/common/reorder.cpp b/src/common/reorder.cpp
index 506e939de5a..c21fe526dfb 100644
--- a/src/common/reorder.cpp
+++ b/src/common/reorder.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2024 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -38,6 +38,10 @@ namespace impl {
     VCONDCHECK(primitive, create, check, reorder, (cond), \
             status::invalid_arguments, msg, ##__VA_ARGS__);
 
+#define VCHECK_REORDER_UNIMPL(cond, msg, ...) \
+    VCONDCHECK(primitive, create, check, reorder, (cond), \
+            status::unimplemented, msg, ##__VA_ARGS__);
+
 namespace {
 engine_t *get_reorder_engine(engine_t *src_engine, engine_t *dst_engine) {
     auto s_ek = src_engine->kind();
@@ -100,24 +104,28 @@ status_t reorder_primitive_desc_create(std::shared_ptr<primitive_desc_t> &pd,
 
     // Check scales
     if (!attr->scales_.has_default_values()) {
+        static const std::vector<int> supported_args {
+                DNNL_ARG_SRC, DNNL_ARG_DST};
+        VCHECK_REORDER_UNIMPL(attr->scales_.has_default_values(supported_args),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
+
         const auto &sc = attr->scales_;
         const auto &sc_src = sc.get(DNNL_ARG_SRC);
-        const int mask_src = sc_src.mask_;
+        const int mask_src = sc.get_mask(DNNL_ARG_SRC);
 
         VCHECK_REORDER(IMPLICATION(utils::one_of(src_md->data_type,
                                            data_type::s4, data_type::u4),
                                mask_src > 0),
                 VERBOSE_INVALID_DATATYPE, "mask for int4 source");
 
-        if (sc_src.ndims_ > 0) {
+        if (!sc_src.has_default_groups()) {
             const int src_ndims = s_mdw.ndims();
             const bool group_dims_are_consistent
-                    = IMPLICATION(sc_src.group_dims_[0] > 1,
-                              src_md->dims[src_ndims - 2]
-                                              % sc_src.group_dims_[0]
+                    = IMPLICATION(sc_src.get_group(0) > 1,
+                              src_md->dims[src_ndims - 2] % sc_src.get_group(0)
                                       == 0)
-                    && IMPLICATION(sc_src.group_dims_[1] > 1,
-                            src_md->dims[src_ndims - 1] % sc_src.group_dims_[1]
+                    && IMPLICATION(sc_src.get_group(1) > 1,
+                            src_md->dims[src_ndims - 1] % sc_src.get_group(1)
                                     == 0);
             VCHECK_REORDER(group_dims_are_consistent,
                     "groups dimensions are not consistent with reorder "
@@ -132,9 +140,8 @@ status_t reorder_primitive_desc_create(std::shared_ptr<primitive_desc_t> &pd,
                     "mask is not consistent with groups");
         }
 
-        const auto &sc_dst = sc.get(DNNL_ARG_DST);
-        VCHECK_REORDER(sc_dst.ndims_ == 0, VERBOSE_BAD_NDIMS, "dst scales",
-                sc_dst.ndims_);
+        VCHECK_REORDER(sc.get(DNNL_ARG_DST).has_default_groups(),
+                VERBOSE_UNSUPPORTED_SCALES_CFG);
     }
 
     bool is_cross_engine = src_engine != dst_engine
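The group-consistency rule above in plain numbers, using the same public call as earlier (all values hypothetical): for a two-dimensional int4 source of shape {64, 128} with source-scale groups {16, 1}, 64 % 16 == 0 holds, so the check passes, and the nonzero mask satisfies the int4 requirement:

    // Grouped source scales for an int4 reorder; dst scales stay default,
    // which also satisfies the has_default_groups(DNNL_ARG_DST) check above.
    dnnl_dims_t groups = {16, 1};
    dnnl_primitive_attr_set_scales(attr, DNNL_ARG_SRC, /* mask = */ 1 << 0,
            /* ndims = */ 2, groups, dnnl_f32);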
diff --git a/src/common/sdpa_pd.hpp b/src/common/sdpa_pd.hpp
index 5c09693c992..cdbb28d066f 100644
--- a/src/common/sdpa_pd.hpp
+++ b/src/common/sdpa_pd.hpp
@@ -130,7 +130,9 @@ struct sdpa_pd_t : public primitive_desc_t {
     }
 
     /// Returns the data type of the scales tensor for the KQ matmul
-    data_type_t key_scales_dt() const { return desc()->kq_scales.data_type_; }
+    data_type_t key_scales_dt() const {
+        return desc()->kq_scales.get_data_type();
+    }
 
     /// Returns the data type of the zero points tensor for the KQ matmul
     data_type_t key_zp_dt() const {
@@ -138,7 +140,9 @@ struct sdpa_pd_t : public primitive_desc_t {
     }
 
     /// Returns the data type of the scales tensor for the VS matmul
-    data_type_t value_scales_dt() const { return desc()->vs_scales.data_type_; }
+    data_type_t value_scales_dt() const {
+        return desc()->vs_scales.get_data_type();
+    }
 
     /// Returns the data type of the zero points tensor for the VS matmul
     data_type_t value_zp_dt() const {
@@ -198,18 +202,19 @@ struct sdpa_pd_t : public primitive_desc_t {
 
 private:
     static int scale_group_size(
-            const runtime_scales_t &scales, const memory_desc_t &desc) {
+            const quant_entry_t &scales, const memory_desc_t &desc) {
         dim_t out = utils::array_product(desc.dims, desc.ndims);
+        const auto mask = scales.get_mask();
         if (scales.has_default_groups()) {
-            for (int idx : mask_iterator(scales.mask_)) {
+            for (int idx : mask_iterator(mask)) {
                 out /= desc.dims[idx];
             }
         } else {
-            for (int idx : mask_iterator(scales.mask_)) {
+            for (int idx : mask_iterator(mask)) {
                 if (idx < 2) {
                     out /= desc.dims[idx];
                 } else {
-                    out /= (desc.dims[idx] / scales.group_dims_[idx - 2]);
+                    out /= (desc.dims[idx] / scales.get_group(idx - 2));
                 }
             }
         }
diff --git a/src/common/sdpa_types.hpp b/src/common/sdpa_types.hpp
index 671269a5437..6ef24b149f6 100644
--- a/src/common/sdpa_types.hpp
+++ b/src/common/sdpa_types.hpp
@@ -47,9 +47,9 @@ struct sdpa_desc_t : public op_desc_t {
 
     // primitive_attr_t can't be used because of deleted copy-ctor, but desc_t
     // must be copyable.
-    runtime_scales_t kq_scales {};
+    quant_entry_t kq_scales {};
     zero_points_t kq_zero_points {};
-    runtime_scales_t vs_scales {};
+    quant_entry_t vs_scales {};
     zero_points_t vs_zero_points {};
 
     memory_desc_t dst_desc {};
diff --git a/src/common/serialization.cpp b/src/common/serialization.cpp
index afe9c37f49e..6cc5af2b0da 100644
--- a/src/common/serialization.cpp
+++ b/src/common/serialization.cpp
@@ -178,18 +178,6 @@ void serialize_post_ops(
     }
 }
 
-void serialize_runtime_scales(
-        serialization_stream_t &sstream, const runtime_scales_t &scales) {
-    // scales: mask
-    sstream.write(&scales.mask_);
-    // scales: groups
-    const int ndims = scales.ndims_;
-    sstream.write(&ndims);
-    if (ndims > 0) sstream.write(scales.group_dims_, ndims);
-    // scales: data type
-    sstream.write(&scales.data_type_);
-}
-
 void serialize_zero_points(
         serialization_stream_t &sstream, const zero_points_t &zps) {
     for (int arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST})
@@ -224,12 +212,7 @@ void serialize_attr(
 
     if (!attr.scales_.has_default_values()) {
         sstream.write("scale:");
-        // go through scales for all arguments
-        for (const auto &p : attr.scales_.scales_) {
-            // scales: arg
-            sstream.write(&p.first);
-            serialize_runtime_scales(sstream, p.second);
-        }
+        attr.scales_.serialize(sstream);
     }
     // zero_points
     if (!attr.zero_points_.has_default_values()) sstream.write("zp:");
@@ -626,9 +609,9 @@ void serialize_desc(serialization_stream_t &sstream, const sdpa_desc_t &desc) {
     serialize_md(sstream, desc.q_desc);
     serialize_md(sstream, desc.k_desc);
    serialize_md(sstream, desc.v_desc);
-    serialize_runtime_scales(sstream, desc.kq_scales);
+    desc.kq_scales.serialize(sstream);
     serialize_zero_points(sstream, desc.kq_zero_points);
-    serialize_runtime_scales(sstream, desc.vs_scales);
+    desc.vs_scales.serialize(sstream);
     serialize_zero_points(sstream, desc.vs_zero_points);
     serialize_md(sstream, desc.dst_desc);
     serialize_md(sstream, desc.attn_mask_desc);
diff --git a/src/common/serialization_stream.hpp b/src/common/serialization_stream.hpp
index 28eb32aad61..90da39b94fe 100644
--- a/src/common/serialization_stream.hpp
+++ b/src/common/serialization_stream.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2021-2023 Intel Corporation
+* Copyright 2021-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #define COMMON_SERIALIZATION_STREAM_HPP

 #include
+#include
 #include
 #include
diff --git a/src/common/softmax.cpp b/src/common/softmax.cpp
index 94e6e9c4ca5..c055321bb63 100644
--- a/src/common/softmax.cpp
+++ b/src/common/softmax.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2023 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -112,13 +112,21 @@ status_t softmax_attr_check(const softmax_desc_t &desc, const engine_t *engine,
     VCHECK_SOFTMAX_UNIMPL(attr->has_default_values(fwd_attr_mask, dst_dt),
             VERBOSE_UNSUPPORTED_ATTR);

+    // Check scales
     if (!attr->scales_.has_default_values()) {
-        const auto &sc = attr->scales_;
-        const int mask_src = sc.get(DNNL_ARG_SRC).mask_;
-        const int mask_dst = sc.get(DNNL_ARG_DST).mask_;
-
-        VCHECK_SOFTMAX_UNIMPL(utils::everyone_is(0, mask_src, mask_dst),
+        static const std::vector<int> supported_args {
+                DNNL_ARG_SRC, DNNL_ARG_DST};
+        VCHECK_SOFTMAX_UNIMPL(
+                attr->scales_.has_default_values(supported_args),
                 VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+        for (int arg : supported_args) {
+            if (attr->scales_.has_default_values(arg)) continue;
+
+            const int mask = attr->scales_.get_mask(arg);
+            VCHECK_SOFTMAX_UNIMPL(
+                    mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG);
+        }
     }

     // Check post-ops
diff --git a/src/common/softmax_pd.hpp b/src/common/softmax_pd.hpp
index 6e6c12c54c2..905d604ce92 100644
--- a/src/common/softmax_pd.hpp
+++ b/src/common/softmax_pd.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2024 Intel Corporation
+* Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -176,11 +176,18 @@ struct softmax_fwd_pd_t : public softmax_pd_t {
                 dst_md_, src_md_.format_desc.blocking);
     }

-    bool attr_scales_ok() const {
+    bool attr_scales_ok(const std::vector<int> &supported_args
+            = {DNNL_ARG_SRC, DNNL_ARG_DST}) const {
         const auto &scales = attr()->scales_;
-        bool ok = true;
-        for (const auto &e : scales.scales_) {
-            ok = ok && e.second.mask_ == 0;
+        bool ok = scales.has_default_values(supported_args);
+
+        for (const auto &arg : supported_args) {
+            if (scales.has_default_values(arg)) continue;
+
+            // TODO: disallow non-int8 scales?
+ // const data_type_t dt = arg_md(arg)->data_type; + // ok = ok && utils::one_of(dt, s8, u8); + ok = ok && scales.get_mask(arg) == 0; } return ok; } diff --git a/src/common/verbose.cpp b/src/common/verbose.cpp index 4d93782ee7c..05682c9d906 100644 --- a/src/common/verbose.cpp +++ b/src/common/verbose.cpp @@ -618,18 +618,6 @@ std::string md2desc_str(const memory_desc_t *md) { return s; } -std::ostream &operator<<(std::ostream &ss, const runtime_scales_t &scale) { - ss << scale.mask_; - ss << ":" << scale.data_type_; - if (scale.ndims_) { - ss << ":"; - for (int i = 0; i < scale.ndims_ - 1; ++i) - ss << scale.group_dims_[i] << 'x'; - ss << scale.group_dims_[scale.ndims_ - 1]; - } - return ss; -} - std::ostream &operator<<( std::ostream &ss, const rnn_create_time_scales_t &rnn_scales) { ss << rnn_scales.mask_; @@ -742,20 +730,13 @@ std::ostream &operator<<(std::ostream &ss, const primitive_attr_t *attr) { if (deterministic) { ss << field_delim() << "attr-deterministic:" << deterministic; } - if (attr->has_default_values()) return ss; - const arg_scales_t &as = attr->scales_; - if (!as.has_default_values()) { - std::string delim = empty_delim; - ss << field_delim() << "attr-scales:"; - for (const auto &map_entry : as.scales_) { - const auto &val = map_entry.second; - if (val.has_default_values()) continue; + // Fast exit if rest attributes were not specified. + if (attr->has_default_values()) return ss; - int arg = map_entry.first; - ss << delim << arg2str(arg) << ":" << val; - delim = attr_delim; - } + const scales_t &scales = attr->scales_; + if (!scales.has_default_values()) { + ss << field_delim() << "attr-scales:" << scales.get_verbose(); } const zero_points_t &zp = attr->zero_points_; diff --git a/src/common/verbose.hpp b/src/common/verbose.hpp index adb7b3708e9..637f8cdd03d 100644 --- a/src/common/verbose.hpp +++ b/src/common/verbose.hpp @@ -382,6 +382,7 @@ std::string md2fmt_str( const char *name, const memory_desc_t *md, format_kind_t user_format); std::string md2dim_str( const memory_desc_t *md, dims_type_t dims_type = dims_type_t::dims); +std::string arg2str(int arg); // Returns a verbose string of dimensions or descriptor from src, wei, and/or // dst memory descs. Can be called externally to provide info about actual // values of runtime dimensions. diff --git a/src/cpu/aarch64/acl_reorder.hpp b/src/cpu/aarch64/acl_reorder.hpp index e586ed4e304..d429fddf7e7 100644 --- a/src/cpu/aarch64/acl_reorder.hpp +++ b/src/cpu/aarch64/acl_reorder.hpp @@ -95,12 +95,12 @@ struct acl_reorder_fwd_t : public primitive_t { if (!ok) return status::unimplemented; - int mask = -1; - bool is_set = false; - CHECK(attr->scales_.get(DNNL_ARG_DST, &mask, &is_set)); - const memory_desc_wrapper input_d(src_md); - if (input_d.has_runtime_dims_or_strides() && is_set && mask > 0) - return status::unimplemented; + if (!attr->scales_.has_default_values(DNNL_ARG_DST)) { + int mask = attr->scales_.get_mask(DNNL_ARG_DST); + const memory_desc_wrapper input_d(src_md); + if (input_d.has_runtime_dims_or_strides() && mask > 0) + return status::unimplemented; + } // Create and check primitive descriptor auto _pd = make_unique_pd(attr, src_engine->kind(), src_md, diff --git a/src/cpu/aarch64/brgemm/brgemm.cpp b/src/cpu/aarch64/brgemm/brgemm.cpp index 86d1798496e..1cfbc4552c5 100644 --- a/src/cpu/aarch64/brgemm/brgemm.cpp +++ b/src/cpu/aarch64/brgemm/brgemm.cpp @@ -297,21 +297,22 @@ status_t brgemm_desc_set_postops(brgemm_t *brg, const primitive_attr_t *attr, if (brg->with_scales) { // Note. 
the current version supports only two different output scale // types: - // 1) common (mask_ = 0) + // 1) common (mask = 0) // 2) per_n_dim_scale - broadcast across n dimension; // for convolution and inner product promitives it corresponds - // to "per_oc" mask_ = 1 << 1; for matmul - to - // mask_ = (1 << (ndims - 1))), where ndims is number of + // to "per_oc" mask = 1 << 1; for matmul - to + // mask = (1 << (ndims - 1))), where ndims is number of // dimensions for original matmul problem - // So if wei_scales.mask_ != 0 (not common) it's assumed here that scale - // type is per_n_dim_scale and driver which calls brgemm kernel checked - // that mask has correct value for this case - brg->is_oc_scale = wei_scales.mask_ != 0; + // So if wei_scales.get_mask() > 0 (not common) it's assumed here that + // scale type is per_n_dim_scale and driver which calls brgemm kernel + // checked that mask has correct value for this case + brg->is_oc_scale = wei_scales.get_mask() > 0; } const auto &dst_scales = attr->scales_.get(DNNL_ARG_DST); brg->with_dst_scales = !dst_scales.has_default_values(); - const bool scales_ok = src_scales.mask_ == 0 && dst_scales.mask_ == 0 + const bool scales_ok = src_scales.get_mask() == 0 + && dst_scales.get_mask() == 0 && attr->scales_.has_default_values( {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}); if (!scales_ok) return status::unimplemented; diff --git a/src/cpu/aarch64/jit_brdgmm_dw_conv.cpp b/src/cpu/aarch64/jit_brdgmm_dw_conv.cpp index 24e018aef02..a8be300ddb0 100644 --- a/src/cpu/aarch64/jit_brdgmm_dw_conv.cpp +++ b/src/cpu/aarch64/jit_brdgmm_dw_conv.cpp @@ -200,7 +200,7 @@ status_t brdgmm_dw_convolution_fwd_t::pd_t::init(engine_t *engine) { const auto &wei_scales = attr_.scales_.get(DNNL_ARG_WEIGHTS); jcp.with_scale = !src_scales.has_default_values() || !wei_scales.has_default_values(); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; const bool scales_ok = attr_scales_ok({DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}); diff --git a/src/cpu/aarch64/jit_brgemm_conv_utils.cpp b/src/cpu/aarch64/jit_brgemm_conv_utils.cpp index 3b9d3422594..026290c53e9 100644 --- a/src/cpu/aarch64/jit_brgemm_conv_utils.cpp +++ b/src/cpu/aarch64/jit_brgemm_conv_utils.cpp @@ -1993,7 +1993,7 @@ status_t init_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, jcp.with_scales = !src_scales.has_default_values() || !wei_scales.has_default_values() || jcp.scale_adjust_factor != 1.0f; - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; // disables the shape with small ic but large spatial // or specific large spatial shapes for int8 conv @@ -2190,7 +2190,7 @@ status_t init_1x1_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, jcp.with_scales = !src_scales.has_default_values() || !wei_scales.has_default_values() || jcp.scale_adjust_factor != 1.0f; - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; // enable ununroll_bd_loop for big shapes to reduce kernel sizes jcp.ununroll_bd_loop diff --git a/src/cpu/aarch64/jit_brgemm_post_ops.hpp b/src/cpu/aarch64/jit_brgemm_post_ops.hpp index 4257bfe31b8..5aed828a582 100644 --- a/src/cpu/aarch64/jit_brgemm_post_ops.hpp +++ b/src/cpu/aarch64/jit_brgemm_post_ops.hpp @@ -325,8 +325,8 @@ struct jit_brgemm_kernel_post_ops : public jit_generator { const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); // per_oc: conv: 1 << 0, (1 << 1) + (1 << 0) (with groups) // per_oc: ip: 1 << 0 - is_oc_scale_ - = utils::one_of(wei_scales.mask_, 1 << 0, 
(1 << 1) + (1 << 0)); + is_oc_scale_ = utils::one_of( + wei_scales.get_mask(), 1 << 0, (1 << 1) + (1 << 0)); LDD_ = brg.LDD; inp_dt_ = brg.dt_c; diff --git a/src/cpu/aarch64/jit_uni_reorder.cpp b/src/cpu/aarch64/jit_uni_reorder.cpp index d6c20904c90..85e9ae4036c 100644 --- a/src/cpu/aarch64/jit_uni_reorder.cpp +++ b/src/cpu/aarch64/jit_uni_reorder.cpp @@ -2788,13 +2788,10 @@ status_t jit_uni_reorder_t::pd_t::init_scratchpad() { compensation_reduce_size); } - const memory_desc_wrapper input_d(src_md()); - int scales_mask = -1; - bool is_set = false; - CHECK(attr()->scales_.get(DNNL_ARG_DST, &scales_mask, &is_set)); - - if (is_set && scales_mask > 0) { - get_D_values(input_d, scales_mask, nullptr, &D_mask_, nullptr); + if (!attr()->scales_.has_default_values(DNNL_ARG_DST)) { + const memory_desc_wrapper input_d(src_md()); + int mask = attr()->scales_.get_mask(DNNL_ARG_DST); + get_D_values(input_d, mask, nullptr, &D_mask_, nullptr); if (D_mask_ > 1) { scratchpad.template book( memory_tracking::names::key_reorder_precomputed_dst_scales, diff --git a/src/cpu/aarch64/jit_uni_reorder_utils.cpp b/src/cpu/aarch64/jit_uni_reorder_utils.cpp index 63fc09a4cf2..0900d59d17d 100644 --- a/src/cpu/aarch64/jit_uni_reorder_utils.cpp +++ b/src/cpu/aarch64/jit_uni_reorder_utils.cpp @@ -276,24 +276,21 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, p.src_scale_type = scale_type_t::NONE; int src_mask = 0; - bool is_src_set = false; - CHECK(attr->scales_.get(DNNL_ARG_SRC, &src_mask, &is_src_set)); - if (is_src_set) { + if (!attr->scales_.has_default_values(DNNL_ARG_SRC)) { + src_mask = attr->scales_.get_mask(DNNL_ARG_SRC); p.src_scale_type = src_mask == 0 ? scale_type_t::COMMON : scale_type_t::MANY; } p.dst_scale_type = scale_type_t::NONE; int dst_mask = 0; - bool is_dst_set = false; - CHECK(attr->scales_.get(DNNL_ARG_DST, &dst_mask, &is_dst_set)); - if (is_dst_set) { + if (!attr->scales_.has_default_values(DNNL_ARG_DST)) { + dst_mask = attr->scales_.get_mask(DNNL_ARG_DST); p.dst_scale_type = dst_mask == 0 ? scale_type_t::COMMON : scale_type_t::MANY; } - if (is_src_set && is_dst_set && src_mask != dst_mask) - return status::unimplemented; + if (src_mask != dst_mask) return status::unimplemented; p.scale_adjust = (om_d.extra().flags & memory_extra_flags::scale_adjust) ? 
om_d.extra().scale_adjust diff --git a/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp b/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp index 076d5fd321a..ca826296706 100644 --- a/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp +++ b/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp @@ -66,11 +66,11 @@ status_t acl_lowp_matmul_t::pd_t::init(engine_t *engine) { | smask_t::zero_points_runtime | smask_t::post_ops), "only scale, zero point and post-ops attrs supported"); - VDISPATCH_MATMUL(attr()->scales_.get(DNNL_ARG_SRC).mask_ == 0 + VDISPATCH_MATMUL(attr()->scales_.get_mask(DNNL_ARG_SRC) == 0 && attr()->zero_points_.get(DNNL_ARG_SRC) == 0 - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0 + && attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0 && attr()->zero_points_.get(DNNL_ARG_WEIGHTS) == 0 - && attr()->scales_.get(DNNL_ARG_DST).mask_ == 0 + && attr()->scales_.get_mask(DNNL_ARG_DST) == 0 && attr()->zero_points_.get(DNNL_ARG_DST) == 0, "common scales and zero points only"); diff --git a/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp b/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp index bdb4e04ce74..8b16545dd9b 100644 --- a/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp +++ b/src/cpu/aarch64/matmul/acl_lowp_matmul_sq.cpp @@ -52,13 +52,20 @@ status_t acl_lowp_matmul_sq_t::pd_t::init(engine_t *engine) { attr()->has_default_values(smask_t::scales_runtime | smask_t::zero_points_runtime | smask_t::post_ops), "only scale, zero point and post-ops attrs supported"); - VDISPATCH_MATMUL(attr()->scales_.get(DNNL_ARG_SRC).mask_ == 0 - && attr()->zero_points_.get(DNNL_ARG_SRC) == 0 - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0 + + static const std::vector supported_args { + DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}; + for (int arg : supported_args) { + if (attr()->scales_.has_default_values(arg)) continue; + + VDISPATCH_MATMUL(attr()->scales_.get_mask(arg) == 0, + VERBOSE_UNSUPPORTED_SCALES_CFG); + } + + VDISPATCH_MATMUL(attr()->zero_points_.get(DNNL_ARG_SRC) == 0 && attr()->zero_points_.get(DNNL_ARG_WEIGHTS) == 0 - && attr()->scales_.get(DNNL_ARG_DST).mask_ == 0 && attr()->zero_points_.get(DNNL_ARG_DST) == 0, - "common scales and zero points only"); + "common zero points only"); VDISPATCH_MATMUL( !has_runtime_dims_or_strides(), VERBOSE_RUNTIMEDIM_UNSUPPORTED); const memory_desc_wrapper src_d(src_md_); @@ -214,4 +221,4 @@ status_t acl_lowp_matmul_sq_t::execute(const exec_ctx_t &ctx) const { } // namespace aarch64 } // namespace cpu } // namespace impl -} // namespace dnnl \ No newline at end of file +} // namespace dnnl diff --git a/src/cpu/aarch64/matmul/brgemm_matmul.cpp b/src/cpu/aarch64/matmul/brgemm_matmul.cpp index 7ede5613803..ae7255c9b5e 100644 --- a/src/cpu/aarch64/matmul/brgemm_matmul.cpp +++ b/src/cpu/aarch64/matmul/brgemm_matmul.cpp @@ -72,9 +72,9 @@ status_t brgemm_matmul_t::pd_t::init(engine_t *engine) { const std::vector supported_args = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}; bool ok = attr_scales_ok(supported_args); - if (!attr()->scales_.get(DNNL_ARG_SRC).has_default_values() + if (!attr()->scales_.has_default_values(DNNL_ARG_SRC) && !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values() - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0) { + && attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0) { // This case requires scratchpad if (N() == DNNL_RUNTIME_DIM_VAL) ok = false; } diff --git a/src/cpu/aarch64/matmul/brgemm_matmul_utils.cpp b/src/cpu/aarch64/matmul/brgemm_matmul_utils.cpp index 1506be014a9..0610147c752 100644 --- a/src/cpu/aarch64/matmul/brgemm_matmul_utils.cpp +++ 
b/src/cpu/aarch64/matmul/brgemm_matmul_utils.cpp @@ -783,21 +783,22 @@ status_t init_brgemm_matmul_conf(cpu_isa_t isa, brgemm_matmul_conf_t &bgmmc, const auto &src_scales = attr.scales_.get(DNNL_ARG_SRC); const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); - bgmmc.with_scales = !src_scales.has_default_values() - || !wei_scales.has_default_values(); - if (bgmmc.with_scales) { - bgmmc.is_oscale_per_n = wei_scales.mask_ == 1 << (bgmmc.ndims - 1); + const bool has_wei_scales = !wei_scales.has_default_values(); + bgmmc.with_scales = !src_scales.has_default_values() || has_wei_scales; + if (has_wei_scales) { + bgmmc.is_oscale_per_n + = wei_scales.get_mask() == (1 << (bgmmc.ndims - 1)); // only common and per-oc-channel scales are supported - VCONDCHECK_BG(wei_scales.mask_ == 0 || bgmmc.is_oscale_per_n, + VCONDCHECK_BG(wei_scales.get_mask() == 0 || bgmmc.is_oscale_per_n, VERBOSE_UNSUPPORTED_SCALES_CFG); } const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); bgmmc.with_dst_scales = !dst_scales.has_default_values(); // only common scales are supported - if (bgmmc.with_dst_scales && dst_scales.mask_ != 0) - return status::unimplemented; + VCONDCHECK_BG(!(bgmmc.with_dst_scales && dst_scales.get_mask() > 0), + VERBOSE_UNSUPPORTED_SCALES_CFG); const auto &p = attr.post_ops_; bgmmc.with_sum = p.find(primitive_kind::sum) != -1; diff --git a/src/cpu/cpu_primitive.hpp b/src/cpu/cpu_primitive.hpp index ac1dccd4181..d82b9ae0d92 100644 --- a/src/cpu/cpu_primitive.hpp +++ b/src/cpu/cpu_primitive.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ alignas(16) float CONCAT2(scales, _buf16)[16] = {0}; \ const float *scales {nullptr}; \ if ((attr)) { \ - if ((attr)->scales_.get(arg).has_default_values()) { \ + if ((attr)->scales_.has_default_values(arg)) { \ utils::array_set(CONCAT2(scales, _buf16), 1.0f, 16); \ scales = CONCAT2(scales, _buf16); \ } else { \ @@ -92,7 +92,7 @@ #define ASSIGN_ARG_SCALE_VALUE(scale, mem_arg) \ alignas(16) float CONCAT2(CONCAT2(scales, _buf16), mem_arg)[16] = {0}; \ - if (pd()->attr()->scales_.get(mem_arg).has_default_values()) { \ + if (pd()->attr()->scales_.has_default_values(mem_arg)) { \ utils::array_set(CONCAT2(CONCAT2(scales, _buf16), mem_arg), 1.0f, 16); \ scale = CONCAT2(CONCAT2(scales, _buf16), mem_arg); \ } else { \ diff --git a/src/cpu/dw_convolution_utils.hpp b/src/cpu/dw_convolution_utils.hpp index 088e01b9964..03bb6ce43f5 100644 --- a/src/cpu/dw_convolution_utils.hpp +++ b/src/cpu/dw_convolution_utils.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2023 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,23 +43,19 @@ inline status_t get_depthwise_conv_desc(convolution_desc_t &cd_dw, // post-ops after depthwise post-op. 
auto &dw_po = attr_1x1.post_ops_.entry_[dw_po_index].depthwise_conv; - // erase 1x1 conv scales - for (auto arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) { - auto &scale = attr_dw.scales_.get(arg); - if (!scale.has_default_values()) attr_dw.scales_.reset(arg); - } - const auto &dw_src_scales = attr_1x1.scales_.get(DNNL_ARG_DST); const auto &dw_wei_scales = attr_1x1.scales_.get(DNNL_ARG_ATTR_POST_OP_DW | DNNL_ARG_WEIGHTS); const auto &dw_dst_scales = attr_1x1.scales_.get(DNNL_ARG_ATTR_POST_OP_DW | DNNL_ARG_DST); + + assert(attr_dw.scales_.has_default_values()); if (!dw_src_scales.has_default_values()) - attr_dw.scales_.set(DNNL_ARG_SRC, dw_src_scales.mask_); + CHECK(attr_dw.scales_.set(DNNL_ARG_SRC, dw_src_scales.get_mask())); if (!dw_wei_scales.has_default_values()) - attr_dw.scales_.set(DNNL_ARG_WEIGHTS, dw_wei_scales.mask_); + CHECK(attr_dw.scales_.set(DNNL_ARG_WEIGHTS, dw_wei_scales.get_mask())); if (!dw_dst_scales.has_default_values()) - attr_dw.scales_.set(DNNL_ARG_DST, dw_dst_scales.mask_); + CHECK(attr_dw.scales_.set(DNNL_ARG_DST, dw_dst_scales.get_mask())); auto dw_po_len = attr_1x1.post_ops_.len() - (dw_po_index + 1); attr_dw.post_ops_.entry_.resize(dw_po_len); diff --git a/src/cpu/gemm_convolution_utils.cpp b/src/cpu/gemm_convolution_utils.cpp index 01b90f5fc41..0400b66608c 100644 --- a/src/cpu/gemm_convolution_utils.cpp +++ b/src/cpu/gemm_convolution_utils.cpp @@ -2133,8 +2133,8 @@ status_t init_conf(conv_gemm_conf_t &jcp, jcp.dst_os_stride = dst_d.is_blocking_desc() ? dst_d.blocking_desc().strides[ndims - 1] : 0; - jcp.scale_idx_mult = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0; - jcp.with_dst_scale = !attr.scales_.get(DNNL_ARG_DST).has_default_values(); + jcp.scale_idx_mult = attr.scales_.get_mask(DNNL_ARG_WEIGHTS) > 0; + jcp.with_dst_scale = !attr.scales_.has_default_values(DNNL_ARG_DST); book_precomputed_scales(scratchpad, attr.scales_, jcp.ngroups * jcp.oc); if (jcp.zp.src_exists) { diff --git a/src/cpu/gemm_inner_product_utils.cpp b/src/cpu/gemm_inner_product_utils.cpp index 815e953898b..2d637d543cf 100644 --- a/src/cpu/gemm_inner_product_utils.cpp +++ b/src/cpu/gemm_inner_product_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2023 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
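// A minimal standalone sketch (editor's example, not library code) of the
// mask convention the hunks above and below rely on: bit d of a scales mask
// means "one scale per element along logical dimension d" of the affected
// tensor. The helper names here are hypothetical.
#include <cassert>

// Inner product / conv weights put OC first, so per-OC is bit 0; grouped
// conv weights are (g, oc, ...), so per-OC becomes bits 0 and 1.
int per_oc_mask(bool with_groups) {
    return with_groups ? (1 << 1) + (1 << 0) : (1 << 0);
}

// Matmul weights are (..., K, N); per-N scales sit on the last dimension.
int per_n_mask(int ndims) {
    return 1 << (ndims - 1);
}

int main() {
    assert(per_oc_mask(/* with_groups = */ false) == 1);
    assert(per_oc_mask(/* with_groups = */ true) == 3);
    assert(per_n_mask(/* ndims = */ 3) == (1 << 2)); // B x K x N
    return 0;
}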
@@ -157,17 +157,17 @@ pp_kernel_t::pp_kernel_t(size_t OC, size_t MB, dim_t dst_mb_stride, , bias_data_type_(bias_dt) , acc_data_type_(acc_dt) , dst_data_type_(dst_md->data_type) - , do_scale_(!attr->scales_.get(DNNL_ARG_SRC).has_default_values() - || !attr->scales_.get(DNNL_ARG_WEIGHTS).has_default_values()) + , do_scale_(!attr->scales_.has_default_values(DNNL_ARG_SRC) + || !attr->scales_.has_default_values(DNNL_ARG_WEIGHTS)) , ndims_(dst_md->ndims) { - if (do_scale_) { - int wei_mask = attr->scales_.get(DNNL_ARG_WEIGHTS).mask_; + if (!attr->scales_.has_default_values(DNNL_ARG_WEIGHTS)) { + int wei_mask = attr->scales_.get_mask(DNNL_ARG_WEIGHTS); // matmul: per_oc: 1 << (ndims_ - 1) // ip: per_oc: 1 << 0 scale_idx_mult_ = wei_mask == (1 << (ndims_ - 1)) || wei_mask == 1 << 0; } - do_dst_scale_ = !attr->scales_.get(DNNL_ARG_DST).has_default_values(); + do_dst_scale_ = !attr->scales_.has_default_values(DNNL_ARG_DST); post_ops_ = attr->post_ops_; const int eltwise_ind = post_ops_.find(primitive_kind::eltwise); diff --git a/src/cpu/gemm_x8s8s32x_convolution.cpp b/src/cpu/gemm_x8s8s32x_convolution.cpp index 8482ae65eb0..53119b38915 100644 --- a/src/cpu/gemm_x8s8s32x_convolution.cpp +++ b/src/cpu/gemm_x8s8s32x_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2017-2024 Intel Corporation +* Copyright 2017-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -135,10 +135,9 @@ status_t gemm_x8s8s32x_convolution_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, - pd()->IC(), pd()->OC(), false, wei_scale_mask != 0, pd()->attr()); + pd()->IC(), pd()->OC(), false, wei_scale_mask > 0, pd()->attr()); parallel(jcp.nthr, [&](const int ithr, const int nthr) { status_t st_thr = execute_forward_thr(ithr, nthr, src_base, wei_base, @@ -358,16 +357,15 @@ status_t gemm_x8s8s32x_convolution_bwd_data_t::execute_backward_data_thr( const auto diff_src_dt_size = types::data_type_size(diff_src_md.data_type()); - const int scale_idx_mult = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ + const int scale_idx_mult = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == (1 << static_cast(pd()->with_groups())); DEFINE_ARG_SCALES_BUFFER(src_scales, DNNL_ARG_SRC); DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, - pd()->IC(), pd()->OC(), false, wei_scale_mask != 0, pd()->attr()); + pd()->IC(), pd()->OC(), false, wei_scale_mask > 0, pd()->attr()); const dim_t work_amount = jcp.ngroups * jcp.mb; diff --git a/src/cpu/gemm_x8s8s32x_inner_product.cpp b/src/cpu/gemm_x8s8s32x_inner_product.cpp index 341a584a276..cad125ea7be 100644 --- a/src/cpu/gemm_x8s8s32x_inner_product.cpp +++ b/src/cpu/gemm_x8s8s32x_inner_product.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel 
Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,10 +64,9 @@ status_t gemm_x8s8s32x_inner_product_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); auto scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, - IC, OC, false, wei_scale_mask != 0, pd()->attr()); + IC, OC, false, wei_scale_mask > 0, pd()->attr()); int32_t *acc = pd()->dst_is_acc_ ? (int32_t *)dst diff --git a/src/cpu/matmul/gemm_bf16_matmul.cpp b/src/cpu/matmul/gemm_bf16_matmul.cpp index a9e32a34e5f..7758a3db4ed 100644 --- a/src/cpu/matmul/gemm_bf16_matmul.cpp +++ b/src/cpu/matmul/gemm_bf16_matmul.cpp @@ -105,9 +105,9 @@ status_t gemm_bf16_matmul_t::pd_t::check_and_configure_attributes( engine_t *engine) { auto check_attr_scales = [&]() -> bool { bool ok = attr_scales_ok(); - if (!attr()->scales_.get(DNNL_ARG_SRC).has_default_values() - && !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values() - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0) { + if (!attr()->scales_.has_default_values(DNNL_ARG_SRC) + && !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS) + && attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0) { // This case requires scratchpad with unknown size if (N() == DNNL_RUNTIME_DIM_VAL) ok = false; } @@ -145,11 +145,15 @@ status_t gemm_bf16_matmul_t::pd_t::check_and_configure_attributes( // set state CHECK(params_.pp_attr_.copy_from(*attr())); params_.gemm_applies_output_scales_ - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0 && !with_bias(); + = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0 && !with_bias(); if (params_.gemm_applies_output_scales_) { - params_.pp_attr_.scales_.reset(DNNL_ARG_SRC); - params_.pp_attr_.scales_.reset(DNNL_ARG_WEIGHTS); + VDISPATCH_MATMUL_SC(params_.pp_attr_.scales_.set( + DNNL_ARG_SRC, default_quant_entry()), + VERBOSE_UNSUPPORTED_SCALES_CFG); + VDISPATCH_MATMUL_SC(params_.pp_attr_.scales_.set( + DNNL_ARG_WEIGHTS, default_quant_entry()), + VERBOSE_UNSUPPORTED_SCALES_CFG); } // check post-ops @@ -203,11 +207,10 @@ status_t gemm_bf16_matmul_t::execute_ref( DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); auto scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, src_d.dims()[ndims - 1], dst_d.dims()[ndims - 1], false, - wei_scale_mask != 0, pd()->attr()); + wei_scale_mask > 0, pd()->attr()); if (src_d.has_zero_dim() || weights_d.has_zero_dim() || dst_d.has_zero_dim()) @@ -254,7 +257,7 @@ status_t gemm_bf16_matmul_t::execute_ref( const float beta = params.gemm_beta_; const dim_t acc_ldc = dst_is_acc ? 
ldc : N; const int scale_idx_mult - = this->pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ + = this->pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == (1 << (ndims - 1)); std::atomic st(status::success); diff --git a/src/cpu/matmul/gemm_f32_matmul.cpp b/src/cpu/matmul/gemm_f32_matmul.cpp index de57af38944..d44fd2eb0aa 100644 --- a/src/cpu/matmul/gemm_f32_matmul.cpp +++ b/src/cpu/matmul/gemm_f32_matmul.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,9 +50,9 @@ status_t gemm_f32_matmul_t::pd_t::init(engine_t *engine) { auto check_attr_scales = [&]() -> bool { bool ok = attr_scales_ok(); - if (!attr()->scales_.get(DNNL_ARG_SRC).has_default_values() + if (!attr()->scales_.has_default_values(DNNL_ARG_SRC) && !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values() - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0) { + && attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0) { // This case requires scratchpad with unknown size if (N() == DNNL_RUNTIME_DIM_VAL) ok = false; } @@ -131,10 +131,14 @@ status_t gemm_f32_matmul_t::pd_t::configure_attributes() { CHECK(params_.pp_attr_.copy_from(*attr())); params_.gemm_applies_output_scales_ - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0 && !with_bias(); + = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0 && !with_bias(); if (params_.gemm_applies_output_scales_) { - params_.pp_attr_.scales_.reset(DNNL_ARG_SRC); - params_.pp_attr_.scales_.reset(DNNL_ARG_WEIGHTS); + VDISPATCH_MATMUL_SC(params_.pp_attr_.scales_.set( + DNNL_ARG_SRC, default_quant_entry()), + VERBOSE_UNSUPPORTED_SCALES_CFG); + VDISPATCH_MATMUL_SC(params_.pp_attr_.scales_.set( + DNNL_ARG_WEIGHTS, default_quant_entry()), + VERBOSE_UNSUPPORTED_SCALES_CFG); } const auto &po = params_.pp_attr_.post_ops_; @@ -186,11 +190,10 @@ status_t gemm_f32_matmul_t::execute_ref(const exec_ctx_t &ctx) const { DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); auto scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, src_d.dims()[ndims - 1], dst_d.dims()[ndims - 1], false, - wei_scale_mask != 0, pd()->attr()); + wei_scale_mask > 0, pd()->attr()); if (src_d.has_zero_dim() || weights_d.has_zero_dim() || dst_d.has_zero_dim()) @@ -237,7 +240,7 @@ status_t gemm_f32_matmul_t::execute_ref(const exec_ctx_t &ctx) const { const dim_t acc_ldc = dst_is_acc ? 
ldc : N; const int scale_idx_mult - = this->pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ + = this->pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == (1 << (ndims - 1)); std::atomic st(status::success); diff --git a/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp b/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp index 5b9f7de0987..2483b25d048 100644 --- a/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp +++ b/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp @@ -61,9 +61,9 @@ status_t gemm_x8s8s32x_matmul_t::pd_t::init(engine_t *engine) { auto check_attr_scales = [&]() -> bool { bool ok = attr_scales_ok(); - if (!attr()->scales_.get(DNNL_ARG_SRC).has_default_values() - && !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values() - && attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0) { + if (!attr()->scales_.has_default_values(DNNL_ARG_SRC) + && !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS) + && attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0) { // This case requires scratchpad with unknown size if (N() == DNNL_RUNTIME_DIM_VAL) ok = false; } @@ -203,11 +203,10 @@ status_t gemm_x8s8s32x_matmul_t::execute_ref(const exec_ctx_t &ctx) const { DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); auto &scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *scales = precompute_scales(scratchpad, src_scales, wei_scales, src_d.dims()[ndims - 1], dst_d.dims()[ndims - 1], false, - wei_scale_mask != 0, pd()->attr()); + wei_scale_mask > 0, pd()->attr()); DEFINE_ZERO_POINT_VALUE(src_zero_point, DNNL_ARG_SRC); DEFINE_ZERO_POINT_VALUE(weights_zero_point, DNNL_ARG_WEIGHTS); @@ -276,7 +275,7 @@ status_t gemm_x8s8s32x_matmul_t::execute_ref(const exec_ctx_t &ctx) const { const float beta = params.gemm_beta_; const dim_t acc_ldc = dst_is_acc ? ldc : N; const int scale_idx_mult - = this->pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ + = this->pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == (1 << (ndims - 1)); std::atomic st(status::success); diff --git a/src/cpu/matmul/matmul_utils.hpp b/src/cpu/matmul/matmul_utils.hpp index 996683c522d..2712e000189 100644 --- a/src/cpu/matmul/matmul_utils.hpp +++ b/src/cpu/matmul/matmul_utils.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2022 Arm Ltd. 
and affiliates * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -156,6 +156,11 @@ struct matmul_helper_t { static status_t get_quant_md(memory_desc_t &md, const int ndims, const dims_t in_dims, const int quant_mask, const dim_t g0, const dim_t g1, const data_type_t dt) { + if (dt == data_type::undef) { + md = glob_zero_md; + return status::success; + } + dims_t quant_dims {}; utils::copy_dims_with_mask(quant_dims, in_dims, ndims, quant_mask, /* fill_with_ones = */ true); @@ -172,6 +177,8 @@ struct matmul_helper_t { static dim_t get_quant_off(const dims_t &input_idx, const int ndims, const int quant_mask, const dim_t g0, const dim_t g1, const memory_desc_t &quant_md) { + if (types::is_zero_md(&quant_md)) return 0; + dims_t quant_idx {}; utils::array_copy(quant_idx, input_idx, ndims); utils::apply_mask_on_dims(quant_idx, ndims, quant_mask); diff --git a/src/cpu/matmul/ref_matmul.cpp b/src/cpu/matmul/ref_matmul.cpp index d81b63a2b4b..8b6a02a5494 100644 --- a/src/cpu/matmul/ref_matmul.cpp +++ b/src/cpu/matmul/ref_matmul.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -112,25 +112,18 @@ status_t ref_matmul_t::execute_ref(const exec_ctx_t &ctx) const { // arg scales section const auto &attr_scales = pd()->attr()->scales_; - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); const bool with_wei_scales - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const bool with_dst_scales - = !attr_scales.get(DNNL_ARG_DST).has_default_values(); - const auto wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); + const bool with_dst_scales = !attr_scales.has_default_values(DNNL_ARG_DST); + const auto wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); const dim_t wei_scale_stride_n = (wei_scale_mask & pd()->wei_qmask_N()) ? 1 : 0; - const auto &wei_scale_dt = attr_scales.get(DNNL_ARG_WEIGHTS).data_type_; + const auto &wei_scale_dt = attr_scales.get_data_type(DNNL_ARG_WEIGHTS); const auto wei_scales_d = ctx.memory_mdw(DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS); - const auto wei_scale_group_ndim = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; - const auto wei_scale_group_k = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[0] - : 1; - const auto wei_scale_group_n = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[1] - : 1; + const auto wei_scale_group_k = attr_scales.get_group(DNNL_ARG_WEIGHTS, 0); + const auto wei_scale_group_n = attr_scales.get_group(DNNL_ARG_WEIGHTS, 1); // Initialize a memory desc for quant entries for easier offset calculation. 
memory_desc_t wei_scale_md {}; CHECK(matmul_helper_t::get_quant_md(wei_scale_md, ndims, weights_d.dims(), diff --git a/src/cpu/matmul/ref_matmul_int8.cpp b/src/cpu/matmul/ref_matmul_int8.cpp index bbe4804994f..bc5c3744c46 100644 --- a/src/cpu/matmul/ref_matmul_int8.cpp +++ b/src/cpu/matmul/ref_matmul_int8.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021-2024 Intel Corporation +* Copyright 2021-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -124,19 +124,12 @@ status_t ref_matmul_int8_t::execute_ref(const exec_ctx_t &ctx) const { // arg scales section const auto &attr_scales = pd()->attr()->scales_; const bool with_wei_scales - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const bool with_dst_scales - = !attr_scales.get(DNNL_ARG_DST).has_default_values(); - const int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); + const bool with_dst_scales = !attr_scales.has_default_values(DNNL_ARG_DST); + const int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); const auto &wei_scale_dt = attr_scales.get_data_type(DNNL_ARG_WEIGHTS); - const bool wei_scale_per_k = wei_scale_mask & pd()->wei_qmask_K(); - const auto wei_scale_group_ndim = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; - const auto wei_scale_group_k = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[0] - : (wei_scale_per_k ? 1 : K); - const auto wei_scale_group_n = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[1] - : 1; + const auto wei_scale_group_k = attr_scales.get_group(DNNL_ARG_WEIGHTS, 0); + const auto wei_scale_group_n = attr_scales.get_group(DNNL_ARG_WEIGHTS, 1); const auto wei_scale_ngroups_k = K / wei_scale_group_k; // Initialize a memory desc for quant entries for easier offset calculation. memory_desc_t wei_scale_md {}; @@ -144,15 +137,10 @@ status_t ref_matmul_int8_t::execute_ref(const exec_ctx_t &ctx) const { wei_scale_mask, wei_scale_group_k, wei_scale_group_n, wei_scale_dt)); - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); - const int src_scale_mask = attr_scales.get(DNNL_ARG_SRC).mask_; + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); + const int src_scale_mask = attr_scales.get_mask(DNNL_ARG_SRC); const auto &src_scale_dt = attr_scales.get_data_type(DNNL_ARG_SRC); - const bool src_scale_per_k = src_scale_mask & pd()->src_qmask_K(); - const auto src_scale_group_ndim = attr_scales.get(DNNL_ARG_SRC).ndims_; - const auto src_scale_group_k = src_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_SRC).group_dims_[1] - : (src_scale_per_k ? 1 : K); + const auto src_scale_group_k = attr_scales.get_group(DNNL_ARG_SRC, 1); const auto src_scale_ngroups_k = K / src_scale_group_k; // Initialize a memory desc for quant entries for easier offset calculation. memory_desc_t src_scale_md {}; diff --git a/src/cpu/ref_concat.hpp b/src/cpu/ref_concat.hpp index 516e2ff0aac..2cf58d56078 100644 --- a/src/cpu/ref_concat.hpp +++ b/src/cpu/ref_concat.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2017-2024 Intel Corporation +* Copyright 2017-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
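// A standalone sketch, under assumptions, of the grouped-scale indexing that
// get_quant_md()/get_quant_off() implement above for K x N matmul weights:
// scales form a (K/g_k) x (N/g_n) grid, and element (k, n) maps to the group
// cell containing it. All names here are hypothetical, not the library code.
#include <cassert>
#include <cstdint>

int64_t wei_scale_off(
        int64_t k, int64_t n, int64_t N, int64_t g_k, int64_t g_n) {
    const int64_t scale_cols = N / g_n; // number of group cells along N
    return (k / g_k) * scale_cols + (n / g_n); // row-major over the grid
}

int main() {
    // K = 8, N = 4 with groups (4, 1): a 2 x 4 grid of scales.
    assert(wei_scale_off(0, 0, 4, 4, 1) == 0);
    assert(wei_scale_off(5, 3, 4, 4, 1) == 7); // k = 5 falls in K-group 1
    return 0;
}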
@@ -58,11 +58,10 @@ struct ref_concat_t : public primitive_t { reorder_pds_.resize(n_ + use_tent_dst()); for (int i = 0; i < n_; ++i) { primitive_attr_t r_attr; - if (!sc.get(DNNL_ARG_MULTIPLE_SRC + i).has_default_values()) { - int mask = 0; - CHECK(sc.get(DNNL_ARG_MULTIPLE_SRC + i, &mask, nullptr)); - if (mask != 0) return status::unimplemented; - r_attr.scales_.set(DNNL_ARG_SRC, mask); + if (!sc.has_default_values(DNNL_ARG_MULTIPLE_SRC + i)) { + int mask = sc.get_mask(DNNL_ARG_MULTIPLE_SRC + i); + VDISPATCH_CONCAT(mask == 0, VERBOSE_UNSUPPORTED_SCALES_CFG); + CHECK(r_attr.scales_.set(DNNL_ARG_SRC, mask)); } CHECK(reorder_primitive_desc_create(reorder_pds_[i], engine, src_md(i), src_image_md(i), &r_attr)); diff --git a/src/cpu/ref_convolution_int8.cpp b/src/cpu/ref_convolution_int8.cpp index b1c99eb8cda..c54a52372ca 100644 --- a/src/cpu/ref_convolution_int8.cpp +++ b/src/cpu/ref_convolution_int8.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021-2023 Intel Corporation +* Copyright 2021-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ namespace { void dequantize(float &d, dim_t g, dim_t C, dim_t c, const float *wei_scales, bool with_groups, int wei_mask, const float *src_scales) { // scale_idx_mult = 1 for per_channel scales and 0, otherwise - const int wei_scale_idx_mult = wei_mask != 0; + const int wei_scale_idx_mult = wei_mask > 0; float scale = 1.0f; if (src_scales) scale *= src_scales[0]; if (wei_scales) scale *= wei_scales[(g * C + c) * wei_scale_idx_mult]; @@ -63,8 +63,7 @@ status_t ref_convolution_int8_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); DEFINE_ZERO_POINTS_BUFFER(src_zero_point, DNNL_ARG_SRC); DEFINE_ZERO_POINTS_BUFFER(dst_zero_point, DNNL_ARG_DST); @@ -290,8 +289,7 @@ status_t ref_convolution_int8_bwd_data_t::execute_backward_data( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(diff_dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const memory_desc_wrapper diff_dst_d(pd()->diff_dst_md()); const memory_desc_wrapper diff_src_d(pd()->diff_src_md()); diff --git a/src/cpu/ref_deconvolution.cpp b/src/cpu/ref_deconvolution.cpp index 08c2304e675..5771bb5d36d 100644 --- a/src/cpu/ref_deconvolution.cpp +++ b/src/cpu/ref_deconvolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
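// A standalone illustration (editor's example, not the library code) of the
// scale_idx_mult idiom used in the dequantize() change below: a common scale
// (mask == 0) turns the per-channel index into 0, so both cases share one
// loop body.
#include <cassert>

float apply_scale(float acc, int oc, const float *scales, int mask) {
    const int scale_idx_mult = mask > 0; // 0: common scale, 1: per-channel
    return acc * scales[oc * scale_idx_mult];
}

int main() {
    const float common[] = {0.5f};
    const float per_oc[] = {0.5f, 2.0f, 4.0f};
    assert(apply_scale(8.0f, 2, common, 0) == 4.0f); // always scales[0]
    assert(apply_scale(8.0f, 2, per_oc, 1 << 0) == 32.0f); // reads scales[2]
    return 0;
}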
@@ -174,8 +174,7 @@ status_t ref_deconvolution_fwd_t::compute_oscale( DEFINE_ARG_SCALES_BUFFER(src_scales, DNNL_ARG_SRC); DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const memory_desc_wrapper dst_d(pd()->dst_md()); @@ -190,7 +189,7 @@ status_t ref_deconvolution_fwd_t::compute_oscale( const auto maybe_oscale = [](float &d, dim_t oc, const float *src_scales, const float *wei_scales, int wei_mask) { // scale_idx_mult = 1 for per_oc scales and 0, otherwise - const int wei_scale_idx_mult = wei_mask != 0; + const int wei_scale_idx_mult = wei_mask > 0; d *= src_scales[0] * wei_scales[oc * wei_scale_idx_mult]; }; @@ -216,7 +215,7 @@ status_t ref_deconvolution_fwd_t::compute_ref_attrs(const exec_ctx_t &ctx, auto dst = CTX_OUT_MEM(void *, DNNL_ARG_DST); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int dst_scale_mask = pd()->attr()->scales_.get(DNNL_ARG_DST).mask_; + const int dst_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_DST); DEFINE_ZERO_POINTS_BUFFER(dst_zero_point, DNNL_ARG_DST); const bool is_dst_zp_common @@ -242,7 +241,7 @@ status_t ref_deconvolution_fwd_t::compute_ref_attrs(const exec_ctx_t &ctx, const auto maybe_scale = [](float &d, dim_t oc, const float *scales, int mask) { // scale_idx_mult = 1 for per_oc scales and 0, otherwise - const int scale_idx_mult = mask != 0; + const int scale_idx_mult = mask > 0; d *= scales[oc * scale_idx_mult]; }; @@ -536,11 +535,10 @@ status_t ref_deconvolution_fwd_t::execute(const exec_ctx_t &ctx) const { float *conv_output = scratchpad.get(key_deconv_bias); - const auto &arg_scales = pd()->attr()->scales_; - const auto &src_scales = arg_scales.get(DNNL_ARG_SRC); - const auto &wei_scales = arg_scales.get(DNNL_ARG_WEIGHTS); + const auto &scales = pd()->attr()->scales_; - if (!src_scales.has_default_values() || !wei_scales.has_default_values()) { + if (!scales.has_default_values(DNNL_ARG_SRC) + || !scales.has_default_values(DNNL_ARG_WEIGHTS)) { compute_oscale(ctx, conv_output); } diff --git a/src/cpu/ref_fused_convolution.hpp b/src/cpu/ref_fused_convolution.hpp index 1e00b56d576..0fddedd5628 100644 --- a/src/cpu/ref_fused_convolution.hpp +++ b/src/cpu/ref_fused_convolution.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2022 Arm Ltd. 
and affiliates * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -224,10 +224,10 @@ struct ref_fused_convolution_fwd_t : public primitive_t { primitive_attr_t attr_1x1(*attr()); // erase dw_conv post-op scales for (auto arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) { - auto &scale - = attr_1x1.scales_.get(DNNL_ARG_ATTR_POST_OP_DW | arg); - if (!scale.has_default_values()) - attr_1x1.scales_.reset(DNNL_ARG_ATTR_POST_OP_DW | arg); + if (!attr_1x1.scales_.has_default_values( + DNNL_ARG_ATTR_POST_OP_DW | arg)) + CHECK(attr_1x1.scales_.set(DNNL_ARG_ATTR_POST_OP_DW | arg, + default_quant_entry())); } // erase post-ops after fusion as they will be handled separately auto &e = attr_1x1.post_ops_.entry_; @@ -250,7 +250,7 @@ struct ref_fused_convolution_fwd_t : public primitive_t { arg_cache.append_ctx_arg(DNNL_ARG_SRC); arg_cache.append_ctx_arg(DNNL_ARG_WEIGHTS); for (auto arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) - if (!attr_1x1.scales_.get(arg).has_default_values()) + if (!attr_1x1.scales_.has_default_values(arg)) arg_cache.append_ctx_arg(DNNL_ARG_ATTR_SCALES | arg); if (desc()->bias_desc.data_type != data_type::undef) arg_cache.append_ctx_arg(DNNL_ARG_BIAS); @@ -316,12 +316,12 @@ struct ref_fused_convolution_fwd_t : public primitive_t { arg_cache.append_ctx_arg(DNNL_ARG_WEIGHTS, DNNL_ARG_ATTR_POST_OP_DW | DNNL_ARG_WEIGHTS); for (auto arg : {DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) - if (!attr_dw.scales_.get(arg).has_default_values()) + if (!attr_dw.scales_.has_default_values(arg)) arg_cache.append_ctx_arg(DNNL_ARG_ATTR_SCALES | arg, DNNL_ARG_ATTR_POST_OP_DW | DNNL_ARG_ATTR_SCALES | arg); // dw_conv src_scale = 1x1_conv dst_scale - if (!attr_1x1.scales_.get(DNNL_ARG_DST).has_default_values()) + if (!attr_1x1.scales_.has_default_values(DNNL_ARG_DST)) arg_cache.append_ctx_arg( DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC, DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST); diff --git a/src/cpu/ref_inner_product_int8.cpp b/src/cpu/ref_inner_product_int8.cpp index 91198c680ab..322f39da638 100644 --- a/src/cpu/ref_inner_product_int8.cpp +++ b/src/cpu/ref_inner_product_int8.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021-2023 Intel Corporation +* Copyright 2021-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,13 +76,12 @@ status_t ref_inner_product_int8_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); const auto &attr_scales = pd()->attr()->scales_; - const bool with_dst_scales - = !attr_scales.get(DNNL_ARG_DST).has_default_values(); + const bool with_dst_scales = !attr_scales.has_default_values(DNNL_ARG_DST); auto maybe_oscale = [&](float &d, dim_t oc) { // scale_idx_mult = 1 for per_oc scales and 0, otherwise const int scale_idx_mult - = attr_scales.get(DNNL_ARG_WEIGHTS).mask_ == (1 << 0); + = attr_scales.get_mask(DNNL_ARG_WEIGHTS) == (1 << 0); d *= src_scales[0] * wei_scales[oc * scale_idx_mult]; }; diff --git a/src/cpu/ref_sum.hpp b/src/cpu/ref_sum.hpp index 7256114d685..e0e2cfb8669 100644 --- a/src/cpu/ref_sum.hpp +++ b/src/cpu/ref_sum.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2017-2024 Intel Corporation +* Copyright 2017-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
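// A simplified standalone model, under assumptions, of the quant_entry_t
// idea this patch introduces: one value object per argument owning mask,
// groups, and data type, queried through getters instead of raw member
// reads. An editorial sketch, not the actual class.
#include <cassert>

struct toy_quant_entry_t {
    bool set_ = false; // stays in the default state until set() is called
    int mask_ = 0;

    int set(int mask) {
        mask_ = mask;
        set_ = true;
        return 0; // status::success in the real API
    }
    bool has_default_values() const { return !set_; }
    int get_mask() const { return mask_; }
};

int main() {
    toy_quant_entry_t e;
    assert(e.has_default_values()); // nothing configured yet
    e.set(0); // common scale, as ref_sum sets for each source below
    assert(!e.has_default_values() && e.get_mask() == 0);
    return 0;
}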
@@ -46,7 +46,7 @@ struct ref_sum_t : public primitive_t { reorder_pds_.resize(n_ + need_output_reorder()); for (int i = 0; i < n_; ++i) { primitive_attr_t r_attr; - r_attr.scales_.set(DNNL_ARG_SRC, 0); + CHECK(r_attr.scales_.set(DNNL_ARG_SRC, 0)); if (i != 0) r_attr.post_ops_.append_sum(1.0); CHECK(reorder_primitive_desc_create(reorder_pds_[i], engine, src_md(i), dst_acc_md(), &r_attr)); diff --git a/src/cpu/reorder/cpu_reorder_pd.hpp b/src/cpu/reorder/cpu_reorder_pd.hpp index ca69992b0fe..1fbac15ee32 100644 --- a/src/cpu/reorder/cpu_reorder_pd.hpp +++ b/src/cpu/reorder/cpu_reorder_pd.hpp @@ -85,15 +85,15 @@ struct cpu_reorder_pd_t : public reorder_pd_t { const float *dst_scales) const { using namespace dnnl::impl::memory_tracking::names; - int mask = -1; - bool is_set = false; - auto status = attr->scales_.get(DNNL_ARG_DST, &mask, &is_set); - if (status != status::success) return nullptr; + if (attr->scales_.has_default_values(DNNL_ARG_DST)) { + return dst_scales; + } // It's possible that mask > 0 but `count` is still `1`. This case is // covered by `DEFINE_ARG_SCALES_BUFFER` macro and no need to inverse // in such case. - if (is_set && mask > 0 && count > 1) { + int mask = attr->scales_.get_mask(DNNL_ARG_DST); + if (mask > 0 && count > 1) { auto loc_scales = scratchpad.template get( key_reorder_precomputed_dst_scales); if (!loc_scales) return nullptr; diff --git a/src/cpu/reorder/simple_reorder.hpp b/src/cpu/reorder/simple_reorder.hpp index 00b8654ae62..73d8dca2c01 100644 --- a/src/cpu/reorder/simple_reorder.hpp +++ b/src/cpu/reorder/simple_reorder.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2016-2024 Intel Corporation +* Copyright 2016-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -133,12 +133,12 @@ inline status_t get_scales_mask( return status::invalid_arguments; *src_mask = 0; - if (!s.get(DNNL_ARG_SRC).has_default_values()) - *src_mask = s.get(DNNL_ARG_SRC).mask_; + if (!s.has_default_values(DNNL_ARG_SRC)) + *src_mask = s.get_mask(DNNL_ARG_SRC); *dst_mask = 0; - if (!s.get(DNNL_ARG_DST).has_default_values()) - *dst_mask = s.get(DNNL_ARG_DST).mask_; + if (!s.has_default_values(DNNL_ARG_DST)) + *dst_mask = s.get_mask(DNNL_ARG_DST); // This is used in a check function. if (*src_mask > 0 && *dst_mask > 0 && *dst_mask != *src_mask) @@ -152,11 +152,10 @@ inline bool simple_attr_check(const primitive_attr_t *attr, if (sum_support) skip_mask = skip_mask | smask_t::post_ops; if (!attr->has_default_values(skip_mask)) return false; for (int arg : {DNNL_ARG_SRC, DNNL_ARG_DST}) { - const auto &sc = attr->scales_.get(arg); // Data type for scales is not generally supported. - if (!sc.has_default_data_type()) return false; + if (!attr->scales_.has_default_data_type(arg)) return false; // Groups are generally not supported. 
- if (!sc.has_default_groups()) return false; + if (!attr->scales_.get(arg).has_default_groups()) return false; } if (many_scales_support) return true; int src_mask, dst_mask; @@ -2263,8 +2262,7 @@ struct simple_reorder_implhas_default_values(skip_mask), VERBOSE_UNSUPPORTED_ATTR); - VDISPATCH_REORDER_IC( - attr->scales_.get(DNNL_ARG_DST).has_default_values(), + VDISPATCH_REORDER_IC(attr->scales_.has_default_values(DNNL_ARG_DST), VERBOSE_UNSUPPORTED_SCALES_CFG); return status::success; } @@ -2293,8 +2291,8 @@ struct simple_reorder_implattr()->scales_.get(DNNL_ARG_SRC); - const bool has_src_scales = !sc_src.has_default_values(); + const auto &scales = pd->attr()->scales_; + const bool has_src_scales = !scales.has_default_values(DNNL_ARG_SRC); const auto &zps = pd->attr()->zero_points_; const bool has_src_zps = !zps.has_default_values(DNNL_ARG_SRC); @@ -2331,11 +2329,9 @@ struct simple_reorder_impl 0 ? sc_src.group_dims_[0] : 1; + const auto src_scales_group0 = scales.get_group(DNNL_ARG_SRC, 0); // Applied to the last dimension. - const auto src_scales_group1 - = sc_src.ndims_ > 0 ? sc_src.group_dims_[1] : 1; + const auto src_scales_group1 = scales.get_group(DNNL_ARG_SRC, 1); memory_desc_t src_scales_md {}; if (has_src_scales) { @@ -2537,11 +2533,11 @@ struct simple_reorder_implhas_default_values(skip_mask), VERBOSE_UNSUPPORTED_ATTR); VDISPATCH_REORDER_IC(simple_po_check(attr), VERBOSE_UNSUPPORTED_POSTOP); - const auto &sc_dst = attr->scales_.get(DNNL_ARG_DST); - const bool has_dst_scales = !sc_dst.has_default_values(); + const auto &scales = attr->scales_; + const bool has_dst_scales = !scales.has_default_values(DNNL_ARG_DST); if (has_dst_scales) { - VDISPATCH_REORDER_IC(sc_dst.has_default_data_type() - && sc_dst.has_default_groups(), + VDISPATCH_REORDER_IC(scales.has_default_data_type(DNNL_ARG_DST) + && scales.has_default_groups(DNNL_ARG_DST), VERBOSE_UNSUPPORTED_SCALES_CFG); } VDISPATCH_REORDER_IC( @@ -2565,14 +2561,12 @@ struct simple_reorder_implattr()->scales_.get(DNNL_ARG_SRC); - const bool has_src_scales = !sc_src.has_default_values(); + const auto &scales = pd->attr()->scales_; + const bool has_src_scales = !scales.has_default_values(DNNL_ARG_SRC); // Applied to the pre-last dimension. - const auto src_scales_group0 - = sc_src.ndims_ > 0 ? sc_src.group_dims_[0] : 1; + const auto src_scales_group0 = scales.get_group(DNNL_ARG_SRC, 0); // Applied to the last dimension. - const auto src_scales_group1 - = sc_src.ndims_ > 0 ? 
sc_src.group_dims_[1] : 1; + const auto src_scales_group1 = scales.get_group(DNNL_ARG_SRC, 1); memory_desc_t src_scales_md {}; if (has_src_scales) { get_quant_md(src_scales_md, ndims, input_d.dims(), src_scales_mask, @@ -2580,8 +2574,7 @@ struct simple_reorder_implattr()->scales_.get(DNNL_ARG_DST); - const bool has_dst_scales = !sc_dst.has_default_values(); + const bool has_dst_scales = !scales.has_default_values(DNNL_ARG_DST); memory_desc_t dst_scales_md {}; if (has_dst_scales) { get_quant_md(dst_scales_md, ndims, input_d.dims(), dst_scales_mask, @@ -2690,12 +2683,14 @@ struct simple_reorder_t : public primitive_t { spec>::is_applicable(src_md, dst_md, attr); if (status != status::success) return status; - int mask = -1; - bool is_set = false; - CHECK(attr->scales_.get(DNNL_ARG_DST, &mask, &is_set)); const memory_desc_wrapper input_d(src_md); - if (input_d.has_runtime_dims_or_strides() && is_set && mask > 0) - return status::unimplemented; + + int mask = -1; + if (!attr->scales_.has_default_values(DNNL_ARG_DST)) { + mask = attr->scales_.get_mask(DNNL_ARG_DST); + if (input_d.has_runtime_dims_or_strides() && mask > 0) + return status::unimplemented; + } auto _pd = make_unique_pd(attr, src_engine->kind(), src_md, dst_engine->kind(), dst_md); @@ -2709,7 +2704,7 @@ struct simple_reorder_t : public primitive_t { scratchpad.book(memory_tracking::names::key_reorder_space, scratchpad_sz_, 1, 16); - if (is_set && mask > 0) { + if (mask > 0) { dim_t D_mask; _pd->get_D_values(input_d, mask, nullptr, &D_mask, nullptr); scratchpad.template book( diff --git a/src/cpu/scale_utils.cpp b/src/cpu/scale_utils.cpp index c6d92a33e2f..8db6d5fb125 100644 --- a/src/cpu/scale_utils.cpp +++ b/src/cpu/scale_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,21 +32,18 @@ constexpr size_t scales_simd_w = 16; } void book_precomputed_scales(memory_tracking::registrar_t &scratchpad, - const arg_scales_t &attr_scales, size_t wei_scale_count, + const scales_t &attr_scales, size_t wei_scale_count, bool force_scales_book) { using namespace dnnl::impl::memory_tracking::names; - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); const bool with_wei_scales - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const auto wei_scales_dt = attr_scales.get(DNNL_ARG_WEIGHTS).data_type_; - const auto wei_scale_groups_ndims - = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); + if ((with_src_scales && with_wei_scales) || force_scales_book - || (wei_scales_dt != data_type::f32 && with_wei_scales) - || (wei_scale_groups_ndims > 0 && with_wei_scales)) { - const int wei_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + || !attr_scales.has_default_data_type(DNNL_ARG_WEIGHTS) + || !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_groups()) { + const int wei_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); const size_t precomputed_scales_size = wei_mask == 0 ? 
scales_simd_w : nstl::max( @@ -60,27 +57,26 @@ void book_precomputed_scales(memory_tracking::registrar_t &scratchpad, bool req_copy_scales( const primitive_attr_t *attr, const float scale_adjust_factor) { const auto &attr_scales = attr->scales_; - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); const bool with_wei_scales - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const auto wei_scales_dt = attr_scales.get(DNNL_ARG_WEIGHTS).data_type_; - const auto wei_scale_groups_ndims - = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); return (with_src_scales && with_wei_scales) || scale_adjust_factor != 1.0f - || (wei_scales_dt != data_type::f32 && with_wei_scales) - || (wei_scale_groups_ndims > 0 && with_wei_scales); + || !attr_scales.has_default_data_type(DNNL_ARG_WEIGHTS) + || !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_groups(); } const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, const float *src_scales, const float *wei_scales, dim_t oc, const primitive_attr_t *attr, float scale_adjust_factor) { - // Note: per-ic-channel is no supported in default - const int wei_scale_mask = attr->scales_.get(DNNL_ARG_WEIGHTS).mask_; + // Note: per-ic-channel is not supported by default. + const int wei_scale_mask = attr->scales_.get_mask(DNNL_ARG_WEIGHTS); return precompute_scales(scratchpad, src_scales, wei_scales, 1, oc, false, - wei_scale_mask != 0, attr, scale_adjust_factor, false); + wei_scale_mask > 0, attr, scale_adjust_factor, false); } +// Note: `wei_scale_per_ic` and `wei_scale_per_oc` could be derived inside this +// function if all primitives shared the same definition of `per_ic` and +// `per_oc` masks; in practice, matmul differs from everybody else. const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, const float *src_scales, const float *wei_scales, dim_t IC, dim_t OC, const bool wei_scale_per_ic, const bool wei_scale_per_oc, @@ -89,14 +85,15 @@ const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, using namespace dnnl::impl::memory_tracking::names; const auto &attr_scales = attr->scales_; - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); const auto wei_scale_count = (wei_scale_per_ic ? IC : 1) * (wei_scale_per_oc ? OC : 1); const float *scales = nullptr; if (req_copy_scales(attr, scale_adjust_factor)) { - const int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + assert(wei_scale_mask >= 0); + size_t size = 0; auto loc_scales = scratchpad.template get<float>(key_precomputed_scales, &size); @@ -108,12 +105,9 @@ const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, const dim_t count = nstl::min( static_cast<dim_t>(size / sizeof(float)), wei_scale_count); const auto wei_scale_dt - = attr_scales.get(DNNL_ARG_WEIGHTS).data_type_; - const auto wei_scale_groups_ndims - = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; - const auto wei_scale_groups_ic = wei_scale_groups_ndims > 0 - ?
attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[0] - : 1; + = attr_scales.get_data_type(DNNL_ARG_WEIGHTS); + const auto wei_scale_groups_ic + = attr_scales.get_group(DNNL_ARG_WEIGHTS, 0); // Note: per-ic-channel scales are only supported for // weights decompression for now if ((wei_scale_per_ic && wei_scale_groups_ic > 1) diff --git a/src/cpu/scale_utils.hpp b/src/cpu/scale_utils.hpp index 7c1ce535889..48164b776d4 100644 --- a/src/cpu/scale_utils.hpp +++ b/src/cpu/scale_utils.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ namespace impl { namespace cpu { void book_precomputed_scales(memory_tracking::registrar_t &scratchpad, - const arg_scales_t &attr_scales, size_t wei_scales_count, + const scales_t &attr_scales, size_t wei_scales_count, bool force_scales_book = false); bool req_copy_scales( diff --git a/src/cpu/x64/brgemm/brgemm.cpp b/src/cpu/x64/brgemm/brgemm.cpp index 38e2975fd02..11b3bc8a78e 100644 --- a/src/cpu/x64/brgemm/brgemm.cpp +++ b/src/cpu/x64/brgemm/brgemm.cpp @@ -435,23 +435,26 @@ status_t brgemm_desc_set_postops(brgemm_desc_t *brg, if (brg->with_scales) { // Note. the current version supports only two different output scale // types: - // 1) common (mask_ = 0) + // 1) common (mask = 0) // 2) per_n_dim_scale - broadcast across n dimension; // for convolution and inner product primitives it corresponds - // to "per_oc" mask_ = 1 << 1; for matmul - to - // mask_ = (1 << (ndims - 1))), where ndims is number of + // to "per_oc" mask = 1 << 1; for matmul - to + // mask = (1 << (ndims - 1)), where ndims is number of // dimensions for original matmul problem - // So if wei_scales.mask_ != 0 (not common) it's assumed here that scale - // type is per_n_dim_scale and driver which calls brgemm kernel checked - // that mask has correct value for this case - brg->is_oc_scale = wei_scales.mask_ != 0; + // So if wei_scales.get_mask() > 0 (not common) it's assumed here that + // the scale type is per_n_dim_scale and the driver that calls the + // brgemm kernel has checked that the mask is correct for this case + brg->is_oc_scale = wei_scales.get_mask() > 0; } const auto &dst_scales = attr->scales_.get(DNNL_ARG_DST); brg->with_dst_scales = !dst_scales.has_default_values(); - const bool scales_ok = src_scales.mask_ == 0 && dst_scales.mask_ == 0 - && attr->scales_.has_default_values( - {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}); + const bool scales_ok = attr->scales_.has_default_values({DNNL_ARG_SRC, + DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) + && IMPLICATION(!src_scales.has_default_values(), + src_scales.get_mask() == 0) + && IMPLICATION(!dst_scales.has_default_values(), + dst_scales.get_mask() == 0); if (!scales_ok) return status::unimplemented; auto init_zp_type diff --git a/src/cpu/x64/brgemm/capi/brgemm_api.cpp b/src/cpu/x64/brgemm/capi/brgemm_api.cpp index 3dae3e3409c..b70822995b7 100644 --- a/src/cpu/x64/brgemm/capi/brgemm_api.cpp +++ b/src/cpu/x64/brgemm/capi/brgemm_api.cpp @@ -259,10 +259,9 @@ status_t brgemm_t::execute(const void *A_ptr, const void *B_ptr, // TODO: delegate extra memory to scratchpad?
std::vector<float> wei_scales_v(N_); - const bool has_src_scales - = !attr_.scales_.get(DNNL_ARG_SRC).has_default_values(); + const bool has_src_scales = !attr_.scales_.has_default_values(DNNL_ARG_SRC); const bool has_wei_scales - = !attr_.scales_.get(DNNL_ARG_WEIGHTS).has_default_values(); + = !attr_.scales_.has_default_values(DNNL_ARG_WEIGHTS); // Save src scale value to re-use it. float src_scale_val = 1.f; @@ -279,7 +278,7 @@ status_t brgemm_t::execute(const void *A_ptr, const void *B_ptr, const void *wei_scales_ptr = attr_params->get_scales(DNNL_ARG_WEIGHTS); if (wei_scales_ptr == nullptr) return status::invalid_arguments; - int wei_mask = attr_.scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = attr_.scales_.get_mask(DNNL_ARG_WEIGHTS); if (wei_mask > 0) { for (dim_t i = 0; i < N_; i++) { const float wei_scale_val = cpu::io::load_float_value( @@ -300,7 +299,7 @@ status_t brgemm_t::execute(const void *A_ptr, const void *B_ptr, // Destination scales. Require manual extending to full simd broadcast. alignas(64) float dst_scales_buf[16] = {0}; - if (!attr_.scales_.get(DNNL_ARG_DST).has_default_values()) { + if (!attr_.scales_.has_default_values(DNNL_ARG_DST)) { const void *dst_scales_ptr = attr_params->get_scales(DNNL_ARG_DST); if (dst_scales_ptr == nullptr) return status::invalid_arguments; diff --git a/src/cpu/x64/jit_avx512_core_amx_1x1_conv_kernel.cpp b/src/cpu/x64/jit_avx512_core_amx_1x1_conv_kernel.cpp index e57e4b9ed76..18643219cd4 100644 --- a/src/cpu/x64/jit_avx512_core_amx_1x1_conv_kernel.cpp +++ b/src/cpu/x64/jit_avx512_core_amx_1x1_conv_kernel.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1217,9 +1217,8 @@ status_t jit_avx512_core_amx_1x1_fwd_kernel_t::init_conf(jit_conv_conf_t &jcp, = (avaliable_ops) ? ops_tile_store / avaliable_ops + 1 : 0; if (jcp.per_one_pstore > 12) jcp.per_one_pstore = 0; - const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = attr.scales_.get_mask(DNNL_ARG_WEIGHTS) > 0; jcp.dst_scale = !dst_scales.has_default_values(); return status::success; diff --git a/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.cpp b/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.cpp index 5a61a9fa38f..33c84aeda26 100644 --- a/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.cpp +++ b/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
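The recurring change in these hunks, replacing direct `mask_`, `data_type_`, and `group_dims_` field reads with `get_mask()`, `get_data_type()`, and `get_group()` queries, follows from the new `quant_entry_t`: a mask is only meaningful once scales were explicitly set, which is why the old `mask_ != 0` comparisons become guarded `get_mask() > 0` checks. A minimal standalone sketch of that contract (the type and the assert are assumptions for illustration, not the library's exact implementation):

    #include <cassert>

    // Reduced model of the quant_entry_t idea: a default entry reports
    // has_default_values() == true, and get_mask() is only valid on an
    // entry that was explicitly set.
    struct quant_entry_sketch_t {
        bool set_ = false;
        int mask_ = 0;
        bool has_default_values() const { return !set_; }
        int get_mask() const {
            assert(set_); // querying a default entry is a programmer error
            return mask_;
        }
    };

    int main() {
        quant_entry_sketch_t wei;
        // The guarded pattern the patch converges on:
        const bool is_oc_scale = !wei.has_default_values() && wei.get_mask() > 0;
        return is_oc_scale ? 1 : 0;
    }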
@@ -73,11 +73,10 @@ status_t jit_avx512_core_amx_1x1_convolution_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->IC(), - pd()->OC(), false, wei_scale_mask != 0, pd()->attr(), + pd()->OC(), false, wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get()); DEFINE_ZERO_POINTS_BUFFER(src_zero_point, DNNL_ARG_SRC); diff --git a/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.hpp b/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.hpp index eae42216786..45785a4d12a 100644 --- a/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.hpp +++ b/src/cpu/x64/jit_avx512_core_amx_1x1_convolution.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -118,8 +118,8 @@ struct jit_avx512_core_amx_1x1_convolution_fwd_t : public primitive_t { const auto attr = pd()->attr(); if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); diff --git a/src/cpu/x64/jit_avx512_core_amx_conv_kernel.cpp b/src/cpu/x64/jit_avx512_core_amx_conv_kernel.cpp index 1e1e532de69..1af812d0e5d 100644 --- a/src/cpu/x64/jit_avx512_core_amx_conv_kernel.cpp +++ b/src/cpu/x64/jit_avx512_core_amx_conv_kernel.cpp @@ -2655,7 +2655,7 @@ status_t jit_avx512_core_amx_fwd_kernel_t::init_conf(jit_conv_conf_t &jcp, const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); // Note: currently unsupported, results in seg-fault @@ -3969,7 +3969,7 @@ status_t jit_avx512_core_amx_bwd_data_kernel_t::init_conf(jit_conv_conf_t &jcp, const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_ic_scale = wei_scales.mask_ != 0; + jcp.is_ic_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); return status::success; diff --git a/src/cpu/x64/jit_avx512_core_amx_convolution.cpp b/src/cpu/x64/jit_avx512_core_amx_convolution.cpp index 2377ca8bf47..55ad96a0245 100644 --- a/src/cpu/x64/jit_avx512_core_amx_convolution.cpp +++ b/src/cpu/x64/jit_avx512_core_amx_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
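All of the AMX convolution drivers in this region funnel into `scale_utils::precompute_scales()`, which folds the single source scale and any ISA-specific adjustment factor into the per-channel weight scales once, so the kernels only ever read one combined array. A simplified sketch of that folding (this helper is illustrative, not the library function):

    #include <vector>

    // oscales[i] = src_scale * wei_scales[i] * adjust_factor, where
    // adjust_factor is e.g. 1/wei_adj_scale on ISAs without VNNI (see the
    // `signed_input && !has_vnni` branches elsewhere in this patch).
    std::vector<float> fold_scales(float src_scale,
            const std::vector<float> &wei_scales, float adjust_factor) {
        std::vector<float> oscales(wei_scales.size());
        for (size_t i = 0; i < oscales.size(); ++i)
            oscales[i] = src_scale * wei_scales[i] * adjust_factor;
        return oscales;
    }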
@@ -99,11 +99,10 @@ jit_avx512_core_amx_convolution_fwd_t::execute_forward_reduced_lowering( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->IC(), - pd()->OC(), false, wei_scale_mask != 0, pd()->attr(), + pd()->OC(), false, wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get()); auto inp_p_buffer = ctx.get_scratchpad_grantor().template get( @@ -457,11 +456,10 @@ status_t jit_avx512_core_amx_convolution_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->IC(), - pd()->OC(), false, wei_scale_mask != 0, pd()->attr(), + pd()->OC(), false, wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get()); // TODO: use block offset instead of hand-calculated one @@ -831,11 +829,10 @@ status_t jit_avx512_core_amx_convolution_bwd_data_t::execute_backward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->IC(), - pd()->OC(), false, wei_scale_mask != 0, pd()->attr(), + pd()->OC(), false, wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get()); amx_utils::execute_backward_convolution_body(ctx, pd()->jcp_, kernel_, diff --git a/src/cpu/x64/jit_avx512_core_amx_convolution.hpp b/src/cpu/x64/jit_avx512_core_amx_convolution.hpp index ccd9a4d0a73..c48820e7d52 100644 --- a/src/cpu/x64/jit_avx512_core_amx_convolution.hpp +++ b/src/cpu/x64/jit_avx512_core_amx_convolution.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -120,8 +120,8 @@ struct jit_avx512_core_amx_convolution_fwd_t : public primitive_t { const auto attr = pd()->attr(); if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); @@ -203,8 +203,8 @@ struct jit_avx512_core_amx_convolution_bwd_data_t : public primitive_t { const auto attr = pd()->attr(); if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); diff --git a/src/cpu/x64/jit_avx512_core_amx_deconvolution.cpp b/src/cpu/x64/jit_avx512_core_amx_deconvolution.cpp index de930820f80..cabd338661d 100644 --- a/src/cpu/x64/jit_avx512_core_amx_deconvolution.cpp +++ b/src/cpu/x64/jit_avx512_core_amx_deconvolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021-2024 Intel Corporation +* Copyright 2021-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,11 +78,10 @@ status_t jit_avx512_core_amx_deconvolution_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = precompute_scales(ctx.get_scratchpad_grantor(), src_scales, wei_scales, src_d.dims()[1], dst_d.dims()[1], false, - wei_scale_mask != 0, pd()->attr()); + wei_scale_mask > 0, pd()->attr()); // The body of bwd/d convolution harness is called with: // 1. src as input instead of diff_dst diff --git a/src/cpu/x64/jit_avx512_core_scale_precompute.cpp b/src/cpu/x64/jit_avx512_core_scale_precompute.cpp index 1f98294a715..9abae9b9e1f 100644 --- a/src/cpu/x64/jit_avx512_core_scale_precompute.cpp +++ b/src/cpu/x64/jit_avx512_core_scale_precompute.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, ; if (jit_scale_precompute) { const auto &attr_scales = attr->scales_; - const int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); size_t size = 0; auto loc_scales = scratchpad.template get( memory_tracking::names::key_precomputed_scales, &size); @@ -53,10 +53,10 @@ const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, const auto wei_scale_stride_ic = wei_scale_per_ic ? wei_scale_per_oc ? 
OC : 1 : 0; const auto with_wei_scale - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const auto wei_scale_groups_ndims - = with_wei_scale ? attr_scales.get(DNNL_ARG_WEIGHTS).ndims_ : 0; - const auto wei_scale_group_stride = wei_scale_groups_ndims > 0 + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); + const auto wei_scale_has_groups = with_wei_scale + && !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_groups(); + const auto wei_scale_group_stride = wei_scale_has_groups ? wei_scale_stride_ic * sizeof(float) : 0; @@ -66,14 +66,14 @@ const float *precompute_scales(const memory_tracking::grantor_t &scratchpad, assert(req_copy_scales(attr, scale_adjust_factor)); assert(mayiuse(avx512_core)); - assert(wei_scale_mask != 0); - if (wei_scale_groups_ndims > 0) { + assert(wei_scale_mask > 0); + if (wei_scale_has_groups) { assert(count == wei_scale_count); const auto wei_scale_groups_ic - = attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[0]; + = attr_scales.get_group(DNNL_ARG_WEIGHTS, 0); const dim_t wei_scale_nb_ic = IC / wei_scale_groups_ic; const auto wei_scale_dt_sz = types::data_type_size( - attr_scales.get(DNNL_ARG_WEIGHTS).data_type_); + attr_scales.get_data_type(DNNL_ARG_WEIGHTS)); for (int nb_ic = 0; nb_ic < wei_scale_nb_ic; nb_ic++) { const auto offset = nb_ic * wei_scale_stride_ic; jrp.nelems_ = wei_scale_stride_ic; diff --git a/src/cpu/x64/jit_avx512_core_scale_precompute.hpp b/src/cpu/x64/jit_avx512_core_scale_precompute.hpp index 432a9aee05c..a93fceeca65 100644 --- a/src/cpu/x64/jit_avx512_core_scale_precompute.hpp +++ b/src/cpu/x64/jit_avx512_core_scale_precompute.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,17 +70,12 @@ struct jit_avx512_core_scale_precompute_t : public jit_generator { const primitive_attr_t *attr, const float scale_adjust_factor = 1) : jit_generator(jit_name()) , attr_(attr) - , with_wei_scales_( - !attr_->scales_.get(DNNL_ARG_WEIGHTS).has_default_values()) + , with_wei_scales_(!attr_->scales_.has_default_values(DNNL_ARG_WEIGHTS)) , wei_scales_dt_(with_wei_scales_ - ? attr_->scales_.get(DNNL_ARG_WEIGHTS).data_type_ + ? attr_->scales_.get_data_type(DNNL_ARG_WEIGHTS) : data_type::f32) , wei_scales_dsz_(types::data_type_size(wei_scales_dt_)) - , wei_groups_ic_(with_wei_scales_ - && attr_->scales_.get(DNNL_ARG_WEIGHTS).ndims_ - > 0 - ? attr_->scales_.get(DNNL_ARG_WEIGHTS).group_dims_[0] - : 1) + , wei_groups_ic_(attr_->scales_.get_group(DNNL_ARG_WEIGHTS, 0)) , scale_adjust_factor_(scale_adjust_factor) , compute_scale_factor_(scale_adjust_factor_ != 1) {} diff --git a/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp b/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp index b8e581de704..a5387dbefad 100644 --- a/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp +++ b/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
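The precompute hunks above also cover grouped weight scales along IC: one scale value covers `groups_ic` consecutive input channels (the value returned by `get_group(DNNL_ARG_WEIGHTS, 0)`), so the scale array has IC / groups_ic rows and the kernel walks it with a stride derived from the row length. A sketch of that indexing, under the assumption that IC is divisible by the group size:

    #include <cstdint>

    // Row of the grouped scale array that covers input channel `ic`;
    // groups_ic == 1 degenerates to plain per-ic scales.
    int64_t scale_row(int64_t ic, int64_t groups_ic) {
        return ic / groups_ic;
    }

    // Number of scale rows, i.e. wei_scale_nb_ic in the kernel above.
    int64_t num_scale_rows(int64_t IC, int64_t groups_ic) {
        return IC / groups_ic;
    }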
@@ -1206,7 +1206,7 @@ status_t jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_conf( const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); jcp.wei_adj_scale @@ -1222,7 +1222,7 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_scratchpad( const jit_1x1_conv_conf_t &jcp, const primitive_attr_t &attr) { using namespace dnnl::impl::memory_tracking::names; - const int wei_mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = wei_mask == 0 ? 1 : static_cast(jcp.oc) * jcp.ngroups; const dim_t count = nstl::max(scales_count, (dim_t)jcp.ic_block); diff --git a/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_convolution.cpp b/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_convolution.cpp index 538a89cf5ee..c1b0c07dfe3 100644 --- a/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_convolution.cpp +++ b/src/cpu/x64/jit_avx512_core_x8s8s32x_1x1_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2023 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,7 +70,7 @@ status_t jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t::execute_forward( = scratchpad.template get(key_conv_adjusted_scales); // Src scale is always a single value float src_scale = src_scales[0]; - int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 1.f / pd()->jcp_.wei_adj_scale : 1.f; @@ -92,7 +92,7 @@ status_t jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t::execute_forward( auto dw_local_scales = dw_scratchpad.template get(key_conv_adjusted_scales); auto attr_dw = pd()->dw_conv_pd_->attr(); - int wei_mask = attr_dw->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = attr_dw->scales_.get_mask(DNNL_ARG_WEIGHTS); dim_t count = wei_mask == 0 ? 1 : pd()->dw_conv_pd_->OC(); float factor = 1.f / jcp_dw->wei_adj_scale; if (count == 1) { diff --git a/src/cpu/x64/jit_avx512_core_x8s8s32x_conv_kernel.cpp b/src/cpu/x64/jit_avx512_core_x8s8s32x_conv_kernel.cpp index a808e692752..f933cbf2071 100644 --- a/src/cpu/x64/jit_avx512_core_x8s8s32x_conv_kernel.cpp +++ b/src/cpu/x64/jit_avx512_core_x8s8s32x_conv_kernel.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2016-2024 Intel Corporation +* Copyright 2016-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
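The `init_scratchpad()` hunks in this region share one sizing rule: a common scale (mask 0) books a single value, a per-oc mask books one slot per output channel across all groups, and the result is padded up to the vector width so the kernel can always issue full loads. Roughly, as a sketch (`simd_w` is 8 or 16 depending on the ISA):

    #include <algorithm>
    #include <cstdint>

    // Floats to book for adjusted scales: 1 for a common scale, oc * ngroups
    // per-channel values otherwise, never less than one SIMD vector.
    int64_t booked_scale_count(
            int wei_mask, int64_t oc, int64_t ngroups, int64_t simd_w) {
        const int64_t scales_count = wei_mask == 0 ? 1 : oc * ngroups;
        return std::max(scales_count, simd_w);
    }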
@@ -1486,7 +1486,7 @@ status_t jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp, const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); jcp.has_vnni = mayiuse(avx512_core_vnni); @@ -1765,7 +1765,7 @@ status_t jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp, void jit_avx512_core_x8s8s32x_fwd_kernel::init_scratchpad( memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp, const primitive_attr_t &attr) { - const int wei_mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = wei_mask == 0 ? 1 : jcp.oc * jcp.ngroups; dim_t count = wei_mask == 0 ? (dim_t)16 : scales_count; scratchpad.book(key_conv_adjusted_scales, count); diff --git a/src/cpu/x64/jit_avx512_core_x8s8s32x_convolution.cpp b/src/cpu/x64/jit_avx512_core_x8s8s32x_convolution.cpp index 1242fd8e3f8..50d6915d2b1 100644 --- a/src/cpu/x64/jit_avx512_core_x8s8s32x_convolution.cpp +++ b/src/cpu/x64/jit_avx512_core_x8s8s32x_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2016-2023 Intel Corporation +* Copyright 2016-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ const float *jit_avx512_core_x8s8s32x_convolution_fwd_t::adjust_oscales( const float *wei_scales) const { auto loc_scales = scratchpad.template get(key_conv_adjusted_scales); const float src_scale = src_scales[0]; - const int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 1.f / pd()->jcp_.wei_adj_scale : 1.f; diff --git a/src/cpu/x64/jit_avx512_core_x8s8s32x_deconvolution.cpp b/src/cpu/x64/jit_avx512_core_x8s8s32x_deconvolution.cpp index c02bc84bdfe..b082a3f68e2 100644 --- a/src/cpu/x64/jit_avx512_core_x8s8s32x_deconvolution.cpp +++ b/src/cpu/x64/jit_avx512_core_x8s8s32x_deconvolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -294,7 +294,7 @@ status_t _jit_avx512_core_x8s8s32x_deconv_fwd_kernel::init_conf( const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); jcp.dst_dt = dst_d.data_type(); @@ -386,7 +386,7 @@ bool _jit_avx512_core_x8s8s32x_deconv_fwd_kernel::post_ops_ok( void _jit_avx512_core_x8s8s32x_deconv_fwd_kernel::init_scratchpad( memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp, const primitive_attr_t &attr) { - const int mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = mask == 0 ? 
1 : static_cast(jcp.oc) * jcp.ngroups; const dim_t count = nstl::max(scales_count, 16); @@ -1393,7 +1393,7 @@ const float *jit_avx512_core_x8s8s32x_deconvolution_fwd_t::adjust_oscales( const memory_tracking::grantor_t &scratchpad, const float *src_scales, const float *wei_scales) const { auto loc_scales = scratchpad.template get(key_conv_adjusted_scales); - int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 1.f / pd()->jcp_.wei_adj_scale : 1.0f; diff --git a/src/cpu/x64/jit_brdgmm_dw_conv.cpp b/src/cpu/x64/jit_brdgmm_dw_conv.cpp index 481fcf930cd..ab12b82bccf 100644 --- a/src/cpu/x64/jit_brdgmm_dw_conv.cpp +++ b/src/cpu/x64/jit_brdgmm_dw_conv.cpp @@ -240,7 +240,7 @@ status_t brdgmm_dw_convolution_fwd_t::pd_t::init(engine_t *engine) { const auto &wei_scales = attr_.scales_.get(DNNL_ARG_WEIGHTS); jcp.with_scale = !src_scales.has_default_values() || !wei_scales.has_default_values(); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; const bool scales_ok = attr_scales_ok({DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}); @@ -556,8 +556,8 @@ status_t brdgmm_dw_convolution_fwd_t::init(engine_t *engine) { const auto attr = pd()->attr(); if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); @@ -587,11 +587,10 @@ status_t brdgmm_dw_convolution_fwd_t::execute(const exec_ctx_t &ctx) const { DEFINE_ZERO_POINTS_BUFFER(src_zero_point, DNNL_ARG_SRC); DEFINE_ZERO_POINTS_BUFFER(dst_zero_point, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->IC(), - pd()->OC(), false, wei_scale_mask != 0, pd()->attr(), + pd()->OC(), false, wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get()); const memory_desc_wrapper weights_d(pd()->weights_md(0)); diff --git a/src/cpu/x64/jit_brgemm_1x1_conv.cpp b/src/cpu/x64/jit_brgemm_1x1_conv.cpp index 4d6bb70659b..c70d377fc12 100644 --- a/src/cpu/x64/jit_brgemm_1x1_conv.cpp +++ b/src/cpu/x64/jit_brgemm_1x1_conv.cpp @@ -284,8 +284,8 @@ status_t brgemm_1x1_convolution_fwd_t::init(engine_t *engine) { if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr, jcp.scale_adjust_factor)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t( attr, jcp.scale_adjust_factor))); @@ -721,11 +721,10 @@ status_t brgemm_1x1_convolution_fwd_t::execute_forward_all( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float 
*oscales = scale_utils::precompute_scales(scratchpad, src_scales, wei_scales, pd()->IC(), pd()->OC(), false, - wei_scale_mask != 0, pd()->attr(), jit_scale_precompute_.get(), + wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get(), jcp.scale_adjust_factor); DEFINE_ZERO_POINT_VALUE(src_zero_point, DNNL_ARG_SRC); diff --git a/src/cpu/x64/jit_brgemm_conv.cpp b/src/cpu/x64/jit_brgemm_conv.cpp index b55281b1248..6bdf480102a 100644 --- a/src/cpu/x64/jit_brgemm_conv.cpp +++ b/src/cpu/x64/jit_brgemm_conv.cpp @@ -929,8 +929,8 @@ status_t brgemm_convolution_fwd_t::init(engine_t *engine) { if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr, jcp.scale_adjust_factor)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t( attr, jcp.scale_adjust_factor))); @@ -1270,11 +1270,10 @@ status_t brgemm_convolution_fwd_t::execute(const exec_ctx_t &ctx) const { const memory_tracking::grantor_t scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales(scratchpad, src_scales, wei_scales, pd()->IC(), pd()->OC(), false, - wei_scale_mask != 0, pd()->attr(), jit_scale_precompute_.get(), + wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get(), jcp.scale_adjust_factor); brgemm_exec_ctx_t brgemm_ctx(ctx, _pd); diff --git a/src/cpu/x64/jit_brgemm_conv_bwd_strided.cpp b/src/cpu/x64/jit_brgemm_conv_bwd_strided.cpp index cf15c05c114..0b0cb2d8c4d 100644 --- a/src/cpu/x64/jit_brgemm_conv_bwd_strided.cpp +++ b/src/cpu/x64/jit_brgemm_conv_bwd_strided.cpp @@ -647,8 +647,8 @@ status_t brgemm_convolution_bwd_strided_t::init(engine_t *engine) { if (is_jit_supported && pd()->IC() > 1 && req_copy_scales(attr, jcp.scale_adjust_factor)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t( attr, jcp.scale_adjust_factor))); @@ -698,11 +698,10 @@ status_t brgemm_convolution_bwd_strided_t::execute( const memory_tracking::grantor_t scratchpad = ctx.get_scratchpad_grantor(); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales(scratchpad, src_scales, wei_scales, pd()->OC(), pd()->IC(), false, - wei_scale_mask != 0, pd()->attr(), jit_scale_precompute_.get(), + wei_scale_mask > 0, pd()->attr(), jit_scale_precompute_.get(), jcp.scale_adjust_factor); brgemm_bwd_exec_ctx_t brgemm_ctx(ctx, _pd); diff --git a/src/cpu/x64/jit_brgemm_conv_bwd_utils.cpp b/src/cpu/x64/jit_brgemm_conv_bwd_utils.cpp index 268755c69ab..c7d4e12f9d8 100644 --- a/src/cpu/x64/jit_brgemm_conv_bwd_utils.cpp +++ b/src/cpu/x64/jit_brgemm_conv_bwd_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the 
"License"); * you may not use this file except in compliance with the License. @@ -2048,7 +2048,7 @@ status_t init_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, jcp.with_scales = !src_scales.has_default_values() || !wei_scales.has_default_values() || jcp.scale_adjust_factor != 1.0f; - jcp.is_ic_scale = wei_scales.mask_ != 0; + jcp.is_ic_scale = wei_scales.get_mask() > 0; } jcp.req_brg_comp_pad = false; diff --git a/src/cpu/x64/jit_brgemm_conv_utils.cpp b/src/cpu/x64/jit_brgemm_conv_utils.cpp index 8e0e284cc7c..a0f973ed87e 100644 --- a/src/cpu/x64/jit_brgemm_conv_utils.cpp +++ b/src/cpu/x64/jit_brgemm_conv_utils.cpp @@ -2314,7 +2314,7 @@ status_t init_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, jcp.with_scales = !src_scales.has_default_values() || !wei_scales.has_default_values() || jcp.scale_adjust_factor != 1.0f; - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; const bool compensation_w_padding = (jcp.s8s8_compensation_required || jcp.src_zero_point) @@ -2577,7 +2577,7 @@ status_t init_1x1_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, jcp.with_scales = !src_scales.has_default_values() || !wei_scales.has_default_values() || jcp.scale_adjust_factor != 1.0f; - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; // enable ununroll_bd_loop for big shapes to reduce kernel sizes jcp.ununroll_bd_loop diff --git a/src/cpu/x64/jit_brgemm_inner_product.cpp b/src/cpu/x64/jit_brgemm_inner_product.cpp index 45d40417af8..c9d21c9cb19 100644 --- a/src/cpu/x64/jit_brgemm_inner_product.cpp +++ b/src/cpu/x64/jit_brgemm_inner_product.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -94,8 +94,7 @@ status_t brgemm_inner_product_fwd_t::execute_forward( DEFINE_ARG_SCALES_BUFFER(wei_scales, DNNL_ARG_WEIGHTS); DEFINE_ARG_SCALES_BUFFER(dst_scales, DNNL_ARG_DST); - const int wei_scale_mask - = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); const float *oscales = scale_utils::precompute_scales(scratchpad, src_scales, wei_scales, pd()->IC(), pd()->OC(), false, wei_scale_mask == (1 << 0), diff --git a/src/cpu/x64/jit_brgemm_inner_product.hpp b/src/cpu/x64/jit_brgemm_inner_product.hpp index 0601a0b3cf7..8dbf1bf29eb 100644 --- a/src/cpu/x64/jit_brgemm_inner_product.hpp +++ b/src/cpu/x64/jit_brgemm_inner_product.hpp @@ -224,8 +224,8 @@ struct brgemm_inner_product_fwd_t : public primitive_t { const auto attr = pd()->attr(); if (is_jit_supported && pd()->OC() > 1 && req_copy_scales(attr)) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); diff --git a/src/cpu/x64/jit_brgemm_inner_product_utils.cpp b/src/cpu/x64/jit_brgemm_inner_product_utils.cpp index 3de2b4c10eb..b66bce716cd 100644 --- a/src/cpu/x64/jit_brgemm_inner_product_utils.cpp +++ b/src/cpu/x64/jit_brgemm_inner_product_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -444,8 +444,7 @@ status_t jit_brgemm_ip_fwd_conf_t::init_conf(cpu_isa_t isa, const memory_desc_wrapper dst_d(&dst_md); if (!post_ops_ok(attr, dst_d)) return status::unimplemented; if (jbgp.with_scales) { - const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); - jbgp.is_oc_scale = wei_scales.mask_ != 0; + jbgp.is_oc_scale = attr.scales_.get_mask(DNNL_ARG_WEIGHTS) > 0; } const int min_ic_divisor = is_amx_int8 ? 4 : is_amx_xf16 ? 2 : 1; diff --git a/src/cpu/x64/jit_brgemm_post_ops.cpp b/src/cpu/x64/jit_brgemm_post_ops.cpp index 97eebf89ce3..1ea3558c533 100644 --- a/src/cpu/x64/jit_brgemm_post_ops.cpp +++ b/src/cpu/x64/jit_brgemm_post_ops.cpp @@ -579,7 +579,8 @@ dnnl::impl::cpu::x64::jit_brgemm_kernel_post_ops_t< const auto &wei_scales = attr_.scales_.get(DNNL_ARG_WEIGHTS); // per_oc: conv: 1 << 0, (1 << 1) + (1 << 0) (with groups) // per_oc: ip: 1 << 0 - is_oc_scale_ = utils::one_of(wei_scales.mask_, 1 << 0, (1 << 1) + (1 << 0)); + is_oc_scale_ + = utils::one_of(wei_scales.get_mask(), 1 << 0, (1 << 1) + (1 << 0)); inp_dt_ = brg_.dt_c; out_dt_ = brg_.dt_d; diff --git a/src/cpu/x64/jit_gemm_inner_product_utils.cpp b/src/cpu/x64/jit_gemm_inner_product_utils.cpp index 52854681668..782a7e0887a 100644 --- a/src/cpu/x64/jit_gemm_inner_product_utils.cpp +++ b/src/cpu/x64/jit_gemm_inner_product_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
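The brgemm post-ops hunk above spells out the per-oc mask convention these drivers rely on: inner product keeps OC in dimension 0 (mask `1 << 0`), while grouped convolution also sets the groups bit, giving `(1 << 1) + (1 << 0)`. The `one_of` call reduces to this check (a standalone restatement, not the library code):

    // True exactly for the two per-oc weight-scale masks accepted above.
    inline bool is_per_oc_wei_mask(int mask) {
        return mask == (1 << 0) || mask == ((1 << 1) + (1 << 0));
    }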
@@ -288,7 +288,7 @@ jit_pp_kernel_t::jit_pp_kernel_t(size_t OC, size_t MB, dim_t dst_mb_stride, if (this->do_bias()) compute_vreg_bias_shift_ = compute_vregs_per_iter_++; - if (!attr->scales_.get(DNNL_ARG_DST).has_default_values()) { + if (!attr->scales_.has_default_values(DNNL_ARG_DST)) { this->do_dst_scale_ = true; vreg_dst_scale = Vmm(idx_compute_vreg_start_++); } diff --git a/src/cpu/x64/jit_uni_binary.cpp b/src/cpu/x64/jit_uni_binary.cpp index 889fea1df0e..4e713ff3daa 100644 --- a/src/cpu/x64/jit_uni_binary.cpp +++ b/src/cpu/x64/jit_uni_binary.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -172,10 +172,8 @@ status_t jit_uni_binary_t::pd_t::init(engine_t *engine) { conf_.is_f16 = conf_.dst_type == f16; conf_.op_type = get_op_type(src0_md_); assert(conf_.op_type != op_t::none); - conf_.do_scale_src0 - = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); - conf_.do_scale_src1 - = !attr()->scales_.get(DNNL_ARG_SRC_1).has_default_values(); + conf_.do_scale_src0 = !attr()->scales_.has_default_values(DNNL_ARG_SRC_0); + conf_.do_scale_src1 = !attr()->scales_.has_default_values(DNNL_ARG_SRC_1); const auto sum_idx = po.find(primitive_kind::sum); conf_.do_sum = sum_idx != -1 && po.entry_[sum_idx].sum.scale != 0.f; conf_.with_eltwise = po.find(primitive_kind::eltwise) != -1; diff --git a/src/cpu/x64/jit_uni_group_normalization.cpp b/src/cpu/x64/jit_uni_group_normalization.cpp index 8f4b16c9806..e9196ea81cf 100644 --- a/src/cpu/x64/jit_uni_group_normalization.cpp +++ b/src/cpu/x64/jit_uni_group_normalization.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -84,8 +84,8 @@ struct kernel_t : public jit_uni_group_normalization_fwd_t::kernel_base_t, with_eltwise_ = post_ops.find(primitive_kind::eltwise) != -1; const auto &attr_scales = pd->attr()->scales_; - with_src_scales_ = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); - with_dst_scales_ = !attr_scales.get(DNNL_ARG_DST).has_default_values(); + with_src_scales_ = !attr_scales.has_default_values(DNNL_ARG_SRC); + with_dst_scales_ = !attr_scales.has_default_values(DNNL_ARG_DST); io::io_conf_t io_conf; io::io_tail_conf_t io_tail_conf(simd_w_, axis_simd_tail_, diff --git a/src/cpu/x64/jit_uni_instance_normalization.cpp b/src/cpu/x64/jit_uni_instance_normalization.cpp index d441712e2e0..7e0d86b0a7a 100644 --- a/src/cpu/x64/jit_uni_instance_normalization.cpp +++ b/src/cpu/x64/jit_uni_instance_normalization.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
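In the normalization kernels here, `with_src_scales_` and `with_dst_scales_` gate two extra operations in the epilogue: the source scale dequantizes the input before normalization, and the destination scale quantizes the result on the way out (oneDNN applies dst scales in inverted form). A scalar sketch of that data flow, with all names assumed:

    // One element of an int8 normalization, scales applied as assumed above.
    float normalize_elem(float x_quant, float src_scale, float mean,
            float inv_std, float gamma, float beta, float dst_scale) {
        const float x = x_quant * src_scale;          // with_src_scales_
        const float y = gamma * (x - mean) * inv_std + beta;
        return y / dst_scale;                         // with_dst_scales_
    }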
@@ -82,8 +82,8 @@ struct kernel_t : public jit_uni_instance_normalization_fwd_t::kernel_base_t, with_eltwise_ = post_ops.find(primitive_kind::eltwise) != -1; const auto &attr_scales = pd->attr()->scales_; - with_src_scales_ = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); - with_dst_scales_ = !attr_scales.get(DNNL_ARG_DST).has_default_values(); + with_src_scales_ = !attr_scales.has_default_values(DNNL_ARG_SRC); + with_dst_scales_ = !attr_scales.has_default_values(DNNL_ARG_DST); io::io_conf_t io_conf; io::io_tail_conf_t io_tail_conf(simd_w_, axis_simd_tail_, diff --git a/src/cpu/x64/jit_uni_layer_normalization.cpp b/src/cpu/x64/jit_uni_layer_normalization.cpp index 5d532c43e01..2665cf2fe2c 100644 --- a/src/cpu/x64/jit_uni_layer_normalization.cpp +++ b/src/cpu/x64/jit_uni_layer_normalization.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -128,8 +128,8 @@ struct jit_stat_and_data_base_kernel_t : stat_and_data_kernel_t, with_eltwise_ = post_ops.find(primitive_kind::eltwise) != -1; const auto &attr_scales = pd_->attr()->scales_; - with_src_scales_ = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); - with_dst_scales_ = !attr_scales.get(DNNL_ARG_DST).has_default_values(); + with_src_scales_ = !attr_scales.has_default_values(DNNL_ARG_SRC); + with_dst_scales_ = !attr_scales.has_default_values(DNNL_ARG_DST); io::io_conf_t io_conf; io::io_tail_conf_t io_tail_conf(simd_w_, axis_simd_tail_, diff --git a/src/cpu/x64/jit_uni_reorder.cpp b/src/cpu/x64/jit_uni_reorder.cpp index f79a5d32b1f..0ed3361133c 100644 --- a/src/cpu/x64/jit_uni_reorder.cpp +++ b/src/cpu/x64/jit_uni_reorder.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2324,13 +2324,10 @@ status_t jit_uni_reorder_t::pd_t::init_scratchpad() { compensation_reduce_size); } - const memory_desc_wrapper input_d(src_md()); - int scales_mask = -1; - bool is_set = false; - CHECK(attr()->scales_.get(DNNL_ARG_DST, &scales_mask, &is_set)); - - if (is_set && scales_mask > 0) { - get_D_values(input_d, scales_mask, nullptr, &D_mask_, nullptr); + if (!attr()->scales_.has_default_values(DNNL_ARG_DST)) { + const memory_desc_wrapper input_d(src_md()); + int mask = attr()->scales_.get_mask(DNNL_ARG_DST); + get_D_values(input_d, mask, nullptr, &D_mask_, nullptr); if (D_mask_ > 1) { scratchpad.template book( memory_tracking::names::key_reorder_precomputed_dst_scales, diff --git a/src/cpu/x64/jit_uni_reorder_utils.cpp b/src/cpu/x64/jit_uni_reorder_utils.cpp index cf9b343cb37..5ef629ca359 100644 --- a/src/cpu/x64/jit_uni_reorder_utils.cpp +++ b/src/cpu/x64/jit_uni_reorder_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -273,24 +273,21 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, p.src_scale_type = scale_type_t::NONE; int src_mask = 0; - bool is_src_set = false; - CHECK(attr->scales_.get(DNNL_ARG_SRC, &src_mask, &is_src_set)); - if (is_src_set) { + if (!attr->scales_.has_default_values(DNNL_ARG_SRC)) { + src_mask = attr->scales_.get_mask(DNNL_ARG_SRC); p.src_scale_type = src_mask == 0 ? scale_type_t::COMMON : scale_type_t::MANY; } p.dst_scale_type = scale_type_t::NONE; int dst_mask = 0; - bool is_dst_set = false; - CHECK(attr->scales_.get(DNNL_ARG_DST, &dst_mask, &is_dst_set)); - if (is_dst_set) { + if (!attr->scales_.has_default_values(DNNL_ARG_DST)) { + dst_mask = attr->scales_.get_mask(DNNL_ARG_DST); p.dst_scale_type = dst_mask == 0 ? scale_type_t::COMMON : scale_type_t::MANY; } - if (is_src_set && is_dst_set && src_mask != dst_mask) - return status::unimplemented; + if (src_mask != dst_mask) return status::unimplemented; p.scale_adjust = (om_d.extra().flags & memory_extra_flags::scale_adjust) ? om_d.extra().scale_adjust diff --git a/src/cpu/x64/jit_uni_softmax.cpp b/src/cpu/x64/jit_uni_softmax.cpp index 1ccd7f0cb08..89837058eb5 100644 --- a/src/cpu/x64/jit_uni_softmax.cpp +++ b/src/cpu/x64/jit_uni_softmax.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -969,9 +969,9 @@ struct jit_softmax_dense_kernel_t : jit_softmax_kernel_base_t, const auto &attr_scales = pd_->attr()->scales_; with_src_scales_ = is_superset(isa, avx2) - && !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + && !attr_scales.has_default_values(DNNL_ARG_SRC); with_dst_scales_ = is_superset(isa, avx2) - && !attr_scales.get(DNNL_ARG_DST).has_default_values(); + && !attr_scales.has_default_values(DNNL_ARG_DST); io::io_conf_t io_conf; io::io_tail_conf_t io_tail_conf(simd_w_, axis_simd_tail_, @@ -1529,9 +1529,9 @@ struct jit_softmax_strided_kernel_t : jit_softmax_kernel_base_t, const auto &attr_scales = pd_->attr()->scales_; with_src_scales_ = is_superset(isa, avx2) - && !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + && !attr_scales.has_default_values(DNNL_ARG_SRC); with_dst_scales_ = is_superset(isa, avx2) - && !attr_scales.get(DNNL_ARG_DST).has_default_values(); + && !attr_scales.has_default_values(DNNL_ARG_DST); io::io_conf_t io_conf; io::io_tail_conf_t io_tail_conf(simd_w_, axis_simd_tail_, diff --git a/src/cpu/x64/jit_uni_x8s8s32x_1x1_conv_kernel.cpp b/src/cpu/x64/jit_uni_x8s8s32x_1x1_conv_kernel.cpp index 8186789adc4..d2ff20d8db4 100644 --- a/src/cpu/x64/jit_uni_x8s8s32x_1x1_conv_kernel.cpp +++ b/src/cpu/x64/jit_uni_x8s8s32x_1x1_conv_kernel.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
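`prb_init()` above now derives the reorder scale types from the default-values query plus the mask, and bails out when the src and dst masks disagree. The classification it performs reduces to this sketch:

    enum class scale_type_t { NONE, COMMON, MANY };

    // No scales attached -> NONE; mask 0 -> one COMMON value; any
    // per-dimension bit set -> MANY.
    scale_type_t classify_scales(bool has_scales, int mask) {
        if (!has_scales) return scale_type_t::NONE;
        return mask == 0 ? scale_type_t::COMMON : scale_type_t::MANY;
    }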
@@ -910,7 +910,7 @@ status_t jit_uni_x8s8s32x_1x1_conv_kernel::init_conf( const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); jcp.wei_adj_scale @@ -927,7 +927,7 @@ void jit_uni_x8s8s32x_1x1_conv_kernel::init_scratchpad( const jit_1x1_conv_conf_t &jcp, const primitive_attr_t &attr) { using namespace dnnl::impl::memory_tracking::names; - const int wei_mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = wei_mask == 0 ? 1 : static_cast(jcp.oc) * jcp.ngroups; const dim_t count = nstl::max(scales_count, 8); diff --git a/src/cpu/x64/jit_uni_x8s8s32x_1x1_convolution.cpp b/src/cpu/x64/jit_uni_x8s8s32x_1x1_convolution.cpp index 6b0e7e9d9e9..8dadfd91d06 100644 --- a/src/cpu/x64/jit_uni_x8s8s32x_1x1_convolution.cpp +++ b/src/cpu/x64/jit_uni_x8s8s32x_1x1_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2023 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,7 +76,7 @@ status_t jit_uni_x8s8s32x_1x1_convolution_fwd_t::execute_forward( const float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 1.f / pd()->jcp_.wei_adj_scale : 1.0f; - int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); if (wei_mask == 0) { utils::array_set( local_scales, src_scales[0] * wei_scales[0] * factor, 8); @@ -94,7 +94,7 @@ status_t jit_uni_x8s8s32x_1x1_convolution_fwd_t::execute_forward( auto dw_local_scales = dw_scratchpad.template get(key_conv_adjusted_scales); - int wei_mask = attr_dw->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = attr_dw->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = 1.f / jcp_dw->wei_adj_scale; if (wei_mask == 0) { utils::array_set(dw_local_scales, diff --git a/src/cpu/x64/jit_uni_x8s8s32x_conv_kernel.cpp b/src/cpu/x64/jit_uni_x8s8s32x_conv_kernel.cpp index d4512765ee3..24275ba63cf 100644 --- a/src/cpu/x64/jit_uni_x8s8s32x_conv_kernel.cpp +++ b/src/cpu/x64/jit_uni_x8s8s32x_conv_kernel.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1315,7 +1315,7 @@ status_t jit_uni_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp, const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); const auto zp = attr.zero_points_; @@ -1614,7 +1614,7 @@ void jit_uni_x8s8s32x_fwd_kernel::init_scratchpad( memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp, const primitive_attr_t &attr) { - const int mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = mask == 0 ? 1 : static_cast(jcp.oc) * jcp.ngroups; dim_t count = scales_count == 1 ? 
(dim_t)8 : scales_count; diff --git a/src/cpu/x64/jit_uni_x8s8s32x_convolution.cpp b/src/cpu/x64/jit_uni_x8s8s32x_convolution.cpp index bfa20f0d33e..8fc730b1cec 100644 --- a/src/cpu/x64/jit_uni_x8s8s32x_convolution.cpp +++ b/src/cpu/x64/jit_uni_x8s8s32x_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2023 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ const float *jit_uni_x8s8s32x_convolution_fwd_t::adjust_oscales( const memory_tracking::grantor_t &scratchpad, const float *src_scales, const float *wei_scales) const { auto loc_scales = scratchpad.template get(key_conv_adjusted_scales); - int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 1.f / pd()->jcp_.wei_adj_scale : 1.0f; diff --git a/src/cpu/x64/jit_uni_x8s8s32x_deconvolution.cpp b/src/cpu/x64/jit_uni_x8s8s32x_deconvolution.cpp index 9b03681d900..d8596136576 100644 --- a/src/cpu/x64/jit_uni_x8s8s32x_deconvolution.cpp +++ b/src/cpu/x64/jit_uni_x8s8s32x_deconvolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -272,7 +272,7 @@ status_t jit_uni_x8s8s32x_deconv_fwd_kernel::init_conf( const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); - jcp.is_oc_scale = wei_scales.mask_ != 0; + jcp.is_oc_scale = wei_scales.get_mask() > 0; jcp.dst_scale = !dst_scales.has_default_values(); jcp.post_ops = p; @@ -386,7 +386,7 @@ template void jit_uni_x8s8s32x_deconv_fwd_kernel::init_scratchpad( memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp, const primitive_attr_t &attr) { - const int mask = attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; + const int mask = attr.scales_.get_mask(DNNL_ARG_WEIGHTS); const dim_t scales_count = mask == 0 ? 1 : static_cast(jcp.oc) * jcp.ngroups; dim_t count = nstl::max(scales_count, 8); @@ -1451,7 +1451,7 @@ const float *jit_uni_x8s8s32x_deconvolution_fwd_t::adjust_oscales( const memory_tracking::grantor_t &scratchpad, const float *src_scales, const float *wei_scales) const { auto loc_scales = scratchpad.template get(key_conv_adjusted_scales); - int wei_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; + int wei_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); float factor = (pd()->jcp_.signed_input && (!pd()->jcp_.has_vnni)) ? 
1.f / pd()->jcp_.wei_adj_scale : 1.0f; diff --git a/src/cpu/x64/matmul/brgemm_matmul.cpp b/src/cpu/x64/matmul/brgemm_matmul.cpp index 0301b525d11..bd41e8af008 100644 --- a/src/cpu/x64/matmul/brgemm_matmul.cpp +++ b/src/cpu/x64/matmul/brgemm_matmul.cpp @@ -105,9 +105,9 @@ status_t brgemm_matmul_t::pd_t::init(engine_t *engine) { = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}; bool ok = attr_scales_ok(supported_args); const auto &asc = attr()->scales_; - if (!asc.get(DNNL_ARG_SRC).has_default_values() - && !asc.get(DNNL_ARG_WEIGHTS).has_default_values() - && asc.get(DNNL_ARG_WEIGHTS).mask_ != 0) { + if (!asc.has_default_values(DNNL_ARG_SRC) + && !asc.has_default_values(DNNL_ARG_WEIGHTS) + && asc.get_mask(DNNL_ARG_WEIGHTS) > 0) { // This case requires scratchpad if (N() == DNNL_RUNTIME_DIM_VAL) ok = false; } @@ -118,14 +118,14 @@ status_t brgemm_matmul_t::pd_t::init(engine_t *engine) { ok = ok && one_of(asc.get_data_type(DNNL_ARG_DST), undef, f32); } // Implementation has limited support w.r.t. scales groups. - if (!asc.get(DNNL_ARG_WEIGHTS).has_default_values()) { + if (!asc.has_default_values(DNNL_ARG_WEIGHTS)) { if (!asc.get(DNNL_ARG_WEIGHTS).has_default_groups()) { // Only grouping over K is supported. - ok = ok && asc.get(DNNL_ARG_WEIGHTS).group_dims_[1] == 1; + ok = ok && asc.get_group(DNNL_ARG_WEIGHTS, 1) == 1; // Only 'per_ocic' mask is supported, but not 'per_tensor' in // benchdnn terms. In numbers, '12' is supported while for // 4D '15' is required. - const int mask = asc.get(DNNL_ARG_WEIGHTS).mask_; + const int mask = asc.get_mask(DNNL_ARG_WEIGHTS); const int ndims = weights_md_.ndims; const int last_dim = (1 << (ndims - 1)); const int prelast_dim = (1 << (ndims - 2)); @@ -357,8 +357,8 @@ status_t brgemm_matmul_t::init(engine_t *engine) { if (is_jit_supported && wei_scale_count > 1 && req_copy_scales(attr) && !bgmmc.req_transpose_scales) { const auto &attr_scales = attr->scales_; - int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_scale_mask != 0) { + int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); + if (wei_scale_mask > 0) { CHECK(safe_ptr_assign(jit_scale_precompute_, new jit_avx512_core_scale_precompute_t(attr))); CHECK(jit_scale_precompute_->create_kernel()); @@ -383,10 +383,13 @@ status_t brgemm_matmul_t::execute_body(const exec_ctx_t &ctx) const { matmul_helper_t helper(src_d, weights_d, dst_d); const auto &bgmmc = pd()->get_brgemm_matmul_conf(); - const int wei_scale_mask = pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_; - const bool wei_scale_per_k = wei_scale_mask & pd()->wei_qmask_K(); - const bool wei_scale_per_n = wei_scale_mask & pd()->wei_qmask_N(); + const bool has_wei_scales = !pd()->attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS); + const int wei_scale_mask = pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS); + const bool wei_scale_per_k = has_wei_scales && (wei_scale_mask & pd()->wei_qmask_K()); + const bool wei_scale_per_n = has_wei_scales && (wei_scale_mask & pd()->wei_qmask_N()); const float *oscales = scale_utils::precompute_scales( ctx.get_scratchpad_grantor(), src_scales, wei_scales, pd()->K(), pd()->N(), wei_scale_per_k, wei_scale_per_n, pd()->attr(), diff --git a/src/cpu/x64/matmul/brgemm_matmul_utils.cpp b/src/cpu/x64/matmul/brgemm_matmul_utils.cpp index 26cf27ef5c5..9e5b6021c01 100644 --- a/src/cpu/x64/matmul/brgemm_matmul_utils.cpp +++ b/src/cpu/x64/matmul/brgemm_matmul_utils.cpp @@ -1369,19 +1369,19 @@ status_t init_brgemm_matmul_conf(cpu_isa_t isa, brgemm_matmul_conf_t &bgmmc, const auto
&src_scales = attr.scales_.get(DNNL_ARG_SRC); const auto &wei_scales = attr.scales_.get(DNNL_ARG_WEIGHTS); - bgmmc.with_scales = !src_scales.has_default_values() - || !wei_scales.has_default_values(); - if (bgmmc.with_scales) { + const bool has_wei_scales = !wei_scales.has_default_values(); + bgmmc.with_scales = !src_scales.has_default_values() || has_wei_scales; + if (has_wei_scales) { const auto wei_qmask_N = 1 << (bgmmc.ndims - 1); const auto wei_qmask_K = 1 << (bgmmc.ndims - 2); - bgmmc.is_oscale_per_k = wei_scales.mask_ & wei_qmask_K; - bgmmc.is_oscale_per_n = wei_scales.mask_ & wei_qmask_N; + bgmmc.is_oscale_per_k = wei_scales.get_mask() & wei_qmask_K; + bgmmc.is_oscale_per_n = wei_scales.get_mask() & wei_qmask_N; bgmmc.apply_scales_in_buffer_b = bgmmc.is_oscale_per_k && bgmmc.with_wei_decompression && bgmmc.N * bgmmc.K != 1; // only common and per-oc-channel scales are supported // only per-ic-channel scales are supported with weight decompression - VCONDCHECK_BG(wei_scales.mask_ == 0 || bgmmc.is_oscale_per_n + VCONDCHECK_BG(wei_scales.get_mask() == 0 || bgmmc.is_oscale_per_n || IMPLICATION(bgmmc.is_oscale_per_k, bgmmc.with_wei_decompression), VERBOSE_UNSUPPORTED_SCALES_CFG); @@ -1390,8 +1390,8 @@ status_t init_brgemm_matmul_conf(cpu_isa_t isa, brgemm_matmul_conf_t &bgmmc, const auto &dst_scales = attr.scales_.get(DNNL_ARG_DST); bgmmc.with_dst_scales = !dst_scales.has_default_values(); // only common scales are supported - VCONDCHECK_BG(!(bgmmc.with_dst_scales && dst_scales.mask_ != 0), - VERBOSE_UNSUPPORTED_SCALES_CFG) + VCONDCHECK_BG(!(bgmmc.with_dst_scales && dst_scales.get_mask() > 0), + VERBOSE_UNSUPPORTED_SCALES_CFG); const auto &p = attr.post_ops_; bgmmc.with_sum = p.find(primitive_kind::sum) != -1; diff --git a/src/cpu/x64/matmul_inner_product.cpp b/src/cpu/x64/matmul_inner_product.cpp index 9a6e547cb24..48e0cf81f9f 100644 --- a/src/cpu/x64/matmul_inner_product.cpp +++ b/src/cpu/x64/matmul_inner_product.cpp @@ -244,13 +244,16 @@ status_t matmul_inner_product_fwd_t::pd_t::init_matmul_params( attr_.set_default_formats(dst_md(0)), VERBOSE_UNSUPPORTED_POSTOP); primitive_attr_t matmul_attr = *attr(); - const auto wei_mask = matmul_attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_mask == 1) - VDISPATCH_INNER_PRODUCT_SC(matmul_attr.scales_.set(DNNL_ARG_WEIGHTS, - 1 << (mm_wei_md.ndims - 1)), - VERBOSE_UNSUPPORTED_ATTR); - else if (wei_mask != 0) - VDISPATCH_INNER_PRODUCT(false, VERBOSE_UNSUPPORTED_SCALES_CFG); + if (!matmul_attr.scales_.has_default_values(DNNL_ARG_WEIGHTS)) { + const auto wei_mask = matmul_attr.scales_.get_mask(DNNL_ARG_WEIGHTS); + if (wei_mask == 1) { + VDISPATCH_INNER_PRODUCT_SC(matmul_attr.scales_.set(DNNL_ARG_WEIGHTS, + 1 << (mm_wei_md.ndims - 1)), + VERBOSE_UNSUPPORTED_ATTR); + } else if (wei_mask > 0) { + VDISPATCH_INNER_PRODUCT(false, VERBOSE_UNSUPPORTED_SCALES_CFG); + } + } memory_desc_t mm_bia_md {}; // Inner Product bias is always a vector while MatMul requires bias to have diff --git a/src/gpu/generic/ref_concat.hpp b/src/gpu/generic/ref_concat.hpp index 5c230cc634d..cb4b510b1ed 100644 --- a/src/gpu/generic/ref_concat.hpp +++ b/src/gpu/generic/ref_concat.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
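The hunks above all follow the same new contract: call sites guard get_mask(arg) behind has_default_values(arg) instead of reading a raw mask_ field, and the switch from `mask_ != 0` to `get_mask() > 0` suggests an unset entry may report a non-positive sentinel (an assumption here, not stated in the patch). A minimal standalone sketch of that contract and of the mask-to-count arithmetic used by the init_scratchpad hunks, with hypothetical stand-in types rather than the real oneDNN classes:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for quant_entry_t; the mask is only meaningful
    // once the entry has been explicitly set.
    struct quant_entry {
        bool set = false;
        int mask = 0;
        bool has_default_values() const { return !set; }
        int get_mask() const { return set ? mask : -1; } // assumed sentinel
    };

    // Scale count implied by a mask: bit d set means the scale varies along
    // dimension d, mirroring `mask == 0 ? 1 : oc * ngroups` above.
    std::int64_t scales_count(int mask, const std::vector<std::int64_t> &dims) {
        std::int64_t count = 1;
        for (std::size_t d = 0; d < dims.size(); ++d)
            if (mask & (1 << d)) count *= dims[d];
        return count;
    }

    int main() {
        quant_entry wei_scales {/*set=*/true, /*mask=*/1}; // per-OC weights
        std::vector<std::int64_t> oihw = {64, 32, 3, 3};
        if (!wei_scales.has_default_values()) // guard first, as above
            assert(scales_count(wei_scales.get_mask(), oihw) == 64);
        return 0;
    }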
@@ -60,12 +60,8 @@ struct ref_concat_t : public gpu::primitive_t { reorder_pds_.resize(n_ + use_tent_dst()); for (int i = 0; i < n_; ++i) { primitive_attr_t r_attr; - int mask = 0; - bool is_set = false; - VDISPATCH_CONCAT_SC( - sc.get(DNNL_ARG_MULTIPLE_SRC + i, &mask, &is_set), - VERBOSE_UNSUPPORTED_SCALES_CFG); - if (is_set) { + if (!sc.get(DNNL_ARG_MULTIPLE_SRC + i).has_default_values()) { + int mask = sc.get_mask(DNNL_ARG_MULTIPLE_SRC + i); VDISPATCH_CONCAT(mask == 0, "non-zero mask"); VDISPATCH_CONCAT_SC(r_attr.scales_.set(DNNL_ARG_SRC, mask), VERBOSE_UNSUPPORTED_SCALES_CFG); diff --git a/src/gpu/generic/sycl/layer_normalizations_kernels.hpp b/src/gpu/generic/sycl/layer_normalizations_kernels.hpp index 1c09075158a..8e1a4b5e53e 100644 --- a/src/gpu/generic/sycl/layer_normalizations_kernels.hpp +++ b/src/gpu/generic/sycl/layer_normalizations_kernels.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -79,12 +79,20 @@ struct layer_normalization_fwd_kernel_vec_t { memory_tensor_t data_mem(data_, conf_.data_md); memory_tensor_t scale_mem(scale_, conf_.data_scaleshift_md); memory_tensor_t shift_mem(shift_, conf_.data_scaleshift_md); - memory_plain_t rt_scale_mem(rt_scale_, conf_.scales_src_dt); - memory_plain_t dst_scale_mem(dst_scale_, conf_.scales_dst_dt); memory_tensor_t stat_mem(stat_, conf_.stat_md); memory_plain_t var_mem(var_, conf_.var_dt); memory_tensor_t dst_mem(dst_, conf_.dst_md); + float sr = 1.f; + if (!conf_.src_def) { + memory_plain_t rt_scale_mem(rt_scale_, conf_.scales_src_dt); + sr = rt_scale_mem.load(0); + } + float ds = 1.f; + if (!conf_.dst_def) { + memory_plain_t dst_scale_mem(dst_scale_, conf_.scales_dst_dt); + ds = dst_scale_mem.load(0); + } float eps = epsilon(); const size_t s_off = conf_.stat_md.off_l(idx); auto v_mean = stat_mem.load(s_off); @@ -104,8 +112,6 @@ struct layer_normalization_fwd_kernel_vec_t { float s = data_mem.load(src_off); float d = sm * (s - v_mean) + sv; - float sr = conf_.src_def ? 1.f : rt_scale_mem.load(0); - float ds = conf_.dst_def ? 
1.f : dst_scale_mem.load(0); d = (d * sr * (1.f / ds)); dst_mem.store(d, d_off); } @@ -171,8 +177,6 @@ struct layer_normalization_fwd_kernel_vec1_t { memory_tensor_t data_mem(data_, conf_.data_md); memory_tensor_t scale_mem(scale_, conf_.data_scaleshift_md); memory_tensor_t shift_mem(shift_, conf_.data_scaleshift_md); - memory_plain_t rt_scale_mem(rt_scale_, conf_.scales_src_dt); - memory_plain_t dst_scale_mem(dst_scale_, conf_.scales_dst_dt); memory_tensor_t stat_out_mem(mean_out_, conf_.stat_md); memory_plain_t var_out_mem(var_out_, conf_.var_dt); memory_tensor_t dst_mem(dst_, conf_.dst_md); @@ -181,6 +185,17 @@ struct layer_normalization_fwd_kernel_vec1_t { stat_out_mem.store(0, idx); var_out_mem.store(0, idx); } + float sr = 1.f; + if (!conf_.src_def) { + memory_plain_t rt_scale_mem(rt_scale_, conf_.scales_src_dt); + sr = rt_scale_mem.load(0); + } + float ds = 1.f; + if (!conf_.dst_def) { + memory_plain_t dst_scale_mem(dst_scale_, conf_.scales_dst_dt); + ds = dst_scale_mem.load(0); + } + float eps = epsilon(); const size_t s_off = conf_.stat_md.off_l(idx); float v_mean = 0.f; @@ -217,9 +232,6 @@ struct layer_normalization_fwd_kernel_vec1_t { const auto d_off = dst_md().off_l(index); float s = data_mem.load(src_off); float d = sm * (s - v_mean) + sv; - - float sr = conf_.src_def ? 1.f : rt_scale_mem.load(0); - float ds = conf_.dst_def ? 1.f : dst_scale_mem.load(0); d = (d * sr * (1.f / ds)); dst_mem.store(d, d_off); diff --git a/src/gpu/generic/sycl/ref_binary.hpp b/src/gpu/generic/sycl/ref_binary.hpp index e6b8ec6f2ce..69c5af70b52 100644 --- a/src/gpu/generic/sycl/ref_binary.hpp +++ b/src/gpu/generic/sycl/ref_binary.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -87,8 +87,7 @@ struct ref_binary_t : public gpu::generic::sycl::primitive_t { const auto &scales = attr()->scales_; bool dt_ok = true; for (auto arg : supported_args) { - auto &s = scales.get(arg); - dt_ok = dt_ok && is_supported_type(s.data_type_); + dt_ok = dt_ok && is_supported_type(scales.get_data_type(arg)); } return dt_ok && attr_scales_ok(supported_args); } diff --git a/src/gpu/generic/sycl/ref_convolution.cpp b/src/gpu/generic/sycl/ref_convolution.cpp index b2f0458eba3..c448a32cbcf 100644 --- a/src/gpu/generic/sycl/ref_convolution.cpp +++ b/src/gpu/generic/sycl/ref_convolution.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
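The layer normalization kernel hunks above hoist the runtime-scale loads out of the per-element loop: the scale memory views are constructed, and the common scale loaded, once per work item and only when the corresponding scales are non-default. A simplified standalone model of that hoisting, with plain pointers standing in for the SYCL memory wrappers:

    #include <cstdio>

    // Load src/dst scales once, outside the element loop, and only when set;
    // a null pointer models conf_.src_def / conf_.dst_def being true.
    void apply_scales(const float *src_scale, const float *dst_scale,
            float *data, int n) {
        float sr = src_scale ? *src_scale : 1.f;
        float ds = dst_scale ? *dst_scale : 1.f;
        for (int i = 0; i < n; ++i)
            data[i] = data[i] * sr * (1.f / ds);
    }

    int main() {
        float data[4] = {1.f, 2.f, 3.f, 4.f};
        float sr = 2.f, ds = 4.f;
        apply_scales(&sr, &ds, data, 4);
        std::printf("%g\n", data[3]); // 4 * 2 * (1/4) = 2
        return 0;
    }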
@@ -41,11 +41,9 @@ status_t ref_convolution_fwd_t::pd_t::init_conf() { conf_.do_scale_data = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); conf_.do_scale_weights - = !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values(); - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); - conf_.single_weight_scale - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0; + = !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); + conf_.single_weight_scale = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0; conf_.use_data_zeropoints = !attr()->zero_points_.has_default_values(DNNL_ARG_SRC_0); @@ -106,11 +104,9 @@ status_t ref_convolution_bwd_data_t::pd_t::init_conf() { conf_.do_scale_data = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); conf_.do_scale_weights - = !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values(); - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); - conf_.single_weight_scale - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0; + = !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); + conf_.single_weight_scale = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0; conf_.use_data_zeropoints = !attr()->zero_points_.has_default_values(DNNL_ARG_SRC_0); @@ -172,11 +168,9 @@ status_t ref_convolution_bwd_weights_t::pd_t::init_conf() { conf_.do_scale_data = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); conf_.do_scale_weights - = !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values(); - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); - conf_.single_weight_scale - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0; + = !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); + conf_.single_weight_scale = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0; conf_.use_data_zeropoints = !attr()->zero_points_.has_default_values(DNNL_ARG_SRC_0); diff --git a/src/gpu/generic/sycl/ref_convolution.hpp b/src/gpu/generic/sycl/ref_convolution.hpp index 55faae92c08..738455ddcba 100644 --- a/src/gpu/generic/sycl/ref_convolution.hpp +++ b/src/gpu/generic/sycl/ref_convolution.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -66,7 +66,7 @@ inline bool check_convolution_scales_types(const primitive_attr_t *attr) { const auto &scales = attr->scales_; for (auto arg : supported_args) { - auto dt = scales.get(arg).data_type_; + const auto dt = scales.get_data_type(arg); if (!is_supported_type(dt)) { return false; } } return true; diff --git a/src/gpu/generic/sycl/ref_layer_normalizations.cpp b/src/gpu/generic/sycl/ref_layer_normalizations.cpp index 69763aa2568..f5c7b487594 100644 --- a/src/gpu/generic/sycl/ref_layer_normalizations.cpp +++ b/src/gpu/generic/sycl/ref_layer_normalizations.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -41,15 +41,11 @@ status_t ref_layer_normalization_fwd_t::pd_t::init_conf() { conf_.block_size = 16; conf_.rt_scaling = !attr()->scales_.has_default_values(); - conf_.src_def = attr()->scales_.get(DNNL_ARG_SRC).has_default_values(); - conf_.dst_def = attr()->scales_.get(DNNL_ARG_DST).has_default_values(); - - conf_.scales_src_dt = conf_.src_def - ? data_type_t::dnnl_f32 - : attr()->scales_.get(DNNL_ARG_SRC).data_type_; - conf_.scales_dst_dt = conf_.dst_def - ? data_type_t::dnnl_f32 - : attr()->scales_.get(DNNL_ARG_DST).data_type_; + conf_.src_def = attr()->scales_.has_default_values(DNNL_ARG_SRC); + conf_.dst_def = attr()->scales_.has_default_values(DNNL_ARG_DST); + + conf_.scales_src_dt = attr()->scales_.get_data_type(DNNL_ARG_SRC); + conf_.scales_dst_dt = attr()->scales_.get_data_type(DNNL_ARG_DST); conf_.use_scale = use_scale(); conf_.use_shift = use_shift(); diff --git a/src/gpu/generic/sycl/ref_layer_normalizations.hpp b/src/gpu/generic/sycl/ref_layer_normalizations.hpp index 52bd8e6f16f..1f5a23fd238 100644 --- a/src/gpu/generic/sycl/ref_layer_normalizations.hpp +++ b/src/gpu/generic/sycl/ref_layer_normalizations.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -81,7 +81,7 @@ struct ref_layer_normalization_fwd_t : public gpu::generic::sycl::primitive_t { const auto &scales = attr()->scales_; for (auto arg : supported_args) { - auto dt = scales.get(arg).data_type_; + const auto dt = scales.get_data_type(arg); if (!is_supported_type(dt)) { return false; } } return true; diff --git a/src/gpu/generic/sycl/ref_matmul.cpp b/src/gpu/generic/sycl/ref_matmul.cpp index d79f80b3cf0..2aaaa9d7a91 100644 --- a/src/gpu/generic/sycl/ref_matmul.cpp +++ b/src/gpu/generic/sycl/ref_matmul.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,11 +29,10 @@ void ref_matmul_t::pd_t::init_conf() { conf_.do_scale_data = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); conf_.do_scale_weights - = !attr()->scales_.get(DNNL_ARG_WEIGHTS).has_default_values(); - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); + = !attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); conf_.single_weights_scale - = attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ == 0; + = attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) == 0; conf_.use_data_zeropoints = !attr()->zero_points_.has_default_values(DNNL_ARG_SRC_0); diff --git a/src/gpu/generic/sycl/ref_matmul.hpp b/src/gpu/generic/sycl/ref_matmul.hpp index 2fa308b419f..06eba1d4215 100644 --- a/src/gpu/generic/sycl/ref_matmul.hpp +++ b/src/gpu/generic/sycl/ref_matmul.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
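The init_conf simplification above only holds if the new accessor reports f32 for an entry the user never set; that is what lets the `src_def ? f32 : data_type_` ternaries disappear. A tiny model of that assumed default-on-query behavior (hypothetical types, not the real quant_entry_t):

    #include <cassert>

    enum data_type_t { dnnl_f32, dnnl_f16 };

    // Assumption from the hunk above: an unset scales entry reports f32
    // rather than undef, so callers need no default-path special case.
    struct quant_entry {
        bool set = false;
        data_type_t dt = dnnl_f32;
        data_type_t get_data_type() const { return set ? dt : dnnl_f32; }
    };

    int main() {
        quant_entry unset;
        quant_entry f16_scales {/*set=*/true, dnnl_f16};
        assert(unset.get_data_type() == dnnl_f32);
        assert(f16_scales.get_data_type() == dnnl_f16);
        return 0;
    }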
@@ -122,8 +122,7 @@ struct ref_matmul_t : public gpu::generic::sycl::primitive_t { const auto &scales = attr()->scales_; bool dt_ok = true; for (auto arg : supported_args) { - auto &s = scales.get(arg); - dt_ok = dt_ok && is_supported_type(s.data_type_); + dt_ok = dt_ok && is_supported_type(scales.get_data_type(arg)); } return dt_ok && attr_scales_ok(supported_args); } diff --git a/src/gpu/generic/sycl/ref_reorder.cpp b/src/gpu/generic/sycl/ref_reorder.cpp index c7c8c25bfdc..135bc5b77dd 100644 --- a/src/gpu/generic/sycl/ref_reorder.cpp +++ b/src/gpu/generic/sycl/ref_reorder.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,10 +34,9 @@ status_t ref_reorder_t::pd_t::init_conf() { conf_.do_scale_src = !attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values(); - conf_.scale_src_mask = attr()->scales_.get(DNNL_ARG_SRC_0).mask_; - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); - conf_.scale_dst_mask = attr()->scales_.get(DNNL_ARG_DST).mask_; + conf_.scale_src_mask = attr()->scales_.get_mask(DNNL_ARG_SRC_0); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); + conf_.scale_dst_mask = attr()->scales_.get_mask(DNNL_ARG_DST); conf_.post_ops = sycl_post_ops_t(attr(), dst_md()); return status::success; diff --git a/src/gpu/generic/sycl/ref_reorder.hpp b/src/gpu/generic/sycl/ref_reorder.hpp index 3155d8d1486..91f77e8ac6c 100644 --- a/src/gpu/generic/sycl/ref_reorder.hpp +++ b/src/gpu/generic/sycl/ref_reorder.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -103,7 +103,7 @@ struct ref_reorder_t : public gpu::generic::sycl::primitive_t { const auto &scales = attr()->scales_; for (auto arg : supported_args) { - auto dt = scales.get(arg).data_type_; + const auto dt = scales.get_data_type(arg); if (!is_supported_type(dt)) { return false; } } return true; diff --git a/src/gpu/generic/sycl/ref_softmax.cpp b/src/gpu/generic/sycl/ref_softmax.cpp index 96ab1b4f79a..01a9cbedc54 100644 --- a/src/gpu/generic/sycl/ref_softmax.cpp +++ b/src/gpu/generic/sycl/ref_softmax.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -36,10 +36,8 @@ status_t ref_sycl_softmax_fwd_t::pd_t::init_conf() { conf_.channels = axis_size(); conf_.wk_size = inner_size() * outer_size(); - conf_.do_scale_src - = !attr()->scales_.get(DNNL_ARG_SRC).has_default_values(); - conf_.do_scale_dst - = !attr()->scales_.get(DNNL_ARG_DST).has_default_values(); + conf_.do_scale_src = !attr()->scales_.has_default_values(DNNL_ARG_SRC); + conf_.do_scale_dst = !attr()->scales_.has_default_values(DNNL_ARG_DST); conf_.post_ops = sycl_post_ops_t(attr(), dst_md()); diff --git a/src/gpu/generic/sycl/ref_softmax.hpp b/src/gpu/generic/sycl/ref_softmax.hpp index 8143eb6bc51..c4049fa56b2 100644 --- a/src/gpu/generic/sycl/ref_softmax.hpp +++ b/src/gpu/generic/sycl/ref_softmax.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,7 +50,7 @@ struct ref_sycl_softmax_fwd_t : public gpu::generic::sycl::primitive_t { VDISPATCH_SOFTMAX(attr()->has_default_values( sm::scales_runtime | sm::post_ops), VERBOSE_UNSUPPORTED_ATTR); - VDISPATCH_SOFTMAX(attr_oscale_ok(), VERBOSE_UNSUPPORTED_SCALES_CFG); + VDISPATCH_SOFTMAX(attr_scales_ok(), VERBOSE_UNSUPPORTED_SCALES_CFG); VDISPATCH_SOFTMAX(sycl_post_ops_t::post_ops_ok(attr(), true, false), VERBOSE_UNSUPPORTED_POSTOP); VDISPATCH_SOFTMAX_SC( @@ -67,15 +67,6 @@ struct ref_sycl_softmax_fwd_t : public gpu::generic::sycl::primitive_t { sycl_softmax_conf_t conf_; status_t init_conf(); - bool attr_oscale_ok() const { - const auto &scales = attr()->scales_; - bool ok = true; - for (const auto &e : scales.scales_) { - ok = ok && e.second.mask_ == 0; - } - return ok; - } - bool check_data_types(data_type_t src) { return utils::one_of(src, data_type::f32, data_type::bf16, data_type::f16, data_type::s8, data_type::u8); diff --git a/src/gpu/generic/sycl/ref_sum.hpp b/src/gpu/generic/sycl/ref_sum.hpp index 0bec1af1845..d289e461e6d 100644 --- a/src/gpu/generic/sycl/ref_sum.hpp +++ b/src/gpu/generic/sycl/ref_sum.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
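Dropping softmax's private attr_oscale_ok() in favor of the shared attr_scales_ok() removes the last direct iteration over scales_.scales_. Roughly the equivalent check under the new query API, sketched with stand-in types (the real helper lives in the common primitive-descriptor code):

    #include <cassert>
    #include <initializer_list>
    #include <map>

    // Stand-in for the per-arg scales container: an arg is present in the
    // map only if the user set scales for it.
    struct scales_map {
        std::map<int, int> masks; // arg -> mask
        bool has_default_values(int arg) const { return !masks.count(arg); }
        int get_mask(int arg) const { return masks.at(arg); }
    };

    // Only common (mask == 0) scales pass and defaults are skipped -- the
    // same shape as the removed attr_oscale_ok() loop.
    bool attr_scales_ok(const scales_map &sc, std::initializer_list<int> args) {
        for (int arg : args) {
            if (sc.has_default_values(arg)) continue;
            if (sc.get_mask(arg) != 0) return false;
        }
        return true;
    }

    int main() {
        const int ARG_SRC = 1, ARG_DST = 2; // hypothetical arg ids
        scales_map sc;
        sc.masks[ARG_SRC] = 0;
        assert(attr_scales_ok(sc, {ARG_SRC, ARG_DST}));
        sc.masks[ARG_DST] = 2; // per-channel dst scales -> rejected
        assert(!attr_scales_ok(sc, {ARG_SRC, ARG_DST}));
        return 0;
    }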
@@ -60,15 +60,13 @@ struct ref_sum_t : public gpu::generic::sycl::primitive_t { // Block formats are not yet supported // Dimensions can not be > 6 - VDISPATCH_SUM( - !(!src_d.is_plain() - || src_d.ndims() > xpu::sycl::md_t::max_dims), + VDISPATCH_SUM(src_d.is_plain() + && src_d.ndims() <= xpu::sycl::md_t::max_dims, VERBOSE_UNSUPPORTED_TENSOR_LAYOUT, "src"); - VDISPATCH_SUM(!(!attr()->scales_.has_default_values() - && !is_supported_type( - scales.get(DNNL_ARG_SRC + i) - .data_type_)), + VDISPATCH_SUM(attr()->scales_.has_default_values() + || is_supported_type( + scales.get_data_type(DNNL_ARG_SRC + i)), VERBOSE_UNSUPPORTED_ATTR); } diff --git a/src/gpu/generic/sycl/reorder_kernels.hpp b/src/gpu/generic/sycl/reorder_kernels.hpp index e9e317fcd99..2a3ff1d6281 100644 --- a/src/gpu/generic/sycl/reorder_kernels.hpp +++ b/src/gpu/generic/sycl/reorder_kernels.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,14 +73,14 @@ struct reorder_kernel_t { = (i < src_md().ndims()) ? src_md().strides()[i] : INT_MAX; } dims_t dims_scales_src; - if (conf_.scale_src_mask != 0) { + if (conf_.scale_src_mask > 0) { for (int i = 0; i < max_supported_ndims; i++) { dims_scales_src[i] = conf_.scale_src_mask >> i & 1 ? dims[i] : 1; } } dims_t dims_scales_dst; - if (conf_.scale_dst_mask != 0) { + if (conf_.scale_dst_mask > 0) { for (int i = 0; i < max_supported_ndims; i++) { dims_scales_dst[i] = conf_.scale_dst_mask >> i & 1 ? dims[i] : 1; @@ -97,7 +97,7 @@ struct reorder_kernel_t { auto src = src_mem.load(idx); if (conf_.do_scale_src) { - if (conf_.scale_src_mask != 0) { + if (conf_.scale_src_mask > 0) { int scale_idx = 0; for (int i = 0; i < max_supported_ndims; i++) { if (i < src_md().ndims()) { @@ -116,7 +116,7 @@ struct reorder_kernel_t { auto acc = src; acc = conf_.post_ops.apply(acc, dst_, dst_idx); if (conf_.do_scale_dst) { - if (conf_.scale_dst_mask != 0) { + if (conf_.scale_dst_mask > 0) { int scale_idx = 0; for (int i = 0; i < max_supported_ndims; i++) { if (i < src_md().ndims()) { diff --git a/src/gpu/gpu_utils.hpp b/src/gpu/gpu_utils.hpp index fe56ccaba41..66a73f4f673 100644 --- a/src/gpu/gpu_utils.hpp +++ b/src/gpu/gpu_utils.hpp @@ -32,7 +32,7 @@ namespace gpu { inline dim_t get_attr_oscales_count(int mask, const memory_desc_wrapper &md) { dim_t count = 1; - if (mask == 0) return count; + if (mask <= 0) return count; for (int d = 0; d < md.ndims(); d++) { const int dim_mask = 1 << d; @@ -45,13 +45,14 @@ inline dim_t get_attr_oscales_count(int mask, const memory_desc_wrapper &md) { class scales_query_t { public: bool has_default_values() const { return scales_.has_default_values(); } - int get_mask() const { return scales_.mask_; } + int get_mask() const { return scales_.get_mask(); } size_t get_count() const { return count_; } - data_type_t get_data_type() const { return scales_.data_type_; } + data_type_t get_data_type() const { return scales_.get_data_type(); } dim_t get_group() const { - if (scales_.ndims_ < 2) return 1; - const auto g0 = scales_.group_dims_[0]; - const auto g1 = scales_.group_dims_[1]; + if (scales_.has_default_groups()) return 1; + + const auto g0 = scales_.get_group(0); + const auto g1 = scales_.get_group(1); assert(utils::one_of(1, g0, g1)); return g0 > 1 ? 
g0 : g1; } @@ -59,13 +60,16 @@ class scales_query_t { int get_group_dim() const { // If groups are not identified, they should be set to `1`, and // it shouldn't hurt to divide by 1 any dim. Just use 0th for that. - if (scales_.ndims_ < 2) return 0; - const auto g0 = scales_.group_dims_[0]; - const auto g1 = scales_.group_dims_[1]; + if (scales_.has_default_groups()) return 0; + + const auto g0 = scales_.get_group(0); + const auto g1 = scales_.get_group(1); assert(utils::one_of(1, g0, g1)); UNUSED(g1); const int g_dim = g0 > 1 ? 0 : 1; - return ndims_ - scales_.ndims_ + g_dim; + // Note: hardcoded value so far. + // TODO: replace with some API when ndims can be different from 2. + return ndims_ - /* scales_.get_groups_ndims() = */ 2 + g_dim; } memory_storage_t &get_scales(const exec_ctx_t &ctx) const { @@ -77,11 +81,11 @@ class scales_query_t { int arg) : arg_(arg), ndims_(mdw.ndims()) { scales_ = attr->scales_.get(arg); - count_ = get_attr_oscales_count(scales_.mask_, mdw); + count_ = get_attr_oscales_count(scales_.get_mask(), mdw); } private: - runtime_scales_t scales_; + quant_entry_t scales_; dim_t count_ = 0; int arg_ = 0; int ndims_ = 0; diff --git a/src/gpu/intel/jit/conv/config.cpp b/src/gpu/intel/jit/conv/config.cpp index 711cc8845f7..64f3113ea36 100644 --- a/src/gpu/intel/jit/conv/config.cpp +++ b/src/gpu/intel/jit/conv/config.cpp @@ -974,7 +974,9 @@ bool post_ops_ok(const conv_problem_t &prb, const hw_t &hw) { scales[i] = scale_args[i].second; if (!attr->scales_.has_default_values(scales)) return false; for (int arg : scales) { - int mask = attr->scales_.get(arg).mask_; + if (attr->scales_.has_default_values(arg)) continue; + + int mask = attr->scales_.get(arg).get_mask(); // XXX: per_oc for BWD_D is treated as per_ic assuming it's called from // deconvolution. if (arg == DNNL_ARG_WEIGHTS) { diff --git a/src/gpu/intel/jit/gemm/gen_gemm.hpp b/src/gpu/intel/jit/gemm/gen_gemm.hpp index ae7b6df8a25..20c2155a38b 100644 --- a/src/gpu/intel/jit/gemm/gen_gemm.hpp +++ b/src/gpu/intel/jit/gemm/gen_gemm.hpp @@ -307,11 +307,15 @@ struct gen_gemm_t : public gpu_gemm_t { auto &wei_scales = attr()->scales_.get(DNNL_ARG_WEIGHTS); auto &src_scales = attr()->scales_.get(DNNL_ARG_SRC); - if (quant_enabled_ && wei_scales.ndims_ > 1) wei_scales_2d_ = true; - if (quant_enabled_ && src_scales.ndims_ > 1) src_scales_2d_ = true; + if (quant_enabled_ && !wei_scales.has_default_groups()) + wei_scales_2d_ = true; + if (quant_enabled_ && !src_scales.has_default_groups()) + src_scales_2d_ = true; for (auto s : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) { - auto mask = attr()->scales_.get(s).mask_; + if (attr()->scales_.has_default_values(s)) continue; + + auto mask = attr()->scales_.get_mask(s); VDISPATCH_GEMM(utils::one_of(mask, 0, mask_scalar, mask_per_oc, mask_per_ic) || (s == DNNL_ARG_WEIGHTS && wei_scales_2d_ @@ -322,12 +326,11 @@ struct gen_gemm_t : public gpu_gemm_t { } if (wei_scales_2d_) { - auto scales_group_k - = wei_scales.ndims_ > 0 ? wei_scales.group_dims_[0] : 1; + auto scales_group_k = wei_scales.get_group(0); if (scales_group_k >= d->k()) { wei_scales_2d_ = false; } else { - wei_scales_type = wei_scales.data_type_; + wei_scales_type = wei_scales.get_data_type(); if (!wei_zp_2d) wei_q2d_group_k = scales_group_k; else { @@ -336,14 +339,13 @@ struct gen_gemm_t : public gpu_gemm_t { } } // Non-trivial N group unsupported. 
- VDISPATCH_GEMM(wei_scales.group_dims_[1] == 1, + VDISPATCH_GEMM(wei_scales.get_group(1) == 1, VERBOSE_UNSUPPORTED_SCALES_CFG); } if (src_scales_2d_) { - src_scales_type = src_scales.data_type_; - src_po_sc_ = src_scales.mask_ == 2; - auto scales_group_k - = src_scales.ndims_ > 0 ? src_scales.group_dims_[1] : 1; + src_scales_type = src_scales.get_data_type(); + src_po_sc_ = src_scales.get_mask() == 2; + auto scales_group_k = src_scales.get_group(1); if (scales_group_k >= d->k()) src_scales_2d_ = false; else { diff --git a/src/gpu/intel/jit/gemm/jit_gemm_pd.cpp b/src/gpu/intel/jit/gemm/jit_gemm_pd.cpp index 20ca4105a33..a31bb52d744 100644 --- a/src/gpu/intel/jit/gemm/jit_gemm_pd.cpp +++ b/src/gpu/intel/jit/gemm/jit_gemm_pd.cpp @@ -92,71 +92,67 @@ status_t jit_gemm_pd_t::init_post_ops() { } if (!wei_scales->has_default_values()) { - const auto &mask = wei_scales->mask_; + const auto &mask = wei_scales->get_mask(); bool convert = (mask == 0 || math::is_pow2(mask)); - if (wei_scales->ndims_ > 1) - convert |= (wei_scales->group_dims_[0] >= d->k()); + if (!wei_scales->has_default_groups()) + convert |= (wei_scales->get_group(0) >= d->k()); if (convert) { - ok = ok && (mask == 0 || mask == (1 << (d->c_desc.ndims - 1))); - dim_t dims = {(mask > 0) ? d->m() : 1}; CHECK(memory_desc_init_by_tag(wei_scales_md, 1, &dims, - wei_scales->data_type_, format_tag::a)); + wei_scales->get_data_type(), format_tag::a)); - auto status = post_ops_.prepend_binary(binary_mul, &wei_scales_md); - if (status != status::success) return status; + CHECK(post_ops_.prepend_binary(binary_mul, &wei_scales_md)); binary_srcs_.insert(binary_srcs_.begin(), binary_src_t {binary_src_t::scales, DNNL_ARG_WEIGHTS}); } } if (!src_scales->has_default_values()) { - const auto &mask = src_scales->mask_; + const auto &mask = src_scales->get_mask(); bool convert = (mask == 0); - if (src_scales->ndims_ > 1) { - convert |= (src_scales->group_dims_[1] >= d->k()); - convert |= (src_scales->group_dims_[0] >= d->n()); + if (!src_scales->has_default_groups()) { + convert |= (src_scales->get_group(1) >= d->k()); + convert |= (src_scales->get_group(0) >= d->n()); } if (convert) { if (mask == 0) { dim_t dims = 1; CHECK(memory_desc_init_by_tag(src_scales_md, 1, &dims, - src_scales->data_type_, format_tag::a)); - } else if (src_scales->ndims_ > 1) { - int n_group = src_scales->group_dims_[0]; - int k_group = src_scales->group_dims_[1]; + src_scales->get_data_type(), format_tag::a)); + } else if (!src_scales->has_default_groups()) { + // TODO: is it inverted? + int n_group = src_scales->get_group(0); + int k_group = src_scales->get_group(1); dim_t dims[] = {(mask & (d->batch() > 1 ? 2 : 1)) ? 
d->n() / n_group : 1, d->k() / k_group}; CHECK(memory_desc_init_by_tag(src_scales_md, 2, dims, - src_scales->data_type_, format_tag::ab)); + src_scales->get_data_type(), format_tag::ab)); } else { dim_t dims[] = {d->n(), 1}; CHECK(memory_desc_init_by_tag(src_scales_md, 2, dims, - src_scales->data_type_, format_tag::ab)); + src_scales->get_data_type(), format_tag::ab)); } - auto status = post_ops_.prepend_binary(binary_mul, &src_scales_md); - if (status != status::success) return status; + CHECK(post_ops_.prepend_binary(binary_mul, &src_scales_md)); binary_srcs_.insert(binary_srcs_.begin(), binary_src_t {binary_src_t::scales, DNNL_ARG_SRC}); } } if (!c_scales->has_default_values()) { - const auto &mask = c_scales->mask_; + const auto &mask = c_scales->get_mask(); bool convert = (mask == 0 || math::is_pow2(mask)); - if (c_scales->ndims_ > 1) - convert |= (c_scales->group_dims_[0] >= d->m()); + if (!c_scales->has_default_groups()) + convert |= (c_scales->get_group(0) >= d->m()); if (convert) { ok = ok && (mask == 0 || mask == (1 << (d->c_desc.ndims - 1))); dim_t dims = {(mask > 0) ? d->m() : 1}; CHECK(memory_desc_init_by_tag(c_scales_md, 1, &dims, - c_scales->data_type_, format_tag::a)); + c_scales->get_data_type(), format_tag::a)); - auto status = post_ops_.append_binary(binary_div, &c_scales_md); - if (status != status::success) return status; + CHECK(post_ops_.append_binary(binary_div, &c_scales_md)); binary_srcs_.push_back( binary_src_t {binary_src_t::scales, DNNL_ARG_DST}); diff --git a/src/gpu/intel/jit/ir/post_ops.cpp b/src/gpu/intel/jit/ir/post_ops.cpp index 4ca7837763a..c8b2e1544af 100644 --- a/src/gpu/intel/jit/ir/post_ops.cpp +++ b/src/gpu/intel/jit/ir/post_ops.cpp @@ -50,12 +50,10 @@ post_op_context_t::post_op_context_t(const primitive_attr_t &attr, if (buf.is_empty()) continue; int key = kernel_info.key(scale_args[i].first) & ~DNNL_ARG_ATTR_SCALES; - auto scales = attr.scales_.get(key); - if (scales.has_default_values()) continue; - int mask = scales.mask_; - auto sc_type = scales.data_type_ == data_type::undef - ? type_t::f32() - : scales.data_type_; + if (attr.scales_.has_default_values(key)) continue; + + int mask = attr.scales_.get_mask(key); + auto sc_type = attr.scales_.get_data_type(key); view_t view; switch (key) { case DNNL_ARG_SRC: @@ -67,8 +65,8 @@ post_op_context_t::post_op_context_t(const primitive_attr_t &attr, break; case DNNL_ARG_WEIGHTS: // Convert o/i weights mask to src/dst. - // XXX: per_oc for BWD_D is treated as per_ic assuming it's - // called from deconvolution. + // XXX: per_oc for BWD_D is treated as per_ic assuming + // it's called from deconvolution. ir_assert(utils::one_of(mask, 0, 1, 3)); wei_scales_type = sc_type; view = po_vm_.create_view(sc_type, (mask) ? 1 << 1 : 0); @@ -76,7 +74,7 @@ post_op_context_t::post_op_context_t(const primitive_attr_t &attr, wei_scales_mask = mask; break; case DNNL_ARG_DST: // Invert dst scales right after load. 
- ir_assert(utils::one_of(mask, 0, 2)); + ir_assert(mask == 0); dst_scales_type = sc_type; view = po_vm_.create_view(sc_type, mask); dst_scales = add_input_tensor(view, buf); @@ -297,7 +295,8 @@ bool post_op_context_t::init_need_to_restore_zero_padding( && out_md.dims[1] != out_md.padded_dims[1]) return true; auto dst_scales = attr.scales_.get(DNNL_ARG_DST); - if (!dst_scales.has_default_values() && dst_scales.mask_ != 0) return true; + if (!dst_scales.has_default_values() && dst_scales.get_mask() != 0) + return true; return false; } diff --git a/src/gpu/intel/jit/ir/tensor_config.cpp b/src/gpu/intel/jit/ir/tensor_config.cpp index a7c5e4f7c8d..c21a198650d 100644 --- a/src/gpu/intel/jit/ir/tensor_config.cpp +++ b/src/gpu/intel/jit/ir/tensor_config.cpp @@ -59,9 +59,8 @@ void init_extra_tensors(const zero_points_config_t &zp_cfg, auto scale_args = get_scale_args(); for (int i = 0; i < (int)scale_args.size(); i++) { int arg = scale_args[i].second; - auto &s = attr.scales_.get(arg); - if (s.has_default_values()) continue; - std::vector dims = {(s.mask_ == 0) ? 1 : oc}; + if (attr.scales_.has_default_values(arg)) continue; + std::vector dims = {(attr.scales_.get_mask(arg) == 0) ? 1 : oc}; layout_t layout(type_t::f32(), 0, dims); int arg_key = DNNL_ARG_ATTR_SCALES | arg; tensor_cfg.add_tensor(scale_args[i].first, arg_key, /*is_input=*/true, diff --git a/src/gpu/intel/jit/reorder/gen_reorder.cpp b/src/gpu/intel/jit/reorder/gen_reorder.cpp index 974b35210a8..fe34e153b02 100644 --- a/src/gpu/intel/jit/reorder/gen_reorder.cpp +++ b/src/gpu/intel/jit/reorder/gen_reorder.cpp @@ -58,8 +58,13 @@ status_t gen_reorder_t::pd_t::init(impl::engine_t *engine, return true; }; auto scales_ok = [&]() { - return (attr()->scales_.get(DNNL_ARG_SRC).mask_ == 0) - && (attr()->scales_.get(DNNL_ARG_DST).mask_ == 0); + const bool src_scale_ok + = attr()->scales_.has_default_values(DNNL_ARG_SRC) + || attr()->scales_.get_mask(DNNL_ARG_SRC) == 0; + const bool dst_scale_ok + = attr()->scales_.has_default_values(DNNL_ARG_DST) + || attr()->scales_.get_mask(DNNL_ARG_DST) == 0; + return src_scale_ok && dst_scale_ok; }; auto is_bf16_or_f32_or_f8 = [](data_type_t dt) { return utils::one_of(dt, bf16, f32, f8_e5m2, f8_e4m3); diff --git a/src/gpu/intel/ocl/convolution_inner_product.cpp b/src/gpu/intel/ocl/convolution_inner_product.cpp index b86c67e17c2..a103c85320a 100644 --- a/src/gpu/intel/ocl/convolution_inner_product.cpp +++ b/src/gpu/intel/ocl/convolution_inner_product.cpp @@ -198,7 +198,7 @@ status_t convolution_inner_product_fwd_t::execute_forward( const auto &args = ctx.args(); for (const int arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) { - if (pd()->attr()->scales_.get(arg).has_default_values()) continue; + if (pd()->attr()->scales_.has_default_values(arg)) continue; c_args[DNNL_ARG_ATTR_SCALES | arg] = args.at(DNNL_ARG_ATTR_SCALES | arg); diff --git a/src/gpu/intel/ocl/gemm/gemm_with_post_ops.cpp b/src/gpu/intel/ocl/gemm/gemm_with_post_ops.cpp index f3542d69934..3b9361eee05 100644 --- a/src/gpu/intel/ocl/gemm/gemm_with_post_ops.cpp +++ b/src/gpu/intel/ocl/gemm/gemm_with_post_ops.cpp @@ -46,7 +46,9 @@ status_t gemm_with_post_ops_t::pd_t::init(impl::engine_t *engine) { const primitive_attr_t *attributes_with_po = attr(); for (int arg : {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) { - const auto &mask = attr()->scales_.get(arg).mask_; + if (attr()->scales_.has_default_values(arg)) continue; + + const auto &mask = attr()->scales_.get_mask(arg); if (arg == DNNL_ARG_WEIGHTS && !wei_decomp) VDISPATCH_GEMM((mask == 0 || 
mask == (1 << (dst_md()->ndims - 1))), VERBOSE_UNSUPPORTED_SCALES_CFG); @@ -88,7 +90,7 @@ status_t gemm_with_post_ops_t::pd_t::init(impl::engine_t *engine) { // Setup empty attributes but keep zero points for gemm. primitive_attr_t attributes_without_po = *attr(); attributes_without_po.set_post_ops(post_ops_t()); - attributes_without_po.scales_ = arg_scales_t(); + attributes_without_po.scales_ = scales_t(); attributes_without_po.zero_points_ = zero_points_t(); int src_mask, wei_mask; auto zp = attributes_with_po->zero_points_; @@ -158,12 +160,10 @@ status_t gemm_with_post_ops_t::pd_t::init_kernel_ctx( kernel_ctx.set_data_type(c_type); const auto &attr_scales = attr()->scales_; - const bool with_src_scales - = !attr_scales.get(DNNL_ARG_SRC).has_default_values(); + const bool with_src_scales = !attr_scales.has_default_values(DNNL_ARG_SRC); const bool with_wei_scales - = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_values(); - const bool with_dst_scales - = !attr_scales.get(DNNL_ARG_DST).has_default_values(); + = !attr_scales.has_default_values(DNNL_ARG_WEIGHTS); + const bool with_dst_scales = !attr_scales.has_default_values(DNNL_ARG_DST); auto is_int_type = [](data_type_t t) { return utils::one_of(t, data_type::s8, data_type::u8, data_type::s32); }; @@ -250,7 +250,7 @@ status_t gemm_with_post_ops_t::execute(const gemm_exec_ctx_t &ctx) const { arg_list.set(idx++, GEMM_CTX_ARG_STORAGE(a_scales)); arg_list.set(idx++, GEMM_CTX_ARG_STORAGE(c_scales)); arg_list.set(idx++, - pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).mask_ != 0 ? 1 : 0); + pd()->attr()->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0 ? 1 : 0); arg_list.set(idx, GEMM_CTX_ARG_STORAGE(c_zero_point)); auto nd_range = pd()->dispatch_.nd_range(); exec_status = parallel_for(ctx, nd_range, post_process_kernel_, arg_list); diff --git a/src/gpu/intel/ocl/gemm/ref_gemm.hpp b/src/gpu/intel/ocl/gemm/ref_gemm.hpp index b3452437ea7..efb6f412fd4 100644 --- a/src/gpu/intel/ocl/gemm/ref_gemm.hpp +++ b/src/gpu/intel/ocl/gemm/ref_gemm.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -136,9 +136,14 @@ struct ref_gemm_t : public gpu_gemm_t { bool attr_oscale_ok() const { const auto &scales = attr()->scales_; - return scales.get(DNNL_ARG_SRC).mask_ == 0 - && scales.get(DNNL_ARG_WEIGHTS).mask_ == 0 - && scales.get(DNNL_ARG_DST).mask_ == 0; + const bool src_scale_ok = scales.has_default_values(DNNL_ARG_SRC) + || scales.get_mask(DNNL_ARG_SRC) == 0; + const bool wei_scale_ok + = scales.has_default_values(DNNL_ARG_WEIGHTS) + || scales.get_mask(DNNL_ARG_WEIGHTS) == 0; + const bool dst_scale_ok = scales.has_default_values(DNNL_ARG_DST) + || scales.get_mask(DNNL_ARG_DST) == 0; + return src_scale_ok && wei_scale_ok && dst_scale_ok; } bool attr_zp_ok() const { diff --git a/src/gpu/intel/ocl/gemm_inner_product.hpp b/src/gpu/intel/ocl/gemm_inner_product.hpp index 6e8d36b80b0..d7e8810595a 100644 --- a/src/gpu/intel/ocl/gemm_inner_product.hpp +++ b/src/gpu/intel/ocl/gemm_inner_product.hpp @@ -95,14 +95,19 @@ struct gemm_inner_product_fwd_t : public gpu_primitive_t { "memory_desc_reshape()"); } primitive_attr_t gemm_attr = *attr(); - auto wei_mask = gemm_attr.scales_.get(DNNL_ARG_WEIGHTS).mask_; - if (wei_mask == 1) //transpose mask for gemm - VDISPATCH_INNER_PRODUCT_SC( - gemm_attr.scales_.set( - DNNL_ARG_WEIGHTS, 1 << (b_md.ndims - 1)), - VERBOSE_UNSUPPORTED_ATTR); - else if (wei_mask != 0) - return status::unimplemented; + if (!gemm_attr.scales_.has_default_values(DNNL_ARG_WEIGHTS)) { + auto wei_mask = gemm_attr.scales_.get_mask(DNNL_ARG_WEIGHTS); + if (wei_mask == 1) { + // Transpose the mask for gemm. + VDISPATCH_INNER_PRODUCT_SC( + gemm_attr.scales_.set( + DNNL_ARG_WEIGHTS, 1 << (b_md.ndims - 1)), + VERBOSE_UNSUPPORTED_SCALES_CFG); + } else { + VDISPATCH_INNER_PRODUCT( + wei_mask == 0, VERBOSE_UNSUPPORTED_ATTR); + } + } VDISPATCH_INNER_PRODUCT_SC( create_gemm_pd(gemm_pd_, engine, &a_md, &b_md, &c_md, &bias_md, desc()->accum_data_type, &gemm_attr, diff --git a/src/gpu/intel/ocl/gemm_matmul.hpp b/src/gpu/intel/ocl/gemm_matmul.hpp index 5b1e19cbaf3..24e717e56b0 100644 --- a/src/gpu/intel/ocl/gemm_matmul.hpp +++ b/src/gpu/intel/ocl/gemm_matmul.hpp @@ -70,17 +70,25 @@ struct gemm_matmul_t : public gpu_primitive_t { return status::success; }; - auto adjust_scales_mask = [&](arg_scales_t &scales, int arg, - int diff_dims) { - int mask = 0, nd = 0; - bool is_set = false; - data_type_t dt = dnnl_data_type_undef; - dims_t dims = {}; - CHECK(attr()->scales_.get(arg, &mask, &is_set, &nd, dims, &dt)); - mask = mask >> diff_dims; - if (is_set) { CHECK(scales.set(arg, mask, nd, dims, dt)); } - return status::success; - }; + // The function shrinks the mask for scales and updates it in + // `scales` object. + auto adjust_scales_mask + = [&](scales_t &scales, int arg, int diff_dims) { + if (attr()->scales_.has_default_values(arg)) + return status::success; + + int mask = attr()->scales_.get_mask(arg) >> diff_dims; + data_type_t dt = attr()->scales_.get_data_type(arg); + int nd = 0; + dims_t dims {}; + if (!attr()->scales_.get(arg).has_default_groups()) { + nd = 2; // Note: hardcoded so far. 
+ dims[0] = attr()->scales_.get_group(arg, 0); + dims[1] = attr()->scales_.get_group(arg, 1); + } + CHECK(scales.set(arg, mask, dt, nd, dims)); + return status::success; + }; if (!attr()->zero_points_.has_default_values()) { CHECK(map_gemm_zp(DNNL_ARG_SRC, DNNL_ARG_B)); CHECK(map_gemm_zp( diff --git a/src/gpu/intel/ocl/gen9_binary.hpp b/src/gpu/intel/ocl/gen9_binary.hpp index 2ef8df1c52e..40b256f274f 100644 --- a/src/gpu/intel/ocl/gen9_binary.hpp +++ b/src/gpu/intel/ocl/gen9_binary.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,13 +63,17 @@ struct gen9_binary_t : public gpu_primitive_t { VDISPATCH_BINARY(!is_ternary_op(), VERBOSE_BAD_ALGORITHM); VDISPATCH_BINARY( IMPLICATION(!attr()->scales_.has_default_values(), - utils::one_of(dst_md()->data_type, s8, u8) - && utils::everyone_is( - attr()->scales_.get(DNNL_ARG_SRC_0) - .mask_, - attr()->scales_.get(DNNL_ARG_SRC_1) - .mask_, - 0)), + utils::one_of(dst_md()->data_type, s8, u8)), + VERBOSE_UNSUPPORTED_SCALES_CFG); + VDISPATCH_BINARY( + IMPLICATION(!attr()->scales_.get(DNNL_ARG_SRC_0) + .has_default_values(), + attr()->scales_.get_mask(DNNL_ARG_SRC_0) == 0), + VERBOSE_UNSUPPORTED_SCALES_CFG); + VDISPATCH_BINARY( + IMPLICATION(!attr()->scales_.get(DNNL_ARG_SRC_1) + .has_default_values(), + attr()->scales_.get_mask(DNNL_ARG_SRC_1) == 0), VERBOSE_UNSUPPORTED_SCALES_CFG); VDISPATCH_BINARY(attr()->has_default_values(attr_skip_mask), VERBOSE_UNSUPPORTED_ATTR); diff --git a/src/gpu/intel/ocl/gen9_softmax.hpp b/src/gpu/intel/ocl/gen9_softmax.hpp index f7a1880e1bd..0d2a6dd0913 100644 --- a/src/gpu/intel/ocl/gen9_softmax.hpp +++ b/src/gpu/intel/ocl/gen9_softmax.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -215,9 +215,9 @@ struct gen9_softmax_fwd_t : public gpu_primitive_t { kernel_ctx.add_option("-cl-std=CL2.0"); kernel_ctx.define_int("LOGSOFTMAX", pd()->is_logsoftmax()); kernel_ctx.define_int("WITH_SRC_SCALES", - !pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values()); + !pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC)); kernel_ctx.define_int("WITH_DST_SCALES", - !pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values()); + !pd()->attr()->scales_.has_default_values(DNNL_ARG_DST)); const memory_desc_wrapper dst_mdw(pd()->dst_md()); const memory_desc_wrapper src_mdw(pd()->src_md()); diff --git a/src/gpu/intel/ocl/generic_reorder.cpp b/src/gpu/intel/ocl/generic_reorder.cpp index 3094556cef4..cdd592889dc 100644 --- a/src/gpu/intel/ocl/generic_reorder.cpp +++ b/src/gpu/intel/ocl/generic_reorder.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021-2024 Intel Corporation +* Copyright 2021-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
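The adjust_scales_mask lambda above shrinks a matmul-rank mask to gemm rank by shifting off the leading (batch) dimensions: mask bit d corresponds to logical dimension d counted from the outermost, so dropping diff_dims leading dims is a right shift. A worked example of that bit arithmetic (values hypothetical):

    #include <cassert>

    int main() {
        // 4D matmul weights (b0, b1, K, N): the per-K-and-N ("per_ocic")
        // mask sets bits 2 and 3, i.e. (1 << 2) | (1 << 3) == 12.
        int mask_4d = 12;
        int diff_dims = 2; // the 2D gemm view drops the two batch dims
        int mask_2d = mask_4d >> diff_dims;
        assert(mask_2d == 3); // bits 0 and 1: per-K-and-N on the (K, N) view
        return 0;
    }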
@@ -791,8 +791,8 @@ status_t generic_reorder_t::pd_t::init_conf(impl::engine_t *engine) { memcpy(&new_a, src_md(), sizeof(new_a)); memcpy(&new_b, dst_md(), sizeof(new_b)); compress(new_a, new_b, src_mask, dst_mask); - if (src_mask) CHECK(attr_copy.scales_.set(DNNL_ARG_SRC, src_mask)); - if (dst_mask) CHECK(attr_copy.scales_.set(DNNL_ARG_DST, dst_mask)); + if (src_mask >= 0) { CHECK(attr_copy.scales_.set(DNNL_ARG_SRC, src_mask)); } + if (dst_mask >= 0) { CHECK(attr_copy.scales_.set(DNNL_ARG_DST, dst_mask)); } if (!is_generic_faster_than_ref(new_a, new_b)) return status::unimplemented; diff --git a/src/gpu/intel/ocl/micro_sdpa.cpp b/src/gpu/intel/ocl/micro_sdpa.cpp index 41f007f3acb..75851ffbf06 100644 --- a/src/gpu/intel/ocl/micro_sdpa.cpp +++ b/src/gpu/intel/ocl/micro_sdpa.cpp @@ -203,10 +203,11 @@ sdpa_config_t *choose_config_xehpc( /// | 3 (1100) | true | /// | 1 (1000) | true | /// | 8 (0001) | false | -bool with_quantize_common(const runtime_scales_t &scales) { - return !scales.has_default_values() - && (((scales.mask_ & 3) != 0 && (scales.mask_ & 12) == 0) - || scales.mask_ == 0); +bool with_quantize_common(const quant_entry_t &scale_entry) { + return !scale_entry.has_default_values() + && (((scale_entry.get_mask() & 3) != 0 + && (scale_entry.get_mask() & 12) == 0) + || scale_entry.get_mask() == 0); } /// Returns true if a common zero points value is used for each slice of the diff --git a/src/gpu/intel/ocl/micro_sdpa.hpp b/src/gpu/intel/ocl/micro_sdpa.hpp index 5942b4ea2a9..0e2f748ed8a 100644 --- a/src/gpu/intel/ocl/micro_sdpa.hpp +++ b/src/gpu/intel/ocl/micro_sdpa.hpp @@ -92,7 +92,7 @@ struct micro_sdpa_t : public gpu_primitive_t { "tensors", qry_md()->dims[1], key_md()->dims[1], val_md()->dims[1]); - int kq_scales_mask = desc()->kq_scales.mask_; + int kq_scales_mask = desc()->kq_scales.get_mask(); int kq_zp_mask = desc()->kq_zero_points.get(DNNL_ARG_WEIGHTS); if (!desc()->kq_scales.has_default_values() && !desc()->kq_zero_points.has_default_values()) @@ -119,7 +119,7 @@ struct micro_sdpa_t : public gpu_primitive_t { key_group_size()); } - int vs_scales_mask = desc()->vs_scales.mask_; + int vs_scales_mask = desc()->vs_scales.get_mask(); int vs_zp_mask = desc()->vs_zero_points.get(DNNL_ARG_WEIGHTS); if (!desc()->vs_scales.has_default_values() && !desc()->vs_zero_points.has_default_values()) diff --git a/src/gpu/intel/ocl/multi_po_reorder_binary.hpp b/src/gpu/intel/ocl/multi_po_reorder_binary.hpp index e10b1f4e961..4b9c73a0444 100644 --- a/src/gpu/intel/ocl/multi_po_reorder_binary.hpp +++ b/src/gpu/intel/ocl/multi_po_reorder_binary.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023-2024 Intel Corporation +* Copyright 2023-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
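with_quantize_common() above now takes the quant_entry_t directly, but its mask test is unchanged: quantization is "common" when the mask is 0, or touches only the two leading dimensions (mask & 3) while leaving the two trailing ones alone ((mask & 12) == 0). A standalone restatement reproducing the truth table from the doc comment:

    #include <cassert>

    // `set` models !scale_entry.has_default_values().
    static bool with_quantize_common(bool set, int mask) {
        return set && (((mask & 3) != 0 && (mask & 12) == 0) || mask == 0);
    }

    int main() {
        assert(with_quantize_common(true, 0));   // common scale
        assert(with_quantize_common(true, 1));   // "1000" row of the table
        assert(with_quantize_common(true, 3));   // "1100" row
        assert(!with_quantize_common(true, 8));  // "0001" row: trailing dim
        assert(!with_quantize_common(false, 0)); // default values: no quant
        return 0;
    }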
@@ -41,8 +41,8 @@ struct multi_po_reorder_binary : public gpu_primitive_t { DECLARE_COMMON_PD_T("multi_po_reorder_binary", multi_po_reorder_binary); status_t init(impl::engine_t *engine) { - if (attr()->scales_.get(DNNL_ARG_SRC_0).is_set_ - || attr()->scales_.get(DNNL_ARG_SRC_1).is_set_ + if (!attr()->scales_.get(DNNL_ARG_SRC_0).has_default_values() + || !attr()->scales_.get(DNNL_ARG_SRC_1).has_default_values() || attr()->post_ops_.len() >= 1) { VDISPATCH_BINARY(false, VERBOSE_UNSUPPORTED_ATTR); } diff --git a/src/gpu/intel/ocl/ref_group_normalization.cpp b/src/gpu/intel/ocl/ref_group_normalization.cpp index a5e5f14121d..4cd8dded952 100644 --- a/src/gpu/intel/ocl/ref_group_normalization.cpp +++ b/src/gpu/intel/ocl/ref_group_normalization.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2024 Intel Corporation +* Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -108,9 +108,9 @@ status_t ref_group_normalization_fwd_t::pd_t::init_kernel_ctx( compute::kernel_ctx_t &kernel_ctx) const { kernel_ctx.define_int("WITH_SRC_SCALES", - !attr()->scales_.get(DNNL_ARG_SRC).has_default_values()); + !attr()->scales_.has_default_values(DNNL_ARG_SRC)); kernel_ctx.define_int("WITH_DST_SCALES", - !attr()->scales_.get(DNNL_ARG_DST).has_default_values()); + !attr()->scales_.has_default_values(DNNL_ARG_DST)); init_kernel_ctx_common(kernel_ctx, this); // promote macros defined by parameters to OpenCL command line diff --git a/src/gpu/intel/ocl/ref_layer_normalization.hpp b/src/gpu/intel/ocl/ref_layer_normalization.hpp index 45fb291964a..13d50f23d2d 100644 --- a/src/gpu/intel/ocl/ref_layer_normalization.hpp +++ b/src/gpu/intel/ocl/ref_layer_normalization.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -93,9 +93,9 @@ struct ref_layer_normalization_fwd_t : public gpu_primitive_t { CHECK(status); kernel_ctx.define_int("WITH_SRC_SCALES", - !pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values()); + !pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC)); kernel_ctx.define_int("WITH_DST_SCALES", - !pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values()); + !pd()->attr()->scales_.has_default_values(DNNL_ARG_DST)); CHECK(create_kernel(engine, &kernel_, "ref_lnorm_fwd", kernel_ctx)); if (!kernel_) return status::runtime_error; diff --git a/src/gpu/intel/ocl/ref_matmul.cpp b/src/gpu/intel/ocl/ref_matmul.cpp index 8fa152073d5..62ab19de3cf 100644 --- a/src/gpu/intel/ocl/ref_matmul.cpp +++ b/src/gpu/intel/ocl/ref_matmul.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
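The group/layer normalization hunks above (and the softmax ones further down) all derive the WITH_SRC_SCALES / WITH_DST_SCALES kernel macros from the arg-level has_default_values() query. Presumably define_int ends up as a -D option on the OpenCL build line; a minimal model of that host-side pattern, with the option syntax assumed:

    #include <cassert>
    #include <string>

    std::string make_options(bool src_scales_set, bool dst_scales_set) {
        std::string opts = "-DWITH_SRC_SCALES=";
        opts += std::to_string(src_scales_set ? 1 : 0);
        opts += " -DWITH_DST_SCALES=";
        opts += std::to_string(dst_scales_set ? 1 : 0);
        return opts;
    }

    int main() {
        assert(make_options(true, false)
                == "-DWITH_SRC_SCALES=1 -DWITH_DST_SCALES=0");
        return 0;
    }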
@@ -84,15 +84,13 @@ status_t ref_matmul_t::execute_ref(const exec_ctx_t &ctx) const { const dim_t K = a_d.dims()[last]; const auto &attr_scales = pd()->attr()->scales_; - const int wei_scale_mask = attr_scales.get(DNNL_ARG_WEIGHTS).mask_; + const int wei_scale_mask = attr_scales.get_mask(DNNL_ARG_WEIGHTS); const bool wei_scale_per_k = wei_scale_mask & pd()->wei_qmask_K(); - const auto wei_scale_group_ndim = attr_scales.get(DNNL_ARG_WEIGHTS).ndims_; - const auto wei_scale_group_k = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[0] + const auto wei_scale_group_k + = !attr_scales.get(DNNL_ARG_WEIGHTS).has_default_groups() + ? attr_scales.get_group(DNNL_ARG_WEIGHTS, 0) : (wei_scale_per_k ? 1 : K); - const auto wei_scale_group_n = wei_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_WEIGHTS).group_dims_[1] - : 1; + const auto wei_scale_group_n = attr_scales.get_group(DNNL_ARG_WEIGHTS, 1); const auto wei_scale_ngroups_k = K / wei_scale_group_k; // Identify wei_scales dimensions as user may not pass them. dims_t wei_scale_dims {}; @@ -120,11 +118,11 @@ status_t ref_matmul_t::execute_ref(const exec_ctx_t &ctx) const { const dim_t wei_scale_stride_b1 = b_d.ndims() > 3 ? wei_scale_strides[b_d.ndims() - 4] : 0; - const int src_scale_mask = attr_scales.get(DNNL_ARG_SRC).mask_; + const int src_scale_mask = attr_scales.get_mask(DNNL_ARG_SRC); const bool src_scale_per_k = src_scale_mask & pd()->src_qmask_K(); - const auto src_scale_group_ndim = attr_scales.get(DNNL_ARG_SRC).ndims_; - const auto src_scale_group_k = src_scale_group_ndim > 0 - ? attr_scales.get(DNNL_ARG_SRC).group_dims_[1] + const auto src_scale_group_k + = !attr_scales.get(DNNL_ARG_SRC).has_default_groups() + ? attr_scales.get_group(DNNL_ARG_SRC, 1) : (src_scale_per_k ? 1 : K); const auto src_scale_ngroups_k = K / src_scale_group_k; // Identify src_scales dimensions as user may not pass them. diff --git a/src/gpu/intel/ocl/ref_matmul.hpp b/src/gpu/intel/ocl/ref_matmul.hpp index 428f0289c46..b20ac69a940 100644 --- a/src/gpu/intel/ocl/ref_matmul.hpp +++ b/src/gpu/intel/ocl/ref_matmul.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
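The ref_matmul hunk above derives the K-group count from the new group query: with grouped weight scales, one scale value covers get_group(DNNL_ARG_WEIGHTS, 0) consecutive K elements. A worked sketch of that arithmetic, assuming K is divisible by the group size as the implementation does:

    #include <cassert>

    int main() {
        const int K = 256;
        const int wei_scale_group_k = 32; // get_group(DNNL_ARG_WEIGHTS, 0)
        const int ngroups_k = K / wei_scale_group_k;
        assert(ngroups_k == 8); // scales tensor has K/group_k entries along K
        // The scale for element k sits at group index k / group_k.
        const int k = 100;
        assert(k / wei_scale_group_k == 3);
        return 0;
    }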
diff --git a/src/gpu/intel/ocl/ref_matmul.hpp b/src/gpu/intel/ocl/ref_matmul.hpp
index 428f0289c46..b20ac69a940 100644
--- a/src/gpu/intel/ocl/ref_matmul.hpp
+++ b/src/gpu/intel/ocl/ref_matmul.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -230,19 +230,19 @@ struct ref_matmul_t : public gpu_primitive_t {
         def_data_type(kernel_ctx, pd()->bia_dt_, "BIA");
         def_data_type(kernel_ctx, pd()->desc()->accum_data_type, "ACC");
         def_data_type(kernel_ctx,
-                pd()->attr()->scales_.get(DNNL_ARG_WEIGHTS).data_type_,
+                pd()->attr()->scales_.get_data_type(DNNL_ARG_WEIGHTS),
                 "WEI_SCALES");
         def_data_type(kernel_ctx,
                 pd()->attr()->zero_points_.get_data_type(DNNL_ARG_WEIGHTS),
                 "WEI_ZP");
         def_data_type(kernel_ctx,
-                pd()->attr()->scales_.get(DNNL_ARG_SRC).data_type_,
+                pd()->attr()->scales_.get_data_type(DNNL_ARG_SRC),
                 "SRC_SCALES");
         def_data_type(kernel_ctx,
                 pd()->attr()->zero_points_.get_data_type(DNNL_ARG_SRC),
                 "SRC_ZP");
         def_data_type(kernel_ctx,
-                pd()->attr()->scales_.get(DNNL_ARG_DST).data_type_,
+                pd()->attr()->scales_.get_data_type(DNNL_ARG_DST),
                 "DST_SCALES");
         kernels_.resize(2);
         CHECK(create_kernel(engine, &kernels_[0], "ref_matmul", kernel_ctx));
diff --git a/src/gpu/intel/ocl/reusable_lnorm.cpp b/src/gpu/intel/ocl/reusable_lnorm.cpp
index 77b2c65d9cf..35fe364e7e9 100644
--- a/src/gpu/intel/ocl/reusable_lnorm.cpp
+++ b/src/gpu/intel/ocl/reusable_lnorm.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023-2024 Intel Corporation
+* Copyright 2023-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -49,8 +49,8 @@ static status_t init_conf_common(const layer_normalization_pd_t *pd,
     conf->dst_dt = dst_buf.data_type;
 
     auto scales = pd->attr()->scales_;
-    conf->with_src_scale = !scales.get(DNNL_ARG_SRC).has_default_values();
-    conf->with_dst_scale = !scales.get(DNNL_ARG_DST).has_default_values();
+    conf->with_src_scale = !scales.has_default_values(DNNL_ARG_SRC);
+    conf->with_dst_scale = !scales.has_default_values(DNNL_ARG_DST);
 
     // We require that the lnorm axis is a single dense block, so that it can
     // be represented by a stride + size alone.
diff --git a/src/gpu/intel/ocl/simple_binary.hpp b/src/gpu/intel/ocl/simple_binary.hpp
index 013517a71f7..a166c558437 100644
--- a/src/gpu/intel/ocl/simple_binary.hpp
+++ b/src/gpu/intel/ocl/simple_binary.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -97,7 +97,7 @@ struct simple_binary_t : public gpu_primitive_t {
         status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
 
         bool with_scales(int position) const {
-            return !attr()->scales_.get(position).has_default_values();
+            return !attr()->scales_.has_default_values(position);
         }
 
         bool with_scales() const {
diff --git a/src/gpu/intel/ocl/simple_layer_normalization.hpp b/src/gpu/intel/ocl/simple_layer_normalization.hpp
index 2719bd4491d..f225b19f30a 100644
--- a/src/gpu/intel/ocl/simple_layer_normalization.hpp
+++ b/src/gpu/intel/ocl/simple_layer_normalization.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2024 Intel Corporation
+* Copyright 2024-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -92,9 +92,9 @@ struct simple_layer_normalization_fwd_t : public gpu_primitive_t {
             CHECK(status);
 
             kernel_ctx.define_int("WITH_SRC_SCALES",
-                    !pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values());
+                    !pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC));
             kernel_ctx.define_int("WITH_DST_SCALES",
-                    !pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values());
+                    !pd()->attr()->scales_.has_default_values(DNNL_ARG_DST));
 
             CHECK(create_kernel(engine, &kernel_, "simple_lnorm_fwd", kernel_ctx));
             if (!kernel_) return status::runtime_error;
diff --git a/src/gpu/intel/ocl/simple_softmax.hpp b/src/gpu/intel/ocl/simple_softmax.hpp
index 49ed31d9087..e80fcabf5ce 100644
--- a/src/gpu/intel/ocl/simple_softmax.hpp
+++ b/src/gpu/intel/ocl/simple_softmax.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -140,9 +140,9 @@ struct simple_softmax_fwd_t : public gpu_primitive_t {
         kernel_ctx.add_option("-cl-std=CL2.0");
         kernel_ctx.define_int("LOGSOFTMAX", pd()->is_logsoftmax());
         kernel_ctx.define_int("WITH_SRC_SCALES",
-                !pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values());
+                !pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC));
         kernel_ctx.define_int("WITH_DST_SCALES",
-                !pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values());
+                !pd()->attr()->scales_.has_default_values(DNNL_ARG_DST));
 
         const memory_desc_wrapper dst_mdw(pd()->dst_md());
         const memory_desc_wrapper src_mdw(pd()->src_md());
diff --git a/src/gpu/intel/ocl/vectorized_lnorm.hpp b/src/gpu/intel/ocl/vectorized_lnorm.hpp
index b7afb6c3bbf..20278a030b4 100644
--- a/src/gpu/intel/ocl/vectorized_lnorm.hpp
+++ b/src/gpu/intel/ocl/vectorized_lnorm.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -89,9 +89,9 @@ struct vectorized_lnorm_fwd_t : public gpu_primitive_t {
             CHECK(status);
 
             kernel_ctx.define_int("WITH_SRC_SCALES",
-                    !pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values());
+                    !pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC));
             kernel_ctx.define_int("WITH_DST_SCALES",
-                    !pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values());
+                    !pd()->attr()->scales_.has_default_values(DNNL_ARG_DST));
 
             CHECK(create_kernel(
                     engine, &kernel_, "vectorized_lnorm_fwd", kernel_ctx));
diff --git a/src/gpu/intel/primitive_conf.cpp b/src/gpu/intel/primitive_conf.cpp
index 0523dc0fbbe..cd5540ae406 100644
--- a/src/gpu/intel/primitive_conf.cpp
+++ b/src/gpu/intel/primitive_conf.cpp
@@ -104,22 +104,23 @@ attr_info_t attr_info_t::create(const primitive_attr_t *attr) {
     const auto &src_scales = attr->scales_.get(DNNL_ARG_SRC);
     attr_info.with_src_scales = !src_scales.has_default_values();
     attr_info.with_src0_scale = !src_scales.has_default_values();
-    attr_info.src_scales_mask = src_scales.mask_;
-    attr_info.src_scales_data_type = src_scales.data_type_;
+    attr_info.src_scales_data_type = src_scales.get_data_type();
 
     const auto &src1_scales = attr->scales_.get(DNNL_ARG_SRC_1);
     attr_info.with_src1_scale = !src1_scales.has_default_values();
-    gpu_assert(src1_scales.mask_ == 0);
+    if (attr_info.with_src1_scale) { gpu_assert(src1_scales.get_mask() == 0); }
 
     const auto &wei_scales = attr->scales_.get(DNNL_ARG_WEIGHTS);
     attr_info.with_wei_scales = !wei_scales.has_default_values();
-    attr_info.wei_scales_mask = wei_scales.mask_;
-    attr_info.wei_scales_data_type = wei_scales.data_type_;
+    // TODO: remove the default `0` value.
+    attr_info.wei_scales_mask
+            = attr_info.with_wei_scales ? wei_scales.get_mask() : 0;
+    attr_info.wei_scales_data_type = wei_scales.get_data_type();
 
     const auto &dst_scales = attr->scales_.get(DNNL_ARG_DST);
     attr_info.with_dst_scales = !dst_scales.has_default_values();
-    attr_info.dst_scales_mask = dst_scales.mask_;
-    attr_info.dst_scales_data_type = dst_scales.data_type_;
+    attr_info.dst_scales_mask = dst_scales.get_mask();
+    attr_info.dst_scales_data_type = dst_scales.get_data_type();
 
     // zero points
     const auto &zp = attr->zero_points_;
@@ -827,7 +828,6 @@ status_t def_attr_info_impl(compute::kernel_ctx_t &kernel_ctx,
     kernel_ctx.define_int("WITH_SRC_SCALES", attr_info.with_src_scales);
     kernel_ctx.define_int("WITH_WEI_SCALES", attr_info.with_wei_scales);
     kernel_ctx.define_int("WITH_DST_SCALES", attr_info.with_dst_scales);
-    kernel_ctx.define_int("SRC_SCALES_MASK", attr_info.src_scales_mask);
     kernel_ctx.define_int("WEI_SCALES_MASK", attr_info.wei_scales_mask);
     kernel_ctx.define_int("DST_SCALES_MASK", attr_info.dst_scales_mask);
     def_data_type(kernel_ctx, attr_info.src_scales_data_type, "SRC_SCALES",
diff --git a/src/gpu/intel/primitive_conf.hpp b/src/gpu/intel/primitive_conf.hpp
index b4684dc274e..2fcf821a27b 100644
--- a/src/gpu/intel/primitive_conf.hpp
+++ b/src/gpu/intel/primitive_conf.hpp
@@ -83,9 +83,8 @@ struct attr_info_t {
     bool with_src_scales;
     bool with_wei_scales;
     bool with_dst_scales;
-    bool src_scales_mask;
-    bool wei_scales_mask;
-    bool dst_scales_mask;
+    int wei_scales_mask;
+    int dst_scales_mask;
     data_type_t src_scales_data_type;
     data_type_t wei_scales_data_type;
     data_type_t dst_scales_data_type;
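Note the pattern adopted in attr_info_t::create() above: the mask is only read once the corresponding with_*_scales flag confirms the entry is non-default, with an explicit fallback (the TODO marks the temporary `0` default). A standalone sketch of that guarded query, with mock types rather than the library's:

    #include <cassert>

    struct entry_t {
        bool set = false;
        int mask = 0;
    };

    // Mirrors: with_wei_scales ? wei_scales.get_mask() : 0
    int query_mask_or(const entry_t &e, int fallback) {
        return e.set ? e.mask : fallback;
    }

    int main() {
        entry_t dflt; // default entry: its mask must not be consumed directly
        entry_t per_oc;
        per_oc.set = true;
        per_oc.mask = 1 << 1;
        assert(query_mask_or(dflt, 0) == 0);
        assert(query_mask_or(per_oc, 0) == (1 << 1));
        return 0;
    }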
diff --git a/src/gpu/nvidia/cudnn_binary.hpp b/src/gpu/nvidia/cudnn_binary.hpp
index 4bba9c0ba6b..8226b6832cc 100644
--- a/src/gpu/nvidia/cudnn_binary.hpp
+++ b/src/gpu/nvidia/cudnn_binary.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -66,12 +66,7 @@ struct cudnn_binary_t : public gpu::primitive_t {
                     || has_zero_dims(dst_md()->dims, dst_md()->ndims);
         }
 
-        bool check_scales_mask() const {
-            for (const auto &s : attr()->scales_.scales_) {
-                if (s.second.mask_ != 0) return false;
-            }
-            return true;
-        }
+        bool check_scales_mask() const { return attr_scales_ok(); }
 
         bool check_no_blocking() const {
             // Blocking is not supported by cudnnOpTensor, return false if any
diff --git a/src/gpu/nvidia/cudnn_convolution.hpp b/src/gpu/nvidia/cudnn_convolution.hpp
index 329081c4633..07154104e86 100644
--- a/src/gpu/nvidia/cudnn_convolution.hpp
+++ b/src/gpu/nvidia/cudnn_convolution.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -155,15 +155,17 @@ struct cudnn_convolution_fwd_t : public gpu::primitive_t {
                     && ndims() < 5;
         }
 
-        bool attr_scales_ok() const {
-            const auto &scales = attr()->scales_;
-            const auto &supported_args
-                    = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST};
-            if (!scales.has_default_values(supported_args)) return false;
-            // cuDNN does not support scaling per dimension.
-            for (auto arg : supported_args)
-                if (scales.get(arg).mask_ != 0) return false;
-            return true;
+        bool attr_scales_ok(const std::vector<int> &supported_args
+                = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
+            bool ok = attr()->scales_.has_default_values(supported_args);
+            for (int arg : supported_args) {
+                if (attr()->scales_.has_default_values(arg)) continue;
+
+                const auto &mask = attr()->scales_.get_mask(arg);
+                // cuDNN does not support scaling per dimension.
+                ok = ok && (mask == 0);
+            }
+            return ok;
         }
     };
diff --git a/src/gpu/nvidia/cudnn_convolution_impl.hpp b/src/gpu/nvidia/cudnn_convolution_impl.hpp
index c6b04245c3a..e0fb47e796b 100644
--- a/src/gpu/nvidia/cudnn_convolution_impl.hpp
+++ b/src/gpu/nvidia/cudnn_convolution_impl.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -143,13 +143,10 @@ struct cudnn_convolution_impl_base_t
         with_bias = pd->with_bias();
         beta = 0.0f;
         do_scaling = !pd->attr()->scales_.has_default_values();
-        do_dst_scaling
-                = !pd->attr()->scales_.get(DNNL_ARG_DST).has_default_values();
-        do_src_scaling
-                = !pd->attr()->scales_.get(DNNL_ARG_SRC).has_default_values();
-        do_wei_scaling = !pd->attr()
-                                  ->scales_.get(DNNL_ARG_WEIGHTS)
-                                  .has_default_values();
+        do_dst_scaling = !pd->attr()->scales_.has_default_values(DNNL_ARG_DST);
+        do_src_scaling = !pd->attr()->scales_.has_default_values(DNNL_ARG_SRC);
+        do_wei_scaling
+                = !pd->attr()->scales_.has_default_values(DNNL_ARG_WEIGHTS);
         dnnl_descs[x] = *pd->invariant_src_md();
         dnnl_descs[weights] = *pd->invariant_wei_md();
         dnnl_descs[y] = *pd->invariant_dst_md();
diff --git a/src/gpu/nvidia/cudnn_inner_product.hpp b/src/gpu/nvidia/cudnn_inner_product.hpp
index b4d5c7aa438..a6ec15d578d 100644
--- a/src/gpu/nvidia/cudnn_inner_product.hpp
+++ b/src/gpu/nvidia/cudnn_inner_product.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -39,15 +39,17 @@ struct cudnn_inner_product_fwd_t : public gpu::primitive_t {
     struct pd_t : public inner_product_fwd_pd_t {
        using inner_product_fwd_pd_t::inner_product_fwd_pd_t;
 
-        bool attr_scales_ok() const {
-            const auto &scales = attr()->scales_;
-            const auto &supported_args
-                    = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST};
-            if (!scales.has_default_values(supported_args)) return false;
-            // cuDNN does not support scaling per dimension.
-            for (auto arg : supported_args)
-                if (scales.get(arg).mask_ != 0) return false;
-            return true;
+        bool attr_scales_ok(const std::vector<int> &supported_args
+                = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST}) const {
+            bool ok = attr()->scales_.has_default_values(supported_args);
+            for (int arg : supported_args) {
+                if (attr()->scales_.has_default_values(arg)) continue;
+
+                const auto &mask = attr()->scales_.get_mask(arg);
+                // cuDNN does not support scaling per dimension.
+                ok = ok && (mask == 0);
+            }
+            return ok;
         }
 
         std::shared_ptr<cudnn_inner_product_impl_base_t> inner_product_impl_;
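The cuDNN pd_t checks above all share one validation shape: first reject non-default scales on any unsupported argument, then require a zero (common) mask on every supported argument that actually carries scales. A standalone sketch of that logic under simplified types (a plain arg-to-mask map stands in for arg_scales_t, and the DNNL_ARG_* values are illustrative ints):

    #include <cassert>
    #include <map>
    #include <vector>

    using scales_map_t = std::map<int, int>; // arg -> mask; absent == default

    bool attr_scales_ok(
            const scales_map_t &scales, const std::vector<int> &supported_args) {
        // Non-default scales on any other argument are unsupported.
        for (const auto &kv : scales) {
            bool supported = false;
            for (int arg : supported_args) supported |= (arg == kv.first);
            if (!supported) return false;
        }
        bool ok = true;
        for (int arg : supported_args) {
            auto it = scales.find(arg);
            if (it == scales.end()) continue; // default values: nothing to check
            ok = ok && (it->second == 0); // backend supports common scales only
        }
        return ok;
    }

    int main() {
        const int SRC = 1, WEI = 2, DST = 3, BIAS = 4;
        assert(attr_scales_ok({{SRC, 0}}, {SRC, WEI, DST}));
        assert(!attr_scales_ok({{WEI, 1 << 1}}, {SRC, WEI, DST})); // per-dim mask
        assert(!attr_scales_ok({{BIAS, 0}}, {SRC, WEI, DST})); // unsupported arg
        return 0;
    }

Skipping default-valued arguments before querying the mask is what lets get_mask() stay meaningful only for entries that were actually set.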
diff --git a/src/gpu/nvidia/cudnn_matmul.hpp b/src/gpu/nvidia/cudnn_matmul.hpp
index f1674c860fe..d24470b5336 100644
--- a/src/gpu/nvidia/cudnn_matmul.hpp
+++ b/src/gpu/nvidia/cudnn_matmul.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -101,10 +101,11 @@ struct cudnn_matmul_t : public gpu::primitive_t {
             if (!scales.has_default_values(supported_args)) return false;
             // cuDNN does not support scaling per dimension.
             for (auto arg : supported_args) {
-                auto &s = scales.get(arg);
-                if (scales.get(arg).mask_ != 0
-                        || !utils::one_of(
-                                s.data_type_, s8, s32, f32, f16, bf16))
+                if (scales.has_default_values(arg)) continue;
+
+                if (scales.get_mask(arg) > 0) return false;
+                if (!utils::one_of(
+                            scales.get_data_type(arg), s8, s32, f32, f16, bf16))
                     return false;
             }
             return true;
diff --git a/src/gpu/nvidia/cudnn_matmul_impl.hpp b/src/gpu/nvidia/cudnn_matmul_impl.hpp
index bdbb25358c2..c77a26d47e4 100644
--- a/src/gpu/nvidia/cudnn_matmul_impl.hpp
+++ b/src/gpu/nvidia/cudnn_matmul_impl.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -50,7 +50,7 @@ struct cublas_params : cublas_base_params {
             return status::unimplemented;
         }
 
-        with_dst_scale_ = !attr->scales_.get(DNNL_ARG_DST).has_default_values();
+        with_dst_scale_ = !attr->scales_.has_default_values(DNNL_ARG_DST);
 
         with_separate_bias_ = with_bias;
         if ((with_separate_bias_)
                 && (bias_md->data_type != dst_md->data_type)) {
diff --git a/src/gpu/nvidia/cudnn_matmul_lt.hpp b/src/gpu/nvidia/cudnn_matmul_lt.hpp
index 0b6907d094c..d482a17e3c7 100644
--- a/src/gpu/nvidia/cudnn_matmul_lt.hpp
+++ b/src/gpu/nvidia/cudnn_matmul_lt.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2024 Intel Corporation
+* Copyright 2024-2025 Intel Corporation
 * Copyright 2024 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -191,7 +191,7 @@ struct cudnn_matmul_lt_t : public gpu::primitive_t {
         memory_desc_t s32_dst_md_;
 
         bool default_scale(int ARG) const {
-            return attr()->scales_.get(ARG).has_default_values();
+            return attr()->scales_.has_default_values(ARG);
         }
 
     private:
@@ -215,10 +215,9 @@ struct cudnn_matmul_lt_t : public gpu::primitive_t {
         }
 
         status_t create_scale_binary_pd(impl::engine_t *engine, int ARG) {
-            auto scale_md = dnnl_memory_desc();
-            scale_md.ndims = attr()->scales_.get(ARG).ndims_;
-            scale_md.data_type = attr()->scales_.get(ARG).data_type_;
-            scale_md.format_kind = dnnl_blocked;
+            memory_desc_t scale_md {};
+            scale_md.data_type = attr()->scales_.get_data_type(ARG);
+            scale_md.format_kind = format_kind::blocked;
 
             auto format_desc = create_scaling_format_desc(ARG, scale_md);
             scale_md.format_desc = {format_desc};
@@ -246,7 +245,7 @@ struct cudnn_matmul_lt_t : public gpu::primitive_t {
         }
 
         blocking_desc_t create_scaling_format_desc(
-                int ARG, dnnl_memory_desc &scale_md) {
+                int ARG, memory_desc_t &scale_md) {
             blocking_desc_t format_desc;
             memory_desc_t md;
             if (ARG == DNNL_ARG_SRC) {
@@ -255,11 +254,13 @@ struct cudnn_matmul_lt_t : public gpu::primitive_t {
                 md = *weights_md(0);
             } else if (ARG == DNNL_ARG_DST) {
                 md = *dst_md();
+            } else {
+                assert(!"unexpected arg");
             }
             scale_md.ndims = md.ndims;
 
             for (int i = 0; i < md.ndims; i++) {
-                if (attr()->scales_.get(1).mask_ & (1 << i)) {
+                if (attr()->scales_.get_mask(ARG) & (1 << i)) {
                     scale_md.dims[i] = md.dims[i];
                 } else {
                     scale_md.dims[i] = 1;
@@ -303,20 +304,17 @@ struct cudnn_matmul_lt_t : public gpu::primitive_t {
         bool single_scale(int ARG) const {
             const auto &scales = attr()->scales_;
-            return scales.get(ARG).mask_ == 0;
+            return scales.get_mask(ARG) == 0;
         }
 
-        bool scales_ok() {
-            data_type_t src_scale_dt
-                    = attr()->scales_.get(DNNL_ARG_SRC).data_type_;
-            data_type_t wei_scale_dt
-                    = attr()->scales_.get(DNNL_ARG_WEIGHTS).data_type_;
-            bool src_scales_ok = default_scale(DNNL_ARG_SRC)
-                    || utils::one_of(
-                            src_scale_dt, data_type::s8, data_type::s32);
-            bool wei_scales_ok = default_scale(DNNL_ARG_WEIGHTS)
-                    || utils::one_of(
-                            wei_scale_dt, data_type::s8, data_type::s32);
+        bool scales_ok() const {
+            bool src_scales_ok = IMPLICATION(!default_scale(DNNL_ARG_SRC),
+                    utils::one_of(attr()->scales_.get_data_type(DNNL_ARG_SRC),
+                            data_type::s8, data_type::s32));
+            bool wei_scales_ok = IMPLICATION(!default_scale(DNNL_ARG_WEIGHTS),
+                    utils::one_of(
+                            attr()->scales_.get_data_type(DNNL_ARG_WEIGHTS),
+                            data_type::s8, data_type::s32));
             return src_scales_ok && wei_scales_ok;
         }
diff --git a/src/gpu/nvidia/cudnn_matmul_lt_impl.hpp b/src/gpu/nvidia/cudnn_matmul_lt_impl.hpp
index f529cd67000..e97879d0b07 100644
--- a/src/gpu/nvidia/cudnn_matmul_lt_impl.hpp
+++ b/src/gpu/nvidia/cudnn_matmul_lt_impl.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2024 Intel Corporation
+* Copyright 2024-2025 Intel Corporation
 * Copyright 2024 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -55,20 +55,23 @@ struct cublas_lt_params : cublas_base_params {
                 || dst_d.has_runtime_dims_or_strides()
                 || weights_d.has_runtime_dims_or_strides();
 
-        if (attr->scales_.get(DNNL_ARG_SRC).has_default_values()) {
-            auto src_scale = attr->scales_.get(DNNL_ARG_SRC);
-            if (src_scale.mask_ != 0) { multi_src_scale_ = true; }
+        if (!attr->scales_.has_default_values(DNNL_ARG_SRC)) {
+            if (attr->scales_.get_mask(DNNL_ARG_SRC) > 0) {
+                multi_src_scale_ = true;
+            }
         }
 
-        if (attr->scales_.get(DNNL_ARG_WEIGHTS).has_default_values()) {
-            auto wei_scale = attr->scales_.get(DNNL_ARG_WEIGHTS);
-            if (wei_scale.mask_ != 0) { multi_wei_scale_ = true; }
+        if (!attr->scales_.has_default_values(DNNL_ARG_WEIGHTS)) {
+            if (attr->scales_.get_mask(DNNL_ARG_WEIGHTS) > 0) {
+                multi_wei_scale_ = true;
+            }
         }
 
-        with_dst_scale_ = !attr->scales_.get(DNNL_ARG_DST).has_default_values();
+        with_dst_scale_ = !attr->scales_.has_default_values(DNNL_ARG_DST);
         if (with_dst_scale_) {
-            auto dst_scale = attr->scales_.get(DNNL_ARG_DST);
-            if (dst_scale.mask_ != 0) { multi_dst_scale_ = true; }
+            if (attr->scales_.get_mask(DNNL_ARG_DST) > 0) {
+                multi_dst_scale_ = true;
+            }
         }
 
         // Initialise flags and variables for the imma case (E.g. imma_case_ flag).
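Two details in the hunks above are worth calling out. First, the src/weights conditions in cublas_lt_params now read `!attr->scales_.has_default_values(...)`; without the negation the multi-scale branches are dead code, since a default entry always has a zero mask (the dst branch already used the negated form). Second, scales_ok() in cudnn_matmul_lt.hpp switches to the IMPLICATION idiom: a data-type constraint only has to hold when scales are actually set. A self-contained sketch of that idiom, with a local macro mimicking oneDNN's utils, and the data-type check reduced to a bool for brevity:

    #include <cassert>

    #define IMPLICATION(cause, effect) (!(cause) || (effect))

    // "If scales are non-default, their data type must be supported."
    bool scales_ok(bool scales_set, bool dt_supported) {
        return IMPLICATION(scales_set, dt_supported);
    }

    int main() {
        assert(scales_ok(false, false)); // default scales: dt is irrelevant
        assert(scales_ok(true, true));
        assert(!scales_ok(true, false));
        return 0;
    }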
diff --git a/src/gpu/nvidia/cudnn_reorder.hpp b/src/gpu/nvidia/cudnn_reorder.hpp
index 30c7bc185dd..76c4b4a5120 100644
--- a/src/gpu/nvidia/cudnn_reorder.hpp
+++ b/src/gpu/nvidia/cudnn_reorder.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -83,14 +83,17 @@ struct cudnn_reorder_t : public gpu::primitive_t {
             return ok;
         }
 
-        bool scales_ok() const {
-            const auto &scales = attr()->scales_;
-            const auto &supported_args = {DNNL_ARG_FROM, DNNL_ARG_TO};
-            if (!scales.has_default_values(supported_args)) return false;
-            // cuDNN does not support scaling per dimension.
-            for (auto arg : supported_args)
-                if (scales.get(arg).mask_ != 0) return false;
-            return true;
+        bool scales_ok(const std::vector<int> &supported_args
+                = {DNNL_ARG_FROM, DNNL_ARG_TO}) const {
+            bool ok = attr()->scales_.has_default_values(supported_args);
+            for (int arg : supported_args) {
+                if (attr()->scales_.has_default_values(arg)) continue;
+
+                const auto &mask = attr()->scales_.get_mask(arg);
+                // cuDNN does not support scaling per dimension.
+                ok = ok && (mask == 0);
+            }
+            return ok;
         }
 
         bool post_ops_ok() const {
diff --git a/src/gpu/nvidia/cudnn_reorder_lt.hpp b/src/gpu/nvidia/cudnn_reorder_lt.hpp
index b8e1d2afc7f..f5343a361fd 100644
--- a/src/gpu/nvidia/cudnn_reorder_lt.hpp
+++ b/src/gpu/nvidia/cudnn_reorder_lt.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -117,14 +117,17 @@ struct cudnn_reorder_lt_t : public gpu::primitive_t {
             return ok;
         }
 
-        bool scales_ok() const {
-            const auto &scales = attr()->scales_;
-            const auto &supported_args = {DNNL_ARG_FROM, DNNL_ARG_TO};
-            if (!scales.has_default_values(supported_args)) return false;
-            // cuDNN does not support scaling per dimension.
-            for (auto arg : supported_args)
-                if (scales.get(arg).mask_ != 0) return false;
-            return true;
+        bool scales_ok(const std::vector<int> &supported_args
+                = {DNNL_ARG_FROM, DNNL_ARG_TO}) const {
+            bool ok = attr()->scales_.has_default_values(supported_args);
+            for (int arg : supported_args) {
+                if (attr()->scales_.has_default_values(arg)) continue;
+
+                const auto &mask = attr()->scales_.get_mask(arg);
+                // cuDNN does not support scaling per dimension.
+                ok = ok && (mask == 0);
+            }
+            return ok;
         }
 
         bool post_ops_ok() const {
@@ -144,11 +147,6 @@ struct cudnn_reorder_lt_t : public gpu::primitive_t {
                     && post_ops_ok();
             if (!ok) return status::unimplemented;
 
-            primitive_attr_t r_attr;
-            int mask = 0;
-            bool is_set = false;
-            auto src = DNNL_ARG_DST;
-            auto dst = DNNL_ARG_SRC;
             if (src_float_) {
                 src_scratch_md_ = *src_md();
                 dst_scratch_md_ = create_temp_md(src_scratch_md_);
@@ -157,21 +155,23 @@ struct cudnn_reorder_lt_t : public gpu::primitive_t {
                 src_scratch_md_ = create_temp_md(dst_scratch_md_);
                 dst_scratch_md_ = *dst_md();
             }
-            attr()->scales_.get(src, &mask, &is_set);
-            if (is_set) { r_attr.scales_.set(src, mask); }
-            attr()->scales_.get(dst, &mask, &is_set);
-            if (is_set) { r_attr.scales_.set(dst, mask); }
+            primitive_attr_t r_attr;
+            if (!attr()->scales_.has_default_values(DNNL_ARG_SRC)) {
+                const auto mask = attr()->scales_.get_mask(DNNL_ARG_SRC);
+                r_attr.scales_.set(DNNL_ARG_SRC, mask);
+            }
 
-            status_t generic_ok = reorder_primitive_desc_create(
-                    generic_reorder_desc_, engine, &src_scratch_md_,
-                    &dst_scratch_md_, &r_attr);
-            ok = ok && (generic_ok == status::success);
+            if (!attr()->scales_.has_default_values(DNNL_ARG_DST)) {
+                const auto mask = attr()->scales_.get_mask(DNNL_ARG_DST);
+                r_attr.scales_.set(DNNL_ARG_DST, mask);
+            }
 
-            if (!ok) return status::unimplemented;
+            CHECK(reorder_primitive_desc_create(generic_reorder_desc_, engine,
+                    &src_scratch_md_, &dst_scratch_md_, &r_attr));
 
             init_scratchpad();
-            return dnnl_success;
+            return status::success;
         }
 
         void init_scratchpad() {
diff --git a/src/gpu/nvidia/cudnn_softmax.cpp b/src/gpu/nvidia/cudnn_softmax.cpp
index 1a39656dc06..29f317df470 100644
--- a/src/gpu/nvidia/cudnn_softmax.cpp
+++ b/src/gpu/nvidia/cudnn_softmax.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020-2022 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -33,12 +33,12 @@ status_t cudnn_softmax_fwd_t::execute(const exec_ctx_t &ctx) const {
     nvidia::stream_t *cuda_stream
             = utils::downcast<nvidia::stream_t *>(ctx.stream());
 
-    if (!pd()->attr()->scales_.get(DNNL_ARG_SRC).has_default_values())
+    if (!pd()->attr()->scales_.has_default_values(DNNL_ARG_SRC))
         CHECK(stream_utils::copy_input_arg_to_host(ctx, cuda_stream,
                 &host_scales_[0], DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC,
                 sizeof(float)));
 
-    if (!pd()->attr()->scales_.get(DNNL_ARG_DST).has_default_values())
+    if (!pd()->attr()->scales_.has_default_values(DNNL_ARG_DST))
        CHECK(stream_utils::copy_input_arg_to_host(ctx, cuda_stream,
                 &host_scales_[1], DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST,
                 sizeof(float)));
diff --git a/src/gpu/nvidia/cudnn_softmax.hpp b/src/gpu/nvidia/cudnn_softmax.hpp
index 3ce67c4acaf..49736634516 100644
--- a/src/gpu/nvidia/cudnn_softmax.hpp
+++ b/src/gpu/nvidia/cudnn_softmax.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 * Copyright 2020 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -56,7 +56,7 @@ struct cudnn_softmax_fwd_t : public gpu::primitive_t {
                     && set_default_formats() == status::success
                     && src_d.is_plain() && dst_d.is_plain() && dst_d == src_d
                     && IMPLICATION(!attr()->scales_.has_default_values(),
-                            check_scales_mask()
+                            attr_scales_ok()
                                     && dst_d.data_type() != data_type::s8);
             if (!ok) return status::unimplemented;
 
@@ -64,12 +64,6 @@ struct cudnn_softmax_fwd_t : public gpu::primitive_t {
             return softmax_impl_->init(this);
         }
 
-        bool check_scales_mask() const {
-            for (const auto &s : attr()->scales_.scales_) {
-                if (s.second.mask_ != 0) return false;
-            }
-            return true;
-        }
 
         std::shared_ptr<cudnn_softmax_impl_base_t> softmax_impl_;
     };
diff --git a/tests/gtests/test_iface_attr.cpp b/tests/gtests/test_iface_attr.cpp
index 720aed6eb7f..23e091b1c96 100644
--- a/tests/gtests/test_iface_attr.cpp
+++ b/tests/gtests/test_iface_attr.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2024 Intel Corporation
+* Copyright 2017-2025 Intel Corporation
 * Copyright 2020-2021 FUJITSU LIMITED
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@@ -281,15 +281,17 @@ HANDLE_EXCEPTIONS_FOR_TEST_F(attr_test_t, TestScalesWithGroups) {
     for (auto arg : supported_args) {
         // single non-default scales for supported arg
         attr.set_scales(arg, 0, {});
-        // multiple scales with groups
+        // multiple scales with a single group dim
         attr.set_scales(arg, 1 << 0, {4});
+        // multiple scales with multiple group dims
+        attr.set_scales(arg, 1 << 0, {4, 1});
         // scales with groups and a data type
-        attr.set_scales(arg, 1 << 0, {4}, data_type::f32);
+        attr.set_scales(arg, 1 << 0, {4, 1}, data_type::f32);
     }
 
     for (auto arg : unsupported_args) {
         // multiple scales with groups for unsupported args
-        EXPECT_ANY_THROW(attr.set_scales(arg, 1 << 0, {4}));
+        EXPECT_ANY_THROW(attr.set_scales(arg, 1 << 0, {4, 1}));
         // multiple scales with non-default data type for unsupported args
         EXPECT_ANY_THROW(attr.set_scales(arg, 1 << 0, {}, data_type::bf16));
     }
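The test update above exercises the user-facing side of this refactor: group dims may now be passed for each grouped dimension explicitly ({4, 1} in addition to {4}). A hedged usage sketch with the public C++ API, assuming a oneDNN build where primitive_attr::set_scales accepts group dims (matmul-style weights, per-dim-0 mask, groups of 4 along dim 0, trivial group along dim 1):

    #include "dnnl.hpp"

    int main() {
        dnnl::primitive_attr attr;
        // One f32 scale per 4x1 block of the weights tensor.
        attr.set_scales(DNNL_ARG_WEIGHTS, /*mask=*/1 << 0, /*groups=*/{4, 1},
                dnnl::memory::data_type::f32);
        return 0;
    }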