diff --git a/nncf/__init__.py b/nncf/__init__.py index 4ace2c75548..9a284db02bc 100644 --- a/nncf/__init__.py +++ b/nncf/__init__.py @@ -21,6 +21,7 @@ from nncf.parameters import CompressWeightsMode as CompressWeightsMode from nncf.parameters import DropType as DropType from nncf.parameters import ModelType as ModelType +from nncf.parameters import QuantizationMode as QuantizationMode from nncf.parameters import TargetDevice as TargetDevice from nncf.quantization import QuantizationPreset as QuantizationPreset from nncf.quantization import compress_weights as compress_weights diff --git a/nncf/common/hardware/config.py b/nncf/common/hardware/config.py index 371098d9390..b683d974ad0 100644 --- a/nncf/common/hardware/config.py +++ b/nncf/common/hardware/config.py @@ -20,7 +20,7 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.logging import nncf_logger from nncf.common.quantization import quantizers as quant -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.utils.helpers import product_dict from nncf.common.utils.os import safe_open diff --git a/nncf/common/quantization/initialization/range.py b/nncf/common/quantization/initialization/range.py index 703400f6d53..6486c44d2c0 100644 --- a/nncf/common/quantization/initialization/range.py +++ b/nncf/common/quantization/initialization/range.py @@ -12,7 +12,7 @@ from typing import Dict, List, Optional from nncf.common.initialization.dataloader import NNCFDataLoader -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerGroup from nncf.config.schemata.defaults import NUM_INIT_SAMPLES diff --git a/nncf/common/quantization/quantizer_propagation/graph.py b/nncf/common/quantization/quantizer_propagation/graph.py index a46968ea974..fe8e37acf64 100644 --- a/nncf/common/quantization/quantizer_propagation/graph.py +++ b/nncf/common/quantization/quantizer_propagation/graph.py @@ -42,7 +42,7 @@ from nncf.common.quantization.quantizer_setup import QuantizationInsertionPointBase from nncf.common.quantization.quantizer_setup import QuantizationPointId from nncf.common.quantization.quantizer_setup import WeightQuantizationInsertionPoint -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import UnifiedScaleType from nncf.common.scopes import should_consider_scope diff --git a/nncf/common/quantization/quantizer_propagation/solver.py b/nncf/common/quantization/quantizer_propagation/solver.py index c686a62d40d..4771f41d685 100644 --- a/nncf/common/quantization/quantizer_propagation/solver.py +++ b/nncf/common/quantization/quantizer_propagation/solver.py @@ -40,7 +40,7 @@ from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup from nncf.common.quantization.structs import QuantizableWeightedLayerNode from nncf.common.quantization.structs import QuantizationConstraints -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from 
nncf.common.quantization.structs import QuantizerGroup from nncf.common.quantization.structs import UnifiedScaleType diff --git a/nncf/common/quantization/quantizer_setup.py b/nncf/common/quantization/quantizer_setup.py index caf2359a876..5dd0758aa53 100644 --- a/nncf/common/quantization/quantizer_setup.py +++ b/nncf/common/quantization/quantizer_setup.py @@ -18,7 +18,7 @@ from nncf.common.graph import NNCFNodeName from nncf.common.logging import nncf_logger from nncf.common.quantization.structs import NonWeightQuantizerId -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import UnifiedScaleType from nncf.common.quantization.structs import WeightQuantizerId diff --git a/nncf/common/quantization/structs.py b/nncf/common/quantization/structs.py index f0eda4e8f6a..794232c8aaf 100644 --- a/nncf/common/quantization/structs.py +++ b/nncf/common/quantization/structs.py @@ -22,9 +22,9 @@ @api() -class QuantizationMode: +class QuantizationScheme: """ - Basic enumeration for quantization mode specification. + Basic enumeration for quantization scheme specification. :param SYMMETRIC: :param ASYMMETRIC: @@ -43,7 +43,7 @@ class QuantizerConfig: def __init__( self, num_bits: int = QUANTIZATION_BITS, - mode: QuantizationMode = QuantizationMode.SYMMETRIC, + mode: QuantizationScheme = QuantizationScheme.SYMMETRIC, signedness_to_force: Optional[bool] = None, per_channel: bool = QUANTIZATION_PER_CHANNEL, ): @@ -66,7 +66,7 @@ def __eq__(self, other): def __str__(self): return "B:{bits} M:{mode} SGN:{signedness} PC:{per_channel}".format( bits=self.num_bits, - mode="S" if self.mode == QuantizationMode.SYMMETRIC else "A", + mode="S" if self.mode == QuantizationScheme.SYMMETRIC else "A", signedness="ANY" if self.signedness_to_force is None else ("S" if self.signedness_to_force else "U"), per_channel="Y" if self.per_channel else "N", ) @@ -86,7 +86,7 @@ def is_valid_requantization_for(self, other: "QuantizerConfig") -> bool: """ fail_conditions = [ self.num_bits > other.num_bits, - self.mode is QuantizationMode.ASYMMETRIC and other.mode is QuantizationMode.SYMMETRIC, + self.mode is QuantizationScheme.ASYMMETRIC and other.mode is QuantizationScheme.SYMMETRIC, self.signedness_to_force is None and other.signedness_to_force is not None, self.signedness_to_force is True and other.signedness_to_force is False, ] @@ -153,7 +153,7 @@ class QuantizerSpec: """ def __init__( - self, num_bits: int, mode: QuantizationMode, signedness_to_force: bool, narrow_range: bool, half_range: bool + self, num_bits: int, mode: QuantizationScheme, signedness_to_force: bool, narrow_range: bool, half_range: bool ): """ :param num_bits: Bitwidth of the quantization. @@ -334,5 +334,5 @@ class QuantizationPreset(Enum): def get_params_configured_by_preset(self, quant_group: QuantizerGroup) -> Dict: if quant_group == QuantizerGroup.ACTIVATIONS and self == QuantizationPreset.MIXED: - return {"mode": QuantizationMode.ASYMMETRIC} - return {"mode": QuantizationMode.SYMMETRIC} + return {"mode": QuantizationScheme.ASYMMETRIC} + return {"mode": QuantizationScheme.SYMMETRIC} diff --git a/nncf/experimental/tensor/functions.py b/nncf/experimental/tensor/functions.py index c434de3b1bf..403179cd9c4 100644 --- a/nncf/experimental/tensor/functions.py +++ b/nncf/experimental/tensor/functions.py @@ -10,13 +10,15 @@ # limitations under the License. 
import functools -from typing import Callable, List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, TypeVar, Union from nncf.experimental.tensor.enums import TensorDataType from nncf.experimental.tensor.enums import TensorDeviceType from nncf.experimental.tensor.tensor import Tensor from nncf.experimental.tensor.tensor import unwrap_tensor_data +TypeInfo = TypeVar("TypeInfo") + def _tensor_guard(func: callable): """ @@ -428,6 +430,18 @@ def _binary_reverse_op_nowarn(a: Tensor, b: Union[Tensor, float], operator_fn: C return Tensor(_binary_reverse_op_nowarn(a.data, unwrap_tensor_data(b), operator_fn)) +@functools.singledispatch +@_tensor_guard +def finfo(a: Tensor) -> TypeInfo: + """ + Returns machine limits for tensor type. + + :param a: Tensor. + :return: TypeInfo. + """ + return finfo(a.data) + + def _dispatch_list(fn: "functools._SingleDispatchCallable", tensor_list: List[Tensor], *args, **kwargs): """ Dispatches the function to the type of the wrapped data of the first element in tensor_list. diff --git a/nncf/experimental/tensor/numpy_functions.py b/nncf/experimental/tensor/numpy_functions.py index 7899c2807e4..228093d2d20 100644 --- a/nncf/experimental/tensor/numpy_functions.py +++ b/nncf/experimental/tensor/numpy_functions.py @@ -206,3 +206,8 @@ def _( # Run operator with disabled warning with np.errstate(invalid="ignore", divide="ignore"): return operator_fn(b, a) + + +@_register_numpy_types(fns.finfo) +def _(a: np.ndarray) -> np.finfo: + return np.finfo(a.dtype) diff --git a/nncf/experimental/tensorflow/quantization/quantizers.py b/nncf/experimental/tensorflow/quantization/quantizers.py index 1380bccf8be..db84fcb0156 100644 --- a/nncf/experimental/tensorflow/quantization/quantizers.py +++ b/nncf/experimental/tensorflow/quantization/quantizers.py @@ -13,7 +13,7 @@ import tensorflow as tf -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.utils.registry import Registry from nncf.tensorflow.layers.operation import InputType from nncf.tensorflow.quantization.quantizers import AsymmetricQuantizer diff --git a/nncf/onnx/quantization/quantize_model.py b/nncf/onnx/quantization/quantize_model.py index 7be23384964..d448df99377 100644 --- a/nncf/onnx/quantization/quantize_model.py +++ b/nncf/onnx/quantization/quantize_model.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Optional, Union +from typing import Optional import onnx @@ -18,8 +18,10 @@ from nncf.data import Dataset from nncf.onnx.graph.nncf_graph_builder import GraphConverter from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters +from nncf.quantization.advanced_parameters import QuantizationParameters from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization from nncf.quantization.telemetry_extractors import CompressionStartedWithQuantizeApi from nncf.scopes import IgnoredScope @@ -31,10 +33,11 @@ def quantize_impl( model: onnx.ModelProto, calibration_dataset: Dataset, - preset: Union[QuantizationPreset, None], - target_device: TargetDevice, - subset_size: int, - fast_bias_correction: bool, + mode: Optional[QuantizationMode] = None, + preset: Optional[QuantizationPreset] = None, + target_device: TargetDevice = TargetDevice.ANY, + subset_size: int = 300, + fast_bias_correction: bool = True, model_type: Optional[ModelType] = None, ignored_scope: Optional[IgnoredScope] = None, advanced_parameters: Optional[AdvancedQuantizationParameters] = None, @@ -44,6 +47,8 @@ def quantize_impl( """ if target_device == TargetDevice.CPU_SPR: raise RuntimeError("target_device == CPU_SPR is not supported.") + if mode is not None: + raise ValueError(f"mode={mode} is not supported") if model.opset_import[0].version < 10: raise RuntimeError("ONNX models with opset version < 10 do not support quantization.") if model.opset_import[0].version < 13: @@ -53,8 +58,8 @@ def quantize_impl( ) if advanced_parameters is None: advanced_parameters = AdvancedQuantizationParameters() - advanced_parameters.weights_quantization_params.per_channel = False - advanced_parameters.activations_quantization_params.per_channel = False + advanced_parameters.weights_quantization_params = QuantizationParameters(per_channel=False) + advanced_parameters.activations_quantization_params = QuantizationParameters(per_channel=False) quantization_algorithm = PostTrainingQuantization( preset=preset, diff --git a/nncf/openvino/graph/metatypes/groups.py b/nncf/openvino/graph/metatypes/groups.py index 31f78cbd8b4..eef2fba12df 100644 --- a/nncf/openvino/graph/metatypes/groups.py +++ b/nncf/openvino/graph/metatypes/groups.py @@ -85,9 +85,7 @@ ] -FAKE_QUANTIZE_OPERATIONS = [ - ov_metatypes.OVFakeQuantizeMetatype, -] +FAKE_QUANTIZE_OPERATIONS = [ov_metatypes.OVFakeQuantizeMetatype, ov_metatypes.OVFakeConvertMetatype] CONSTANT_OPERATIONS = [ diff --git a/nncf/openvino/graph/metatypes/openvino_metatypes.py b/nncf/openvino/graph/metatypes/openvino_metatypes.py index f6d89d14b9f..a51ea1f2187 100644 --- a/nncf/openvino/graph/metatypes/openvino_metatypes.py +++ b/nncf/openvino/graph/metatypes/openvino_metatypes.py @@ -333,6 +333,12 @@ class OVFakeQuantizeMetatype(OVOpMetatype): op_names = ["FakeQuantize"] +@OV_OPERATOR_METATYPES.register() +class OVFakeConvertMetatype(OVOpMetatype): + name = "FakeConvertOp" + op_names = ["FakeConvert"] + + @OV_OPERATOR_METATYPES.register() class OVLessMetatype(OVOpMetatype): name = "LessOp" @@ -713,13 +719,13 @@ def get_operation_const_op(operation: ov.Node, const_port_id: int) -> Optional[o # There are several cases here # (Constant) -> (Operation) # (Constant) -> (Convert) -> (Operation) - # (Constant) -> (Convert) -> (FakeQuantize) -> (Operation) - # (Constant) -> (Convert) -> (FakeQuantize) -> (Reshape) -> (Operation) + # 
(Constant) -> (Convert) -> (FakeQuantize, FakeConvert) -> (Operation) +    # (Constant) -> (Convert) -> (FakeQuantize, FakeConvert) -> (Reshape) -> (Operation) # and etc. We need properly find the constant node. So we start with # `node` and traverse up until the constant node is not found. queue = deque([node]) constant_node = None -    allowed_propagation_types_list = ["Convert", "FakeQuantize", "Reshape"] +    allowed_propagation_types_list = ["Convert", "FakeQuantize", "FakeConvert", "Reshape"] while len(queue) != 0: curr_node = queue.popleft() diff --git a/nncf/openvino/graph/model_transformer.py b/nncf/openvino/graph/model_transformer.py index fd178bbd182..78c98e0c7f2 100644 --- a/nncf/openvino/graph/model_transformer.py +++ b/nncf/openvino/graph/model_transformer.py @@ -25,6 +25,7 @@ from nncf.openvino.graph.node_utils import get_result_node_name from nncf.openvino.graph.transformations.commands import OVBiasCorrectionCommand from nncf.openvino.graph.transformations.commands import OVBiasInsertionCommand +from nncf.openvino.graph.transformations.commands import OVConvertInsertionCommand from nncf.openvino.graph.transformations.commands import OVExtractIfBodyCommand from nncf.openvino.graph.transformations.commands import OVFQNodeRemovingCommand from nncf.openvino.graph.transformations.commands import OVInplaceFnInsertionCommand @@ -34,6 +35,7 @@ from nncf.openvino.graph.transformations.commands import OVQuantizerInsertionCommand from nncf.openvino.graph.transformations.commands import OVUpdateIfBodyCommand from nncf.openvino.graph.transformations.commands import OVWeightUpdateCommand +from nncf.quantization.fake_quantize import FakeConvertParameters from nncf.quantization.fake_quantize import FakeQuantizeParameters @@ -47,6 +49,7 @@ def __init__(self, model: TModel): self._command_transformation_ordered_pairs = [ (OVFQNodeRemovingCommand, self._apply_fq_nodes_removing_transformation), (OVQuantizerInsertionCommand, self._apply_quantizer_insertion_transformations), +            (OVConvertInsertionCommand, self._apply_convert_insertion_transformations), (OVBiasCorrectionCommand, self._apply_bias_correction_transformations), (OVWeightUpdateCommand, self._apply_weight_update_transformations), (OVModelExtractionCommand, self._apply_model_extraction_transformation), @@ -58,6 +61,11 @@ def __init__(self, model: TModel): (OVExtractIfBodyCommand, self._apply_extract_if_body_transformation), ] +    @staticmethod +    def _convert_to_fp16(data): +        clip_data = np.clip(data, np.finfo(np.float16).min, np.finfo(np.float16).max) +        return clip_data.astype(np.float16) +    @staticmethod def _get_name_to_node_mapping(model: ov.Model) -> Dict[str, ov.Node]: """ @@ -235,25 +243,86 @@ def _apply_quantizer_insertion_transformations( return model @staticmethod -    def convert_params_to_fp16( -        fq_params: FakeQuantizeParameters, -    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: +    def _apply_convert_insertion_transformations( +        model: ov.Model, transformations: List[OVConvertInsertionCommand] +    ) -> ov.Model: """ -        Converts FakeQuantize parameters to FP16 precision. +        Applies FakeConvert insertion transformations to the model. -        :param fq_params: FakeQuantize node attributes. -        :return: FakeQuantize parameters in FP16 precision. +        :param model: Model to apply transformations to. +        :param transformations: List of the OVConvertInsertionCommand transformations. +        :return: Model with inserted FakeConvert nodes. 
""" +        name_to_node_mapping = OVModelTransformer._get_name_to_node_mapping(model) +        for transformation in transformations: +            OVModelTransformer._insert_fake_convert_op(transformation, name_to_node_mapping) +        return model -        def _convert_to_fp16(data): -            clip_data = np.clip(data, np.finfo(np.float16).min, np.finfo(np.float16).max) -            return clip_data.astype(np.float16) +    @staticmethod +    def _create_fake_quantize( +        op_output: ov.Output, +        fake_quantize_params: FakeQuantizeParameters, +        fake_quantize_name: str, +        convert_to_fp16: bool, +    ) -> ov.Node: +        """ +        Creates FakeQuantize node. + +        :param op_output: Output of the previous node. +        :param fake_quantize_params: FakeQuantizeParameters instance. +        :param fake_quantize_name: New layer name. +        :param convert_to_fp16: Whether to convert the parameters to FP16 or not. +        :return: ov.Node instance. +        """ + +        input_low = fake_quantize_params.input_low.data +        input_high = fake_quantize_params.input_high.data +        output_low = fake_quantize_params.output_low.data +        output_high = fake_quantize_params.output_high.data +        levels = fake_quantize_params.levels + +        if convert_to_fp16: +            input_low = OVModelTransformer._convert_to_fp16(input_low) +            input_high = OVModelTransformer._convert_to_fp16(input_high) +            output_low = OVModelTransformer._convert_to_fp16(output_low) +            output_high = OVModelTransformer._convert_to_fp16(output_high) + +        return opset.fake_quantize( +            op_output, input_low, input_high, output_low, output_high, levels, name=fake_quantize_name +        ) -        input_low = _convert_to_fp16(fq_params.input_low.data) -        input_high = _convert_to_fp16(fq_params.input_high.data) -        output_low = _convert_to_fp16(fq_params.output_low.data) -        output_high = _convert_to_fp16(fq_params.output_high.data) -        return input_low, input_high, output_low, output_high +    @staticmethod +    def _create_fake_convert( +        op_output: ov.Output, +        fake_convert_params: FakeConvertParameters, +        fake_convert_name: str, +        convert_to_fp16: bool, +    ) -> ov.Node: +        """ +        Creates FakeConvert node. + +        :param op_output: Output of the previous node. +        :param fake_convert_params: FakeConvertParameters instance. +        :param fake_convert_name: New layer name. +        :param convert_to_fp16: Whether to convert the parameters to FP16 or not. +        :return: ov.Node instance. +        """ + +        scale = fake_convert_params.scale.data +        shift = fake_convert_params.shift.data + +        if convert_to_fp16: +            scale = OVModelTransformer._convert_to_fp16(scale) +            shift = OVModelTransformer._convert_to_fp16(shift) + +        destination_type = fake_convert_params.destination_type.value +        return opset.fake_convert( +            data=op_output, +            scale=scale, +            shift=shift, +            destination_type=destination_type, +            name=fake_convert_name, +        ) @staticmethod def _insert_fake_quantize_op( @@ -266,11 +335,6 @@ def _insert_fake_quantize_op( :param name_to_node_mapping: Mapping from node name to node instance. 
""" fq_params = transformation.quantizer_parameters -        input_low = fq_params.input_low.data -        input_high = fq_params.input_high.data -        output_low = fq_params.output_low.data -        output_high = fq_params.output_high.data -        levels = fq_params.levels node_name = transformation.target_point.target_node_name target_node = name_to_node_mapping[node_name] @@ -280,8 +344,7 @@ inp_node = target_node.input(port_id) input_node_output = inp_node.get_source_output() data_type = inp_node.get_element_type() -        if data_type == ov.Type(np.float16): -            input_low, input_high, output_low, output_high = OVModelTransformer.convert_params_to_fp16(fq_params) +        convert_to_fp16 = data_type == ov.Type(np.float16) name = "fq_weights" if transform_type == TargetType.OPERATION_WITH_WEIGHTS else "fq_input" fq_name = f"{node_name}/{name}_{port_id}" @@ -292,23 +355,84 @@ if out.get_node().get_type_name() == "FakeQuantize": fq = out.get_node() if fq is None: -            fq = opset.fake_quantize( -                input_node_output, input_low, input_high, output_low, output_high, levels, name=fq_name +            fq = OVModelTransformer._create_fake_quantize( +                op_output=input_node_output, +                fake_quantize_params=fq_params, +                fake_quantize_name=fq_name, +                convert_to_fp16=convert_to_fp16, ) inp_node.replace_source_output(fq.output(0)) elif transform_type == TargetType.POST_LAYER_OPERATION: output = target_node.output(port_id) data_type = output.get_element_type() -        if data_type == ov.Type(np.float16): -            input_low, input_high, output_low, output_high = OVModelTransformer.convert_params_to_fp16(fq_params) +        convert_to_fp16 = data_type == ov.Type(np.float16) target_inputs = output.get_target_inputs() fq_name = f"{node_name}/fq_output_{port_id}" -        fq = opset.fake_quantize(output, input_low, input_high, output_low, output_high, levels, name=fq_name) +        fq = OVModelTransformer._create_fake_quantize( +            op_output=output, +            fake_quantize_params=fq_params, +            fake_quantize_name=fq_name, +            convert_to_fp16=convert_to_fp16, +        ) for inp_node in target_inputs: inp_node.replace_source_output(fq.output(0)) else: raise RuntimeError(f"Incorrect target point type {transform_type}") +    @staticmethod +    def _insert_fake_convert_op( +        transformation: OVConvertInsertionCommand, name_to_node_mapping: Dict[str, ov.Node] +    ) -> None: +        """ +        Inserts a FakeConvert operation into the model whose name_to_node_mapping is passed. + +        :param transformation: FakeConvert insertion command. +        :param name_to_node_mapping: Mapping from node name to node instance. 
+ """ + fc_params = transformation.convert_parameters + + node_name = transformation.target_point.target_node_name + target_node = name_to_node_mapping[node_name] + port_id = transformation.target_point.port_id + transform_type = transformation.target_point.type + name = "weights" if transform_type == TargetType.OPERATION_WITH_WEIGHTS else "input" + + if transform_type in [TargetType.PRE_LAYER_OPERATION, TargetType.OPERATION_WITH_WEIGHTS]: + inp_node = target_node.input(port_id) + input_node_output = inp_node.get_source_output() + + fc = None + if transform_type == TargetType.OPERATION_WITH_WEIGHTS: + # If the nodes share one weight tensor, we should have only one quantizer on that + for out in input_node_output.get_target_inputs(): + if out.get_node().get_type_name() == "FakeConvert": + fc = out.get_node() + if fc is None: + convert_to_fp16 = inp_node.get_element_type() == ov.Type(np.float16) + fc_name = f"{node_name}/fc_{name}_{port_id}" + fc = OVModelTransformer._create_fake_convert( + op_output=input_node_output, + fake_convert_params=fc_params, + fake_convert_name=fc_name, + convert_to_fp16=convert_to_fp16, + ) + inp_node.replace_source_output(fc.output(0)) + elif transform_type == TargetType.POST_LAYER_OPERATION: + output = target_node.output(port_id) + convert_to_fp16 = output.get_element_type() == ov.Type(np.float16) + target_inputs = output.get_target_inputs() + fc_name = f"{node_name}/fc_output_{port_id}" + fc = OVModelTransformer._create_fake_convert( + op_output=output, + fake_convert_params=fc_params, + fake_convert_name=fc_name, + convert_to_fp16=convert_to_fp16, + ) + for inp_node in target_inputs: + inp_node.replace_source_output(fc.output(0)) + else: + raise RuntimeError(f"Incorrect target point type {transform_type}") + @staticmethod def _apply_bias_correction_transformations(model, transformations: List[OVBiasCorrectionCommand]) -> ov.Model: """ diff --git a/nncf/openvino/graph/transformations/commands.py b/nncf/openvino/graph/transformations/commands.py index 491515aa0f5..7e3090d2c8e 100644 --- a/nncf/openvino/graph/transformations/commands.py +++ b/nncf/openvino/graph/transformations/commands.py @@ -20,6 +20,7 @@ from nncf.common.graph.transformations.commands import TransformationCommand from nncf.common.graph.transformations.commands import TransformationType from nncf.openvino.graph.node_utils import InplaceInsertionFnType +from nncf.quantization.fake_quantize import FakeConvertParameters from nncf.quantization.fake_quantize import FakeQuantizeParameters @@ -93,6 +94,16 @@ def union(self, other: "TransformationCommand") -> "TransformationCommand": raise NotImplementedError() +class OVConvertInsertionCommand(OVInsertionCommand): + def __init__(self, target_point: OVTargetPoint, convert_parameters: FakeConvertParameters): + super().__init__(target_point) + self.convert_parameters = convert_parameters + + def union(self, other: "TransformationCommand") -> "TransformationCommand": + # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand + raise NotImplementedError() + + class OVBiasCorrectionCommand(TransformationCommand): """ Corrects bias value in the model based on the input value. 
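[Editor's note] Taken together, the two OpenVINO hunks above define the full FakeConvert insertion path: an OVConvertInsertionCommand carries FakeConvertParameters to OVModelTransformer._insert_fake_convert_op(), which builds the node via opset.fake_convert(). Below is a minimal sketch of how such a command could be constructed; it assumes the usual OVTargetPoint(target_type, target_node_name, port_id) constructor from the OpenVINO backend and uses a hypothetical node name and placeholder statistics:

```python
import numpy as np

from nncf.common.graph.transformations.commands import TargetType
from nncf.experimental.tensor import Tensor
from nncf.openvino.graph.transformations.commands import OVConvertInsertionCommand
from nncf.openvino.graph.transformations.commands import OVTargetPoint
from nncf.quantization.advanced_parameters import FP8Type
from nncf.quantization.fake_quantize import FakeConvertParameters

# Hypothetical target: the activation output of node "MatMul_1" at port 0.
target_point = OVTargetPoint(TargetType.POST_LAYER_OPERATION, "MatMul_1", port_id=0)

# In the real flow, scale/shift come from calculate_convert_parameters();
# the values below are placeholders for illustration only.
parameters = FakeConvertParameters(
    scale=Tensor(np.array([2.0], dtype=np.float32)),
    shift=Tensor(np.array([0.0], dtype=np.float32)),
    destination_type=FP8Type.E4M3,
)

# OVModelTransformer dispatches this command to
# _apply_convert_insertion_transformations(), which inserts the FakeConvert node.
command = OVConvertInsertionCommand(target_point, parameters)
```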
diff --git a/nncf/openvino/pot/quantization/quantize_model.py b/nncf/openvino/pot/quantization/quantize_model.py index b649a0d64bf..ad15e8ed359 100644 --- a/nncf/openvino/pot/quantization/quantize_model.py +++ b/nncf/openvino/pot/quantization/quantize_model.py @@ -29,6 +29,7 @@ from nncf.openvino.quantization.backend_parameters import is_weight_compression_needed from nncf.parameters import DropType from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedAccuracyRestorerParameters from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters @@ -161,28 +162,29 @@ def _create_quantization_group_config( :return: A POT quantization group configuration as dict. """ config = {} -    if quantization_params.num_bits is not None: -        config["bits"] = quantization_params.num_bits - -    if quantization_params.mode is not None: -        config["mode"] = str(quantization_params.mode) -    if quantization_params.per_channel is not None: -        config["perchannel"] = quantization_params.per_channel - -    not_supported_params = { -        "narrow_range": quantization_params.narrow_range, -        "signedness_to_force": quantization_params.signedness_to_force, -    } -    for name, value in not_supported_params.items(): -        if value is not None: -            raise RuntimeError( -                "Quantization algorithm from the OpenVINO backend does not support " -                f"{name} directly, please, use backend specific parameters level_low " -                "and level_high to specify the quantization levels for activations " -                "and weights quantization groups to specify the quantization levels." -                'Example:\n {"activations" : {"level_low": 0, "level_high": 255}}\n' -                '{"weights" : {"level_low": -127, "level_high": 127}}' -            ) +    if quantization_params is not None: +        if quantization_params.num_bits is not None: +            config["bits"] = quantization_params.num_bits + +        if quantization_params.mode is not None: +            config["mode"] = str(quantization_params.mode) +        if quantization_params.per_channel is not None: +            config["perchannel"] = quantization_params.per_channel + +        not_supported_params = { +            "narrow_range": quantization_params.narrow_range, +            "signedness_to_force": quantization_params.signedness_to_force, +        } +        for name, value in not_supported_params.items(): +            if value is not None: +                raise RuntimeError( +                    "Quantization algorithm from the OpenVINO backend does not support " +                    f"{name} directly. Please use the backend-specific parameters level_low " +                    "and level_high to specify the quantization levels for the activations " +                    "and weights quantization groups. " 
+ 'Example:\n {"activations" : {"level_low": 0, "level_high": 255}}\n' + '{"weights" : {"level_low": -127, "level_high": 127}}' + ) if BackendParameters.LEVEL_LOW in backend_params: config["level_low"] = backend_params[BackendParameters.LEVEL_LOW] if BackendParameters.LEVEL_HIGH in backend_params: @@ -324,6 +326,7 @@ def _create_engine_config( def quantize_impl( model: ov.Model, calibration_dataset: Dataset, + mode: Optional[QuantizationMode] = None, preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, @@ -337,6 +340,9 @@ def quantize_impl( """ pot.utils.logger.init_logger(level=logging.getLevelName(nncf_logger.getEffectiveLevel())) + if mode is not None: + raise ValueError(f"mode={mode} is not supported") + if advanced_parameters is None: advanced_parameters = AdvancedQuantizationParameters() diff --git a/nncf/openvino/quantization/backend_parameters.py b/nncf/openvino/quantization/backend_parameters.py index ffaf218e105..c1cf82c13ba 100644 --- a/nncf/openvino/quantization/backend_parameters.py +++ b/nncf/openvino/quantization/backend_parameters.py @@ -33,6 +33,6 @@ def is_weight_compression_needed(advanced_parameters: Optional[AdvancedQuantizat :param advanced_parameters: Advanced quantization parameters. :return: True if weight compression is needed, False otherwise. """ - if advanced_parameters is None: - return True - return advanced_parameters.backend_params.get(BackendParameters.COMPRESS_WEIGHTS, True) + if advanced_parameters is not None and advanced_parameters.backend_params is not None: + return advanced_parameters.backend_params.get(BackendParameters.COMPRESS_WEIGHTS, True) + return True diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index ee35ea8bc02..215ed0ccab5 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -27,6 +27,7 @@ from nncf.openvino.rt_info import dump_parameters from nncf.parameters import DropType from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedAccuracyRestorerParameters from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters @@ -55,7 +56,7 @@ def should_use_pot(advanced_parameters: Optional[AdvancedQuantizationParameters] :raises ImportError if POT is not found in the Python environment. """ use_pot = USE_POT_AS_DEFAULT - if advanced_parameters is not None: + if advanced_parameters is not None and advanced_parameters.backend_params is not None: use_pot = advanced_parameters.backend_params.get(BackendParameters.USE_POT, USE_POT_AS_DEFAULT) if not use_pot: @@ -76,6 +77,7 @@ def should_use_pot(advanced_parameters: Optional[AdvancedQuantizationParameters] def native_quantize_if_op_impl( model: ov.Model, calibration_dataset: Dataset, + mode: Optional[QuantizationMode] = None, preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, @@ -92,6 +94,7 @@ def native_quantize_if_op_impl( "The BiasCorrection algorithm is not supported for OpenVINO models with If operation." 
) quantization_algorithm = PostTrainingQuantization( + mode=mode, preset=preset, target_device=target_device, subset_size=subset_size, @@ -134,6 +137,7 @@ def native_quantize_if_op_impl( def native_quantize_impl( model: ov.Model, calibration_dataset: Dataset, + mode: Optional[QuantizationMode] = None, preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, @@ -146,6 +150,7 @@ def native_quantize_impl( Implementation of the `quantize()` method for the OpenVINO backend via the OpenVINO Runtime API. """ quantization_algorithm = PostTrainingQuantization( + mode=mode, preset=preset, target_device=target_device, subset_size=subset_size, @@ -211,15 +216,15 @@ def native_quantize_with_accuracy_control_impl( copied_parameters.backend_params[BackendParameters.COMPRESS_WEIGHTS] = False quantized_model = quantize_impl( - model, - calibration_dataset, - preset, - target_device, - subset_size, - fast_bias_correction, - model_type, - ignored_scope, - copied_parameters, + model=model, + calibration_dataset=calibration_dataset, + preset=preset, + target_device=target_device, + subset_size=subset_size, + fast_bias_correction=fast_bias_correction, + model_type=model_type, + ignored_scope=ignored_scope, + advanced_parameters=copied_parameters, ) if advanced_accuracy_restorer_parameters.intermediate_model_dir: @@ -319,6 +324,7 @@ def native_quantize_with_accuracy_control_impl( def quantize_impl( model: ov.Model, calibration_dataset: Dataset, + mode: Optional[QuantizationMode] = None, preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, @@ -340,15 +346,16 @@ def quantize_impl( quantize_fn = native_quantize_if_op_impl return quantize_fn( - model, - calibration_dataset, - preset, - target_device, - subset_size, - fast_bias_correction, - model_type, - ignored_scope, - advanced_parameters, + model=model, + calibration_dataset=calibration_dataset, + mode=mode, + preset=preset, + target_device=target_device, + subset_size=subset_size, + fast_bias_correction=fast_bias_correction, + model_type=model_type, + ignored_scope=ignored_scope, + advanced_parameters=advanced_parameters, ) diff --git a/nncf/parameters.py b/nncf/parameters.py index 97ccea267be..28892f3bbf3 100644 --- a/nncf/parameters.py +++ b/nncf/parameters.py @@ -87,3 +87,17 @@ class CompressWeightsMode(Enum): INT4_ASYM = "int4_asym" NF4 = "nf4" INT8 = "int8" # Deprecated mode + + +@api(canonical_alias="nncf.QuantizationMode") +class QuantizationMode(Enum): + """ + Defines special modes. + Currently contains only FP8-related modes (https://arxiv.org/pdf/2209.05433.pdf). + + :param FP8_E4M3: Mode with 4-bit exponent and 3-bit mantissa. + :param FP8_E5M2: Mode with 5-bit exponent and 2-bit mantissa. 
+ """ + + FP8_E4M3 = "fp8_e4m3" + FP8_E5M2 = "fp8_e5m2" diff --git a/nncf/quantization/advanced_parameters.py b/nncf/quantization/advanced_parameters.py index 5a8dd596509..4da0f4ed526 100644 --- a/nncf/quantization/advanced_parameters.py +++ b/nncf/quantization/advanced_parameters.py @@ -17,9 +17,9 @@ from dataclasses import fields from dataclasses import is_dataclass from enum import Enum -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.utils.api_marker import api from nncf.quantization.range_estimator import AggregatorType from nncf.quantization.range_estimator import RangeEstimatorParameters @@ -56,6 +56,20 @@ class OverflowFix(Enum): DISABLE = "disable" +@api() +class FP8Type(Enum): + """ + Defines FP8 special types (https://arxiv.org/pdf/2209.05433.pdf). + + :param E4M3: Mode with 4-bit exponent and 3-bit mantissa. + :param E5M2: Mode with 5-bit exponent and 2-bit mantissa. + + """ + + E4M3 = "f8e4m3" + E5M2 = "f8e5m2" + + @api() @dataclass class QuantizationParameters: @@ -93,6 +107,19 @@ class QuantizationParameters: narrow_range: Optional[bool] = None +@api() +@dataclass +class FP8QuantizationParameters: + """ + Contains convert parameters for weights or activations. + + :param destination_type: Currently contains E4M3 or E5M2 for FP8 precision. + :type destination_type: FP8Type + """ + + destination_type: Optional[FP8Type] = None + + @api() @dataclass class AdvancedBiasCorrectionParameters: @@ -178,8 +205,8 @@ class AdvancedQuantizationParameters: disable_bias_correction: bool = False # Advanced Quantization parameters - activations_quantization_params: QuantizationParameters = field(default_factory=QuantizationParameters) - weights_quantization_params: QuantizationParameters = field(default_factory=QuantizationParameters) + activations_quantization_params: Union[QuantizationParameters, FP8QuantizationParameters] = None + weights_quantization_params: Union[QuantizationParameters, FP8QuantizationParameters] = None # Range estimator parameters activations_range_estimator_params: RangeEstimatorParameters = field(default_factory=RangeEstimatorParameters) @@ -277,16 +304,17 @@ def convert_quantization_parameters_to_dict(params: QuantizationParameters) -> D :return: Quantization parameters as dict in the legacy format """ result = {} - if params.num_bits is not None: - result["bits"] = params.num_bits - if params.mode is not None: - result["mode"] = params.mode - if params.signedness_to_force is not None: - result["signed"] = params.signedness_to_force - if params.per_channel is not None: - result["per_channel"] = params.per_channel - if params.narrow_range is not None: - raise RuntimeError("narrow_range parameter is not supported in the legacy format") + if params is not None: + if params.num_bits is not None: + result["bits"] = params.num_bits + if params.mode is not None: + result["mode"] = params.mode + if params.signedness_to_force is not None: + result["signed"] = params.signedness_to_force + if params.per_channel is not None: + result["per_channel"] = params.per_channel + if params.narrow_range is not None: + raise RuntimeError("narrow_range parameter is not supported in the legacy format") return result diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index cba0d681cdd..d8fb116d403 100644 --- 
a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -12,7 +12,7 @@ import collections import dataclasses from copy import deepcopy -from typing import Any, Dict, List, Optional, OrderedDict, Set, TypeVar +from typing import Any, Dict, List, Optional, OrderedDict, Set, TypeVar, Union import numpy as np @@ -36,8 +36,8 @@ from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup from nncf.common.quantization.structs import QuantizableWeightedLayerNode from nncf.common.quantization.structs import QuantizationConstraints -from nncf.common.quantization.structs import QuantizationMode from nncf.common.quantization.structs import QuantizationPreset +from nncf.common.quantization.structs import QuantizationScheme from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import QuantizerGroup from nncf.common.tensor_statistics.collectors import TensorStatisticCollectorBase @@ -46,11 +46,15 @@ from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import get_backend from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode from nncf.parameters import TargetDevice +from nncf.quantization.advanced_parameters import FP8QuantizationParameters +from nncf.quantization.advanced_parameters import FP8Type from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.advanced_parameters import QuantizationParameters from nncf.quantization.advanced_parameters import changes_asdict from nncf.quantization.algorithms.algorithm import Algorithm +from nncf.quantization.fake_quantize import calculate_convert_parameters from nncf.quantization.fake_quantize import calculate_quantizer_parameters from nncf.quantization.fake_quantize import get_quantizer_narrow_range from nncf.quantization.passes import transform_to_inference_graph @@ -62,7 +66,7 @@ TModel = TypeVar("TModel") DEFAULT_QCONFIG = QuantizerConfig( - num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False + num_bits=8, mode=QuantizationScheme.SYMMETRIC, signedness_to_force=None, per_channel=False ) @@ -96,6 +100,7 @@ class MinMaxQuantization(Algorithm): def __init__( self, + mode: Optional[QuantizationMode] = None, preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, @@ -104,19 +109,20 @@ def __init__( overflow_fix: OverflowFix = OverflowFix.FIRST_LAYER, quantize_outputs: bool = False, inplace_statistics: bool = True, - activations_quantization_params: Optional[QuantizationParameters] = None, - weights_quantization_params: Optional[QuantizationParameters] = None, + activations_quantization_params: Union[QuantizationParameters, FP8QuantizationParameters] = None, + weights_quantization_params: Union[QuantizationParameters, FP8QuantizationParameters] = None, activations_range_estimator_params: Optional[RangeEstimatorParameters] = None, weights_range_estimator_params: Optional[RangeEstimatorParameters] = None, backend_params: Optional[Dict[str, Any]] = None, ): """ + :param mode: Defines optimization mode for the algorithm. None by default. :param preset: A preset controls the quantization mode (symmetric and asymmetric). It can take the following values: - `performance`: Symmetric quantization of weights and activations. - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. Default value is None. 
In this case, `mixed` preset is used for `transformer` -            model type otherwise `performace`. +            model type otherwise `performance`. :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance for this type of device, defaults to TargetDevice.ANY. @@ -144,12 +150,14 @@ """ self._target_device = target_device self._subset_size = subset_size +        self._mode = mode self._model_type = model_type self._ignored_scope = IgnoredScope() if ignored_scope is None else ignored_scope self._overflow_fix = overflow_fix self._quantize_outputs = quantize_outputs self._inplace_statistics = inplace_statistics self._backend_params = backend_params +        self._preset = preset self._quantization_params = { QuantizerGroup.WEIGHTS: weights_quantization_params, @@ -162,22 +170,78 @@ } # preset definition -        if preset is None: +        if self._preset is None: if model_type == ModelType.TRANSFORMER: -                preset = QuantizationPreset.MIXED +                self._preset = QuantizationPreset.MIXED else: -                preset = QuantizationPreset.PERFORMANCE +                self._preset = QuantizationPreset.PERFORMANCE +        if self._mode is not None: +            self._review_defaults_based_on_mode() +            self._set_quantization_params_based_on_mode() # Calculates global quantizer constraints self._global_quantizer_constraints = {} for quantizer_group in QuantizerGroup: self._global_quantizer_constraints[quantizer_group] = self._get_quantizer_constraints( -                quantizer_group, preset, self._quantization_params[quantizer_group] +                quantizer_group, self._preset, self._quantization_params[quantizer_group] ) self._reset_cache() self._algorithm_key = f"MMQ_{hash(self)}" +    def _review_defaults_based_on_mode(self): +        """ +        Verifies that options are left at their default values, since the mode option does not support overriding them. +        """ +        nncf_logger.warning(f"You are using the experimental mode option with the {self._mode} value.") + +        if self._preset != QuantizationPreset.PERFORMANCE: +            raise RuntimeError(f"preset option with {self._preset} value is not supported with the mode option!") + +        if self._target_device not in [TargetDevice.CPU, TargetDevice.ANY]: +            raise RuntimeError( +                f"target_device option with {self._target_device} value is not supported with the mode option!" +            ) + +        if self._overflow_fix != OverflowFix.DISABLE: +            raise RuntimeError( +                f"overflow_fix option with {self._overflow_fix} value is not supported with the mode option!" +            ) + +        if self._quantize_outputs: +            raise RuntimeError("quantize_outputs option is not supported with the mode option!") + +        if self._backend_params is not None: +            raise RuntimeError("backend_params option is not supported with the mode option!") + +        if isinstance(self._quantization_params[QuantizerGroup.WEIGHTS], QuantizationParameters): +            raise RuntimeError( +                "quantization_params option for weights with " +                f"{self._quantization_params[QuantizerGroup.WEIGHTS]} " +                "value is not supported with the mode option!" +            ) + +        if isinstance(self._quantization_params[QuantizerGroup.ACTIVATIONS], QuantizationParameters): +            raise RuntimeError( +                "quantization_params option for activations with " +                f"{self._quantization_params[QuantizerGroup.ACTIVATIONS]} " +                "value is not supported with the mode option!" +            ) + +    def _set_quantization_params_based_on_mode(self): +        """ +        Sets default quantization params based on the self._mode value. 
+ """ + mode_default_option_map = { + QuantizationMode.FP8_E4M3: FP8QuantizationParameters(destination_type=FP8Type.E4M3), + QuantizationMode.FP8_E5M2: FP8QuantizationParameters(destination_type=FP8Type.E5M2), + } + if self._quantization_params[QuantizerGroup.WEIGHTS] is None: + self._quantization_params[QuantizerGroup.WEIGHTS] = mode_default_option_map[self._mode] + + if self._quantization_params[QuantizerGroup.ACTIVATIONS] is None: + self._quantization_params[QuantizerGroup.ACTIVATIONS] = mode_default_option_map[self._mode] + def _reset_cache(self): # It prevents the duplicate weight quantizers from being added. # It can happen when you have layers that share the identical weight tensor. @@ -191,7 +255,10 @@ def available_backends(self) -> List[BackendType]: return [BackendType.ONNX, BackendType.OPENVINO, BackendType.TORCH] def _get_quantizer_constraints( - self, group: QuantizerGroup, preset: QuantizationPreset, quantization_params: Optional[QuantizationParameters] + self, + group: QuantizerGroup, + preset: QuantizationPreset, + quantization_params: Union[QuantizationParameters, FP8QuantizationParameters], ) -> QuantizationConstraints: """ Returns QuantizationConstraints for the provided quantizer group. @@ -205,6 +272,13 @@ def _get_quantizer_constraints( if quantization_params is None: return QuantizationConstraints(**constraints) + if isinstance(quantization_params, FP8QuantizationParameters): + if self._mode is None: + raise RuntimeError( + f"FP8QuantizationParameters for {group.value} can not be used without QuantizationMode option!" + ) + return QuantizationConstraints(**constraints) + if quantization_params.mode is not None: constraints["mode"] = quantization_params.mode if quantization_params.num_bits is not None: @@ -721,10 +795,19 @@ def filter_func(point: StatisticPoint) -> bool: qconfig = quantization_target_points[quantization_target_point] q_group = QuantizerGroup.ACTIVATIONS narrow_range = get_quantizer_narrow_range(qconfig, q_group) - parameters = calculate_quantizer_parameters(unified_values, qconfig, q_group, narrow_range) - command = self._backend_entity.create_quantizer_insertion_command( - graph, quantization_target_point, qconfig, parameters - ) + if self._mode is not None: + destination_type = self._quantization_params[q_group].destination_type + parameters = calculate_convert_parameters( + unified_values, is_per_channel=qconfig.per_channel, destination_type=destination_type + ) + command = self._backend_entity.create_convert_insertion_command( + quantization_target_point, parameters + ) + else: + parameters = calculate_quantizer_parameters(unified_values, qconfig, q_group, narrow_range) + command = self._backend_entity.create_quantizer_insertion_command( + graph, quantization_target_point, qconfig, parameters + ) transformation_layout.register(command) unified_ops_list.add(quantization_target_point) @@ -749,10 +832,21 @@ def filter_func(point: StatisticPoint) -> bool: statistics = tensor_collector.get_statistics() if statistics.min_values is None or statistics.max_values is None: raise RuntimeError(f"Statistics were not collected for the node {target_node_name}") - parameters = calculate_quantizer_parameters(statistics, qconfig, quant_group, narrow_range, half_range) - command = self._backend_entity.create_quantizer_insertion_command( - graph, quantization_target_point, qconfig, parameters - ) + if self._mode is not None: + destination_type = self._quantization_params[quant_group].destination_type + parameters = calculate_convert_parameters( + statistics, 
is_per_channel=qconfig.per_channel, destination_type=destination_type +                    ) +                    command = self._backend_entity.create_convert_insertion_command( +                        quantization_target_point, parameters +                    ) +                else: +                    parameters = calculate_quantizer_parameters( +                        statistics, qconfig, quant_group, narrow_range, half_range +                    ) +                    command = self._backend_entity.create_quantizer_insertion_command( +                        graph, quantization_target_point, qconfig, parameters +                    ) transformation_layout.register(command) if not transformation_layout.transformations: nncf_logger.info("The model has no operations to apply quantization.") @@ -802,10 +896,10 @@ def _apply_model_type_pass( if node.metatype not in self._backend_entity.mat_mul_metatypes: continue if ( -                    quantization_point.qconfig.mode != QuantizationMode.SYMMETRIC +                    quantization_point.qconfig.mode != QuantizationScheme.SYMMETRIC and node.layer_attributes is None ): -                    quantization_point.qconfig.mode = QuantizationMode.SYMMETRIC +                    quantization_point.qconfig.mode = QuantizationScheme.SYMMETRIC nncf_logger.debug( f"Update quantization mode for the node {node_name}" f" to the symmetric due to ModelType parameter." diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py index dbec3469f28..96ff4be8a68 100644 --- a/nncf/quantization/algorithms/min_max/backend.py +++ b/nncf/quantization/algorithms/min_max/backend.py @@ -25,6 +25,7 @@ from nncf.common.tensor_statistics.statistics import MinMaxTensorStatistic from nncf.parameters import ModelType from nncf.parameters import TargetDevice +from nncf.quantization.fake_quantize import FakeConvertParameters from nncf.quantization.fake_quantize import FakeQuantizeParameters from nncf.quantization.range_estimator import RangeEstimatorParameters @@ -146,6 +147,20 @@ def create_quantizer_insertion_command( :return: Backend-specific TransformationCommand for the quantizer insertion operation. """ +    @staticmethod +    @abstractmethod +    def create_convert_insertion_command( +        target_point: TargetPoint, +        parameters: FakeConvertParameters, +    ) -> TransformationCommand: +        """ +        Returns backend-specific convert insertion command. + +        :param target_point: Target location for the FakeConvert insertion. +        :param parameters: FakeConvertParameters used to build the FakeConvert operation. +        :return: Backend-specific TransformationCommand for the convert insertion operation. +        """ + @staticmethod @abstractmethod def get_start_nodes_for_activation_path_tracing(nncf_graph: NNCFGraph) -> List[NNCFNode]: @@ -154,6 +169,7 @@ def get_start_nodes_for_activation_path_tracing(nncf_graph: NNCFGraph) -> List[N :param nncf_graph: NNCFGraph to get the start nodes. :return: List of NNCFNodes to use as start nodes for activation path tracing. 
+ """ @staticmethod diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index 70aca2753fe..1ea7e2b3042 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -17,8 +17,9 @@ from nncf.common.graph.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType +from nncf.common.graph.transformations.commands import TransformationCommand from nncf.common.hardware.config import HWConfig -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.onnx.graph.metatypes import onnx_metatypes as om from nncf.onnx.graph.metatypes.groups import MATMUL_METATYPES @@ -39,6 +40,7 @@ from nncf.quantization.advanced_parameters import AggregatorType from nncf.quantization.advanced_parameters import StatisticsType from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend +from nncf.quantization.fake_quantize import FakeConvertParameters from nncf.quantization.fake_quantize import FakeQuantizeParameters from nncf.quantization.range_estimator import RangeEstimatorParameters @@ -120,6 +122,13 @@ def create_quantizer_insertion_command( onnx_parameters = convert_fq_params_to_onnx_params(parameters, quantizer_config.num_bits, tensor_type, axis) return ONNXQuantizerInsertionCommand(target_point, nncf_input_node_next_nodes, onnx_parameters) + @staticmethod + def create_convert_insertion_command( + target_point: ONNXTargetPoint, + parameters: FakeConvertParameters, + ) -> TransformationCommand: + raise RuntimeError("FakeConvert insertion not implemented in ONNX backend!") + @staticmethod def unify_statistics(statistics: List[ONNXMinMaxTensorStatistic]) -> ONNXMinMaxTensorStatistic: max_values, min_values = [], [] diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 3cc452ad471..744ea868a94 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -18,7 +18,7 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType from nncf.common.hardware.config import HWConfig -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.tensor_statistics.collectors import ReductionAxes from nncf.experimental.common.tensor_statistics.collectors import AGGREGATORS_MAP @@ -29,6 +29,7 @@ from nncf.openvino.graph.model_utils import get_start_nodes_for_activation_path_tracing from nncf.openvino.graph.node_utils import get_channel_agnostic_reduction_axes from nncf.openvino.graph.node_utils import get_weight_channel_axes +from nncf.openvino.graph.transformations.commands import OVConvertInsertionCommand from nncf.openvino.graph.transformations.commands import OVQuantizerInsertionCommand from nncf.openvino.graph.transformations.commands import OVTargetPoint from nncf.openvino.hardware.config import OVHWConfig @@ -41,6 +42,7 @@ from nncf.quantization.advanced_parameters import RangeEstimatorParameters from nncf.quantization.advanced_parameters 
import StatisticsType from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend +from nncf.quantization.fake_quantize import FakeConvertParameters from nncf.quantization.fake_quantize import FakeQuantizeParameters @@ -120,6 +122,13 @@ def create_quantizer_insertion_command( ) -> OVQuantizerInsertionCommand: return OVQuantizerInsertionCommand(target_point, parameters) +    @staticmethod +    def create_convert_insertion_command( +        target_point: OVTargetPoint, +        parameters: FakeConvertParameters, +    ) -> OVConvertInsertionCommand: +        return OVConvertInsertionCommand(target_point, parameters) + @staticmethod def unify_statistics(statistics: List[OVMinMaxTensorStatistic]) -> OVMinMaxTensorStatistic: max_values, min_values = [], [] diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index 744e6cf948a..96c8ef87af7 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -20,8 +20,9 @@ from nncf.common.graph.layer_attributes import WeightedLayerAttributes from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType +from nncf.common.graph.transformations.commands import TransformationCommand from nncf.common.hardware.config import HWConfig -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.experimental.common.tensor_statistics.collectors import AGGREGATORS_MAP from nncf.experimental.common.tensor_statistics.collectors import TensorCollector @@ -29,6 +30,7 @@ from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import StatisticsType from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend +from nncf.quantization.fake_quantize import FakeConvertParameters from nncf.quantization.fake_quantize import FakeQuantizeParameters from nncf.quantization.range_estimator import RangeEstimatorParameters from nncf.torch.graph.graph import PTNNCFGraph @@ -137,6 +139,13 @@ def create_quantizer_insertion_command( nncf_graph, target_point, quantizer_config, parameters ) +    @staticmethod +    def create_convert_insertion_command( +        target_point: PTTargetPoint, +        parameters: FakeConvertParameters, +    ) -> TransformationCommand: +        raise RuntimeError("FakeConvert insertion not implemented in PyTorch backend!") + @staticmethod def unify_statistics(statistics: List[PTMinMaxTensorStatistic]) -> PTMinMaxTensorStatistic: max_values, min_values = [], [] diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py index 4db613c1bb7..ff27f06eadb 100644 --- a/nncf/quantization/algorithms/post_training/algorithm.py +++ b/nncf/quantization/algorithms/post_training/algorithm.py @@ -18,6 +18,7 @@ from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.algorithms.algorithm import Algorithm @@ -38,6 +39,7 @@ class PostTrainingQuantization(Algorithm): def __init__( self, +        mode: Optional[QuantizationMode] = None, preset: 
Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, @@ -47,6 +49,7 @@ def __init__( advanced_parameters: Optional[AdvancedQuantizationParameters] = None, ): """ + :param mode: Special quantization mode that defines a different way of performing the optimization. :param preset: A preset controls the quantization mode (symmetric and asymmetric). It can take the following values: - `performance`: Symmetric quantization of weights and activations. @@ -69,7 +72,14 @@ def __init__( fine-tuning the quantization algorithm """ self._pipeline = create_ptq_pipeline( - preset, target_device, subset_size, fast_bias_correction, model_type, ignored_scope, advanced_parameters + mode=mode, + preset=preset, + target_device=target_device, + subset_size=subset_size, + fast_bias_correction=fast_bias_correction, + model_type=model_type, + ignored_scope=ignored_scope, + advanced_parameters=advanced_parameters, ) @property diff --git a/nncf/quantization/algorithms/post_training/pipeline.py b/nncf/quantization/algorithms/post_training/pipeline.py index 5bb358e1457..027b03cd07a 100644 --- a/nncf/quantization/algorithms/post_training/pipeline.py +++ b/nncf/quantization/algorithms/post_training/pipeline.py @@ -14,6 +14,7 @@ from nncf.common.deprecation import warning_deprecated from nncf.common.quantization.structs import QuantizationPreset from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.algorithms.bias_correction.algorithm import BIAS_CORRECTION_THRESHOLD @@ -30,6 +31,7 @@ def create_ptq_pipeline( + mode: Optional[QuantizationMode] = None, preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, @@ -47,6 +49,7 @@ def create_ptq_pipeline( 3) MinMaxQuantization 4) FastBiasCorrection or BiasCorrection + :param mode: Special quantization mode that defines a different way of performing the optimization. :param preset: A preset controls the quantization mode (symmetric and asymmetric). It can take the following values: - `performance`: Symmetric quantization of weights and activations. 
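
For context, a minimal usage sketch of the `mode` argument threaded through above (hedged: `ov_model`, `data_items`, and `transform_fn` are illustrative placeholders, not part of this changeset):

import nncf
from nncf.parameters import QuantizationMode

# Any OpenVINO model plus an nncf.Dataset built from calibration samples.
calibration_dataset = nncf.Dataset(data_items, transform_fn)

# mode=FP8_E4M3 routes MinMaxQuantization to FakeConvert insertion;
# mode=None (the default) keeps the existing FakeQuantize/INT8 behavior.
quantized_model = nncf.quantize(
    ov_model,
    calibration_dataset,
    mode=QuantizationMode.FP8_E4M3,
)
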
@@ -106,19 +109,20 @@ def create_ptq_pipeline( pipeline_steps.append( [ MinMaxQuantization( - preset, - target_device, - subset_size, - model_type, - ignored_scope, - advanced_parameters.overflow_fix, - advanced_parameters.quantize_outputs, - advanced_parameters.inplace_statistics, - advanced_parameters.activations_quantization_params, - advanced_parameters.weights_quantization_params, - advanced_parameters.activations_range_estimator_params, - advanced_parameters.weights_range_estimator_params, - advanced_parameters.backend_params, + mode=mode, + preset=preset, + target_device=target_device, + subset_size=subset_size, + model_type=model_type, + ignored_scope=ignored_scope, + overflow_fix=advanced_parameters.overflow_fix, + quantize_outputs=advanced_parameters.quantize_outputs, + inplace_statistics=advanced_parameters.inplace_statistics, + activations_quantization_params=advanced_parameters.activations_quantization_params, + weights_quantization_params=advanced_parameters.weights_quantization_params, + activations_range_estimator_params=advanced_parameters.activations_range_estimator_params, + weights_range_estimator_params=advanced_parameters.weights_range_estimator_params, + backend_params=advanced_parameters.backend_params, ) ] ) diff --git a/nncf/quantization/fake_quantize.py b/nncf/quantization/fake_quantize.py index 38b56c97019..a3a8fe17e54 100644 --- a/nncf/quantization/fake_quantize.py +++ b/nncf/quantization/fake_quantize.py @@ -17,13 +17,14 @@ from nncf.common.quantization.quantizers import calculate_asymmetric_level_ranges from nncf.common.quantization.quantizers import calculate_symmetric_level_ranges from nncf.common.quantization.quantizers import get_num_levels -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import QuantizerGroup from nncf.common.tensor_statistics.statistics import MinMaxTensorStatistic from nncf.experimental.tensor import Tensor from nncf.experimental.tensor import TensorDataType from nncf.experimental.tensor import functions as fns +from nncf.quantization.advanced_parameters import FP8Type @dataclass @@ -45,6 +46,21 @@ class FakeQuantizeParameters: levels: int +@dataclass +class FakeConvertParameters: + """ + Class that stores FakeConvert layer attributes. + + :param scale: Tensor with the scale for the input value. + :param shift: Tensor with the shift for the input value. + :param destination_type: Destination FP8 type. + """ + + scale: Tensor + shift: Tensor + destination_type: FP8Type + + def fix_zero_filters_symmetric(max_values: Tensor, eps: float = 0.01) -> Tensor: """ Fixes zero filters for symmetric quantizer. @@ -246,6 +262,37 @@ def calculate_quantizer_parameters( return FakeQuantizeParameters(input_low, input_high, output_low, output_high, levels) +def calculate_convert_parameters( + statistics: MinMaxTensorStatistic, + is_per_channel: bool = False, + destination_type: FP8Type = FP8Type.E4M3, + activation_scale: float = 0.5, +) -> FakeConvertParameters: + """ + Calculates FakeConvert layer attributes for a weight/activation quantizer. + + :param statistics: Collected statistics for the quantizer insertion point. + :param is_per_channel: Whether the scale is calculated per-channel (weights) or per-tensor (activations). + :param destination_type: Destination FP8 type that determines the maximum representable value in the formula. + :param activation_scale: Multiplier applied to the calculated per-tensor (activation) scale. + :return: Parameters of the FakeConvert layer. 
+ """ + + destination_type_maximum = {FP8Type.E4M3: 448, FP8Type.E5M2: 57344} + + max_values = Tensor(statistics.max_values) + min_values = Tensor(statistics.min_values) + + max_destination_value = destination_type_maximum[destination_type] + tensor_dtype = fns.finfo(max_values) + scale = max_destination_value / fns.maximum(max_values, fns.abs(min_values) + tensor_dtype.eps) + if not is_per_channel: + scale = fns.squeeze(activation_scale * scale) + shift = fns.zeros_like(scale).astype(TensorDataType.float32) + scale = scale.astype(TensorDataType.float32) + return FakeConvertParameters(scale, shift, destination_type) + + def _calculate_scaled_parameters( min_values: Tensor, max_values: Tensor, diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index 2516b9e0913..01fe759e73f 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -22,6 +22,7 @@ from nncf.parameters import CompressWeightsMode from nncf.parameters import DropType from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedAccuracyRestorerParameters from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters @@ -39,6 +40,7 @@ def quantize( model: TModel, calibration_dataset: Dataset, + mode: Optional[QuantizationMode] = None, preset: Optional[QuantizationPreset] = None, target_device: TargetDevice = TargetDevice.ANY, subset_size: int = 300, @@ -55,6 +57,8 @@ def quantize( :param calibration_dataset: A representative dataset for the calibration process. :type calibration_dataset: nncf.Dataset + :param mode: Special quantization mode that specify different ways of the optimization. + :type mode: Optional[nncf.QuantizationMode] :param preset: A preset controls the quantization mode (symmetric and asymmetric). It can take the following values: - `performance`: Symmetric quantization of weights and activations. 
@@ -91,60 +95,64 @@ def quantize( from nncf.openvino.quantization.quantize_model import quantize_impl return quantize_impl( - model, - calibration_dataset, - preset, - target_device, - subset_size, - fast_bias_correction, - model_type, - ignored_scope, - advanced_parameters, + model=model, + calibration_dataset=calibration_dataset, + mode=mode, + preset=preset, + target_device=target_device, + subset_size=subset_size, + fast_bias_correction=fast_bias_correction, + model_type=model_type, + ignored_scope=ignored_scope, + advanced_parameters=advanced_parameters, ) if backend == BackendType.ONNX: from nncf.onnx.quantization.quantize_model import quantize_impl return quantize_impl( - model, - calibration_dataset, - preset, - target_device, - subset_size, - fast_bias_correction, - model_type, - ignored_scope, - advanced_parameters, + model=model, + calibration_dataset=calibration_dataset, + mode=mode, + preset=preset, + target_device=target_device, + subset_size=subset_size, + fast_bias_correction=fast_bias_correction, + model_type=model_type, + ignored_scope=ignored_scope, + advanced_parameters=advanced_parameters, ) if backend == BackendType.TENSORFLOW: from nncf.tensorflow.quantization.quantize_model import quantize_impl return quantize_impl( - model, - calibration_dataset, - preset, - target_device, - subset_size, - fast_bias_correction, - model_type, - ignored_scope, - advanced_parameters, + model=model, + calibration_dataset=calibration_dataset, + mode=mode, + preset=preset, + target_device=target_device, + subset_size=subset_size, + fast_bias_correction=fast_bias_correction, + model_type=model_type, + ignored_scope=ignored_scope, + advanced_parameters=advanced_parameters, ) if backend == BackendType.TORCH: from nncf.torch.quantization.quantize_model import quantize_impl return quantize_impl( - model, - calibration_dataset, - preset, - target_device, - subset_size, - fast_bias_correction, - model_type, - ignored_scope, - advanced_parameters, + model=model, + calibration_dataset=calibration_dataset, + mode=mode, + preset=preset, + target_device=target_device, + subset_size=subset_size, + fast_bias_correction=fast_bias_correction, + model_type=model_type, + ignored_scope=ignored_scope, + advanced_parameters=advanced_parameters, ) raise RuntimeError(f"Unsupported type of backend: {backend}") diff --git a/nncf/tensorflow/quantization/algorithm.py b/nncf/tensorflow/quantization/algorithm.py index e977e1debc0..ed8e0a8db67 100644 --- a/nncf/tensorflow/quantization/algorithm.py +++ b/nncf/tensorflow/quantization/algorithm.py @@ -40,8 +40,8 @@ from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup from nncf.common.quantization.structs import QuantizableWeightedLayerNode from nncf.common.quantization.structs import QuantizationConstraints -from nncf.common.quantization.structs import QuantizationMode from nncf.common.quantization.structs import QuantizationPreset +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import QuantizerGroup from nncf.common.schedulers import BaseCompressionScheduler diff --git a/nncf/tensorflow/quantization/collectors.py b/nncf/tensorflow/quantization/collectors.py index 22e9ab9eb85..1f44a72d84e 100644 --- a/nncf/tensorflow/quantization/collectors.py +++ b/nncf/tensorflow/quantization/collectors.py @@ -15,7 +15,7 @@ from nncf.common.quantization.collectors import QuantizationStatisticsCollector from 
nncf.common.quantization.collectors import QuantizerDescription -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.tensorflow.graph.utils import get_nncf_operations from nncf.tensorflow.quantization.utils import collect_fake_quantize_layers diff --git a/nncf/tensorflow/quantization/layers.py b/nncf/tensorflow/quantization/layers.py index 6c7b06c3c70..901ec287ab2 100644 --- a/nncf/tensorflow/quantization/layers.py +++ b/nncf/tensorflow/quantization/layers.py @@ -11,7 +11,7 @@ import tensorflow as tf -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.tensorflow.layers.custom_objects import NNCF_CUSTOM_OBJECTS from nncf.tensorflow.layers.custom_objects import NNCF_QUANTIZATION_OPERATIONS from nncf.tensorflow.layers.operation import InputType diff --git a/nncf/tensorflow/quantization/quantize_model.py b/nncf/tensorflow/quantization/quantize_model.py index ca72fb301a4..30bfdfe073e 100644 --- a/nncf/tensorflow/quantization/quantize_model.py +++ b/nncf/tensorflow/quantization/quantize_model.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional import tensorflow as tf @@ -21,6 +21,7 @@ from nncf.data import Dataset from nncf.data.dataset import DataProvider from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import apply_advanced_parameters_to_config @@ -133,10 +134,11 @@ def _create_nncf_config( def quantize_impl( model: tf.Module, calibration_dataset: Dataset, - preset: Union[QuantizationPreset, None], - target_device: TargetDevice, - subset_size: int, - fast_bias_correction: bool, + mode: Optional[QuantizationMode] = None, + preset: Optional[QuantizationPreset] = None, + target_device: TargetDevice = TargetDevice.ANY, + subset_size: int = 300, + fast_bias_correction: bool = True, model_type: Optional[ModelType] = None, ignored_scope: Optional[IgnoredScope] = None, advanced_parameters: Optional[AdvancedQuantizationParameters] = None, @@ -157,6 +159,9 @@ def quantize_impl( if target_device == TargetDevice.CPU_SPR: raise RuntimeError("target_device == CPU_SPR is not supported.") + if mode is not None: + raise ValueError(f"mode={mode} is not supported") + if preset is None: preset = QuantizationPreset.PERFORMANCE diff --git a/nncf/tensorflow/quantization/quantizers.py b/nncf/tensorflow/quantization/quantizers.py index f37ec35b940..8d396bb793a 100644 --- a/nncf/tensorflow/quantization/quantizers.py +++ b/nncf/tensorflow/quantization/quantizers.py @@ -14,7 +14,7 @@ import tensorflow as tf -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import QuantizerSpec from nncf.tensorflow.layers.custom_objects import NNCF_CUSTOM_OBJECTS diff --git a/nncf/torch/quantization/adjust_padding.py b/nncf/torch/quantization/adjust_padding.py index 9af55732655..dec435eda07 100644 --- a/nncf/torch/quantization/adjust_padding.py +++ 
b/nncf/torch/quantization/adjust_padding.py @@ -15,7 +15,7 @@ import torch from nncf.common.graph import NNCFNodeName -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.torch.layers import NNCFConv2d from nncf.torch.module_operations import UpdatePaddingValue from nncf.torch.nncf_network import NNCFNetwork diff --git a/nncf/torch/quantization/init_range.py b/nncf/torch/quantization/init_range.py index 8357ae649da..88a7c03c95f 100644 --- a/nncf/torch/quantization/init_range.py +++ b/nncf/torch/quantization/init_range.py @@ -23,7 +23,7 @@ from nncf.common.quantization.quantizer_setup import QuantizationPointBase from nncf.common.quantization.quantizer_setup import QuantizerSetupBase from nncf.common.quantization.structs import NonWeightQuantizerId -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerGroup from nncf.common.quantization.structs import QuantizerId from nncf.common.quantization.structs import WeightQuantizerId diff --git a/nncf/torch/quantization/layers.py b/nncf/torch/quantization/layers.py index b0f815c55de..39b1f9ce409 100644 --- a/nncf/torch/quantization/layers.py +++ b/nncf/torch/quantization/layers.py @@ -28,7 +28,7 @@ from nncf.common.quantization.quantizers import calculate_asymmetric_level_ranges from nncf.common.quantization.quantizers import calculate_symmetric_level_ranges from nncf.common.quantization.quantizers import get_num_levels -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import QuantizerSpec from nncf.common.utils.debug import is_debug diff --git a/nncf/torch/quantization/quantize_model.py b/nncf/torch/quantization/quantize_model.py index 91487604199..7aeba58802b 100644 --- a/nncf/torch/quantization/quantize_model.py +++ b/nncf/torch/quantization/quantize_model.py @@ -10,7 +10,7 @@ # limitations under the License. 
from copy import deepcopy -from typing import Optional, Union +from typing import Optional import torch @@ -18,6 +18,7 @@ from nncf.data import Dataset from nncf.parameters import CompressWeightsMode from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization @@ -32,10 +33,11 @@ def quantize_impl( model: torch.nn.Module, calibration_dataset: Dataset, - preset: Union[QuantizationPreset, None], - target_device: TargetDevice, - subset_size: int, - fast_bias_correction: bool, + mode: Optional[QuantizationMode] = None, + preset: Optional[QuantizationPreset] = None, + target_device: TargetDevice = TargetDevice.ANY, + subset_size: int = 300, + fast_bias_correction: bool = True, model_type: Optional[ModelType] = None, ignored_scope: Optional[IgnoredScope] = None, advanced_parameters: Optional[AdvancedQuantizationParameters] = None, @@ -47,6 +49,8 @@ def quantize_impl( raise ValueError(f"fast_bias_correction={fast_bias_correction} is not supported") if target_device == TargetDevice.CPU_SPR: raise RuntimeError("target_device == CPU_SPR is not supported") + if mode is not None: + raise ValueError(f"mode={mode} is not supported") copied_model = deepcopy(model) diff --git a/tests/common/quantization/test_minmax.py b/tests/common/quantization/test_minmax.py index 719012a5a02..7d973987c21 100644 --- a/tests/common/quantization/test_minmax.py +++ b/tests/common/quantization/test_minmax.py @@ -11,22 +11,32 @@ import pytest -from nncf.common.quantization.structs import QuantizationMode from nncf.common.quantization.structs import QuantizationPreset +from nncf.common.quantization.structs import QuantizationScheme from nncf.common.quantization.structs import QuantizerGroup from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode +from nncf.parameters import TargetDevice +from nncf.quantization.advanced_parameters import FP8QuantizationParameters +from nncf.quantization.advanced_parameters import FP8Type +from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization @pytest.mark.parametrize( "preset,model_type,activation_mode,weights_mode", [ - (None, None, QuantizationMode.SYMMETRIC, QuantizationMode.SYMMETRIC), - (QuantizationPreset.PERFORMANCE, None, QuantizationMode.SYMMETRIC, QuantizationMode.SYMMETRIC), - (QuantizationPreset.MIXED, None, QuantizationMode.ASYMMETRIC, QuantizationMode.SYMMETRIC), - (None, ModelType.TRANSFORMER, QuantizationMode.ASYMMETRIC, QuantizationMode.SYMMETRIC), - (QuantizationPreset.PERFORMANCE, ModelType.TRANSFORMER, QuantizationMode.SYMMETRIC, QuantizationMode.SYMMETRIC), - (QuantizationPreset.MIXED, ModelType.TRANSFORMER, QuantizationMode.ASYMMETRIC, QuantizationMode.SYMMETRIC), + (None, None, QuantizationScheme.SYMMETRIC, QuantizationScheme.SYMMETRIC), + (QuantizationPreset.PERFORMANCE, None, QuantizationScheme.SYMMETRIC, QuantizationScheme.SYMMETRIC), + (QuantizationPreset.MIXED, None, QuantizationScheme.ASYMMETRIC, QuantizationScheme.SYMMETRIC), + (None, ModelType.TRANSFORMER, QuantizationScheme.ASYMMETRIC, QuantizationScheme.SYMMETRIC), + ( + QuantizationPreset.PERFORMANCE, + ModelType.TRANSFORMER, + QuantizationScheme.SYMMETRIC, + QuantizationScheme.SYMMETRIC, + ), + (QuantizationPreset.MIXED, ModelType.TRANSFORMER, 
QuantizationScheme.ASYMMETRIC, QuantizationScheme.SYMMETRIC), ], ) def test_quantization_preset(preset, model_type, activation_mode, weights_mode): @@ -38,3 +48,127 @@ def test_quantization_preset(preset, model_type, activation_mode, weights_mode): == activation_mode ) assert global_quantizer_constraints[QuantizerGroup.WEIGHTS].qconf_attr_vs_constraint_dict["mode"] == weights_mode + + +@pytest.mark.parametrize( + "algo_params, is_error", + [ + ({"mode": QuantizationMode.FP8_E4M3}, True), + ( + { + "mode": QuantizationMode.FP8_E4M3, + "preset": QuantizationPreset.PERFORMANCE, + "target_device": TargetDevice.CPU, + "overflow_fix": OverflowFix.DISABLE, + "quantize_outputs": False, + "backend_params": None, + }, + False, + ), + ( + { + "mode": QuantizationMode.FP8_E4M3, + "preset": QuantizationPreset.MIXED, + "target_device": TargetDevice.GPU, + "overflow_fix": OverflowFix.FIRST_LAYER, + "quantize_outputs": True, + }, + True, + ), + ( + { + "mode": QuantizationMode.FP8_E4M3, + "target_device": TargetDevice.CPU_SPR, + "overflow_fix": OverflowFix.ENABLE, + }, + True, + ), + ], +) +def test_mode_against_default_map(algo_params, is_error): + default_values_to_compare = { + "_preset": QuantizationPreset.PERFORMANCE, + "_target_device": TargetDevice.CPU, + "_overflow_fix": OverflowFix.DISABLE, + "_quantize_outputs": False, + "_backend_params": None, + } + + qconf_attr_vs_constraint_dict_to_compare = {"mode": QuantizationScheme.SYMMETRIC} + + if is_error: + try: + minmax = MinMaxQuantization(**algo_params) + except RuntimeError: + pytest.xfail("Caught expected RuntimeError") + minmax = MinMaxQuantization(**algo_params) + for ref_parameter_name, ref_parameter_value in default_values_to_compare.items(): + parameter_value = getattr(minmax, ref_parameter_name) + assert parameter_value == ref_parameter_value + + global_quantizer_constraints = getattr(minmax, "_global_quantizer_constraints") + assert ( + global_quantizer_constraints[QuantizerGroup.ACTIVATIONS].qconf_attr_vs_constraint_dict + == qconf_attr_vs_constraint_dict_to_compare + ) + assert ( + global_quantizer_constraints[QuantizerGroup.WEIGHTS].qconf_attr_vs_constraint_dict + == qconf_attr_vs_constraint_dict_to_compare + ) + + +@pytest.mark.parametrize( + "mode, activations_quantization_params, weights_quantization_params", + [ + ( + QuantizationMode.FP8_E4M3, + None, + None, + ), + ( + QuantizationMode.FP8_E5M2, + None, + None, + ), + ( + QuantizationMode.FP8_E4M3, + FP8QuantizationParameters(destination_type=FP8Type.E4M3), + FP8QuantizationParameters(destination_type=FP8Type.E4M3), + ), + (QuantizationMode.FP8_E4M3, FP8QuantizationParameters(destination_type=FP8Type.E5M2), None), + ( + QuantizationMode.FP8_E5M2, + None, + FP8QuantizationParameters(destination_type=FP8Type.E4M3), + ), + ( + QuantizationMode.FP8_E5M2, + FP8QuantizationParameters(destination_type=FP8Type.E4M3), + FP8QuantizationParameters(destination_type=FP8Type.E4M3), + ), + ], +) +def test_mode_with_quantization_params(mode, activations_quantization_params, weights_quantization_params): + minmax = MinMaxQuantization( + mode=mode, + activations_quantization_params=activations_quantization_params, + weights_quantization_params=weights_quantization_params, + overflow_fix=OverflowFix.DISABLE, + preset=QuantizationPreset.PERFORMANCE, + ) + default_configuration_map = { + QuantizationMode.FP8_E4M3: FP8QuantizationParameters(destination_type=FP8Type.E4M3), + QuantizationMode.FP8_E5M2: FP8QuantizationParameters(destination_type=FP8Type.E5M2), + } + + quantization_params = 
getattr(minmax, "_quantization_params") + assert ( + quantization_params[QuantizerGroup.ACTIVATIONS] == default_configuration_map[mode] + if activations_quantization_params is None + else activations_quantization_params + ) + assert ( + quantization_params[QuantizerGroup.WEIGHTS] == default_configuration_map[mode] + if weights_quantization_params is None + else weights_quantization_params + ) diff --git a/tests/common/quantization/test_quantizer_propagation_graph.py b/tests/common/quantization/test_quantizer_propagation_graph.py index affc7df8bc2..f80541d5e39 100644 --- a/tests/common/quantization/test_quantizer_propagation_graph.py +++ b/tests/common/quantization/test_quantizer_propagation_graph.py @@ -33,7 +33,7 @@ from nncf.common.quantization.quantizer_setup import MultiConfigQuantizationPoint from nncf.common.quantization.quantizer_setup import MultiConfigQuantizerSetup from nncf.common.quantization.quantizer_setup import WeightQuantizationInsertionPoint -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import UnifiedScaleType from tests.common.quantization.metatypes import WEIGHT_LAYER_METATYPES diff --git a/tests/common/quantization/test_quantizer_propagation_solver.py b/tests/common/quantization/test_quantizer_propagation_solver.py index f50b7a484eb..6fd3ae45001 100644 --- a/tests/common/quantization/test_quantizer_propagation_solver.py +++ b/tests/common/quantization/test_quantizer_propagation_solver.py @@ -39,7 +39,7 @@ from nncf.common.quantization.quantizer_setup import MultiConfigQuantizationPoint from nncf.common.quantization.quantizer_setup import QuantizationPointId from nncf.common.quantization.quantizer_setup import WeightQuantizationInsertionPoint -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from tests.common.quantization.metatypes import DEFAULT_TEST_QUANT_TRAIT_MAP from tests.common.quantization.metatypes import BatchNormTestMetatype diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index 36127596e5f..bc37d9b9656 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -21,7 +21,7 @@ from nncf.common.factory import NNCFGraphFactory from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer diff --git a/tests/openvino/native/data/2023.3/reference_graphs/quantized/ConvModel_FC.dot b/tests/openvino/native/data/2023.3/reference_graphs/quantized/ConvModel_FC.dot new file mode 100644 index 00000000000..d3cda801b96 --- /dev/null +++ b/tests/openvino/native/data/2023.3/reference_graphs/quantized/ConvModel_FC.dot @@ -0,0 +1,47 @@ +strict digraph { +"0 Input_1" [id=0, type=Parameter]; +"1 Input_2" [id=1, type=Parameter]; +"2 Sub" [id=2, type=Subtract]; +"3 Add" [id=3, type=Add]; +"4 
Sub/fc_output_0" [id=4, type=FakeConvert]; +"5 Mul" [id=5, type=Multiply]; +"6 Conv" [id=6, type=Convolution]; +"7 Transpose" [id=7, type=Transpose]; +"8 Conv_Add" [id=8, type=Add]; +"9 Concat_2666" [id=9, type=Concat]; +"10 Relu" [id=10, type=Relu]; +"11 Result" [id=11, type=Result]; +"12 Constant_2664" [id=12, type=Constant]; +"13 Constant_2662" [id=13, type=Constant]; +"14 Constant_2660" [id=14, type=Constant]; +"15 Bias" [id=15, type=Constant]; +"16 Conv/fc_weights_1" [id=16, type=FakeConvert]; +"17 Constant_5380" [id=17, type=Constant]; +"18 Constant_5379" [id=18, type=Constant]; +"19 Constant_2654" [id=19, type=Constant]; +"20 Constant_5377" [id=20, type=Constant]; +"21 Constant_5376" [id=21, type=Constant]; +"22 Constant_2652" [id=22, type=Constant]; +"0 Input_1" -> "2 Sub" [label="[1, 3, 4, 2]", style=solid]; +"1 Input_2" -> "3 Add" [label="[1, 3, 2, 4]", style=solid]; +"2 Sub" -> "4 Sub/fc_output_0" [label="[1, 3, 4, 2]", style=solid]; +"3 Add" -> "5 Mul" [label="[1, 3, 2, 4]", style=solid]; +"4 Sub/fc_output_0" -> "6 Conv" [label="[1, 3, 4, 2]", style=solid]; +"5 Mul" -> "7 Transpose" [label="[1, 3, 2, 4]", style=solid]; +"6 Conv" -> "8 Conv_Add" [label="[1, 3, 4, 2]", style=solid]; +"7 Transpose" -> "9 Concat_2666" [label="[1, 3, 4, 2]", style=solid]; +"8 Conv_Add" -> "10 Relu" [label="[1, 3, 4, 2]", style=solid]; +"9 Concat_2666" -> "11 Result" [label="[2, 3, 4, 2]", style=solid]; +"10 Relu" -> "9 Concat_2666" [label="[1, 3, 4, 2]", style=solid]; +"12 Constant_2664" -> "7 Transpose" [label="[4]", style=dashed]; +"13 Constant_2662" -> "5 Mul" [label="[1, 3, 1, 1]", style=solid]; +"14 Constant_2660" -> "3 Add" [label="[1, 3, 1, 1]", style=solid]; +"15 Bias" -> "8 Conv_Add" [label="[1, 3, 1, 1]", style=solid]; +"16 Conv/fc_weights_1" -> "6 Conv" [label="[3, 3, 1, 1]", style=solid]; +"17 Constant_5380" -> "16 Conv/fc_weights_1" [label="[3, 1, 1, 1]", style=solid]; +"18 Constant_5379" -> "16 Conv/fc_weights_1" [label="[3, 1, 1, 1]", style=solid]; +"19 Constant_2654" -> "16 Conv/fc_weights_1" [label="[3, 3, 1, 1]", style=solid]; +"20 Constant_5377" -> "4 Sub/fc_output_0" [label="[]", style=solid]; +"21 Constant_5376" -> "4 Sub/fc_output_0" [label="[]", style=solid]; +"22 Constant_2652" -> "2 Sub" [label="[1, 3, 1, 1]", style=solid]; +} diff --git a/tests/openvino/native/data/2023.3/reference_graphs/quantized/LinearModel_FC.dot b/tests/openvino/native/data/2023.3/reference_graphs/quantized/LinearModel_FC.dot new file mode 100644 index 00000000000..004302766d0 --- /dev/null +++ b/tests/openvino/native/data/2023.3/reference_graphs/quantized/LinearModel_FC.dot @@ -0,0 +1,31 @@ +strict digraph { +"0 Input" [id=0, type=Parameter]; +"1 Input/fc_output_0" [id=1, type=FakeConvert]; +"2 Reshape" [id=2, type=Reshape]; +"3 Add" [id=3, type=Add]; +"4 MatMul" [id=4, type=MatMul]; +"5 Result_Add" [id=5, type=Result]; +"6 Result_MatMul" [id=6, type=Result]; +"7 Constant_6" [id=7, type=Constant]; +"8 Constant_2" [id=8, type=Constant]; +"9 Constant_2646" [id=9, type=Constant]; +"10 Constant_2645" [id=10, type=Constant]; +"11 MatMul/fc_weights_1" [id=11, type=FakeConvert]; +"12 Constant_2649" [id=12, type=Constant]; +"13 Constant_2648" [id=13, type=Constant]; +"14 Constant_4" [id=14, type=Constant]; +"0 Input" -> "1 Input/fc_output_0" [label="[1, 3, 4, 2]", style=solid]; +"1 Input/fc_output_0" -> "2 Reshape" [label="[1, 3, 4, 2]", style=solid]; +"2 Reshape" -> "3 Add" [label="[1, 3, 2, 4]", style=solid]; +"2 Reshape" -> "4 MatMul" [label="[1, 3, 2, 4]", style=solid]; +"3 Add" -> "5 Result_Add" 
[label="[1, 3, 2, 4]", style=solid]; +"4 MatMul" -> "6 Result_MatMul" [label="[1, 3, 2, 5]", style=solid]; +"7 Constant_6" -> "3 Add" [label="[1, 3, 2, 4]", style=solid]; +"8 Constant_2" -> "2 Reshape" [label="[4]", style=dashed]; +"9 Constant_2646" -> "1 Input/fc_output_0" [label="[]", style=solid]; +"10 Constant_2645" -> "1 Input/fc_output_0" [label="[]", style=solid]; +"11 MatMul/fc_weights_1" -> "4 MatMul" [label="[4, 5]", style=solid]; +"12 Constant_2649" -> "11 MatMul/fc_weights_1" [label="[1, 5]", style=solid]; +"13 Constant_2648" -> "11 MatMul/fc_weights_1" [label="[1, 5]", style=solid]; +"14 Constant_4" -> "11 MatMul/fc_weights_1" [label="[4, 5]", style=solid]; +} diff --git a/tests/openvino/native/data/2023.3/reference_graphs/quantized/SharedConvModel_FC.dot b/tests/openvino/native/data/2023.3/reference_graphs/quantized/SharedConvModel_FC.dot new file mode 100644 index 00000000000..c66a76ebaa5 --- /dev/null +++ b/tests/openvino/native/data/2023.3/reference_graphs/quantized/SharedConvModel_FC.dot @@ -0,0 +1,26 @@ +strict digraph { +"0 Input" [id=0, type=Parameter]; +"1 Input/fc_output_0" [id=1, type=FakeConvert]; +"2 Conv_1" [id=2, type=Convolution]; +"3 Conv_2" [id=3, type=Convolution]; +"4 Result_1" [id=4, type=Result]; +"5 Result_2" [id=5, type=Result]; +"6 Conv_1/fc_weights_1" [id=6, type=FakeConvert]; +"7 Constant_7938" [id=7, type=Constant]; +"8 Constant_7937" [id=8, type=Constant]; +"9 Shared_conv_w" [id=9, type=Constant]; +"10 Constant_7935" [id=10, type=Constant]; +"11 Constant_7934" [id=11, type=Constant]; +"0 Input" -> "1 Input/fc_output_0" [label="[1, 3, 3, 3]", style=solid]; +"1 Input/fc_output_0" -> "2 Conv_1" [label="[1, 3, 3, 3]", style=solid]; +"1 Input/fc_output_0" -> "3 Conv_2" [label="[1, 3, 3, 3]", style=solid]; +"2 Conv_1" -> "4 Result_1" [label="[1, 3, 3, 3]", style=solid]; +"3 Conv_2" -> "5 Result_2" [label="[1, 3, 3, 3]", style=solid]; +"6 Conv_1/fc_weights_1" -> "2 Conv_1" [label="[3, 3, 1, 1]", style=solid]; +"6 Conv_1/fc_weights_1" -> "3 Conv_2" [label="[3, 3, 1, 1]", style=solid]; +"7 Constant_7938" -> "6 Conv_1/fc_weights_1" [label="[3, 1, 1, 1]", style=solid]; +"8 Constant_7937" -> "6 Conv_1/fc_weights_1" [label="[3, 1, 1, 1]", style=solid]; +"9 Shared_conv_w" -> "6 Conv_1/fc_weights_1" [label="[3, 3, 1, 1]", style=solid]; +"10 Constant_7935" -> "1 Input/fc_output_0" [label="[]", style=solid]; +"11 Constant_7934" -> "1 Input/fc_output_0" [label="[]", style=solid]; +} diff --git a/tests/openvino/native/quantization/test_graphs.py b/tests/openvino/native/quantization/test_graphs.py index 34ad51886a2..268f7e4b501 100644 --- a/tests/openvino/native/quantization/test_graphs.py +++ b/tests/openvino/native/quantization/test_graphs.py @@ -22,7 +22,9 @@ from nncf.openvino.quantization.quantize_model import quantize_impl from nncf.openvino.statistics.aggregator import OVStatisticsAggregator from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode from nncf.parameters import TargetDevice +from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant from tests.openvino.conftest import OPENVINO_NATIVE_TEST_ROOT from tests.openvino.native.common import compare_nncf_graphs @@ -219,3 +221,26 @@ def test_scaled_dot_product_attention_placement(q_params, tmp_path): bin_path = tmp_path / (result_name + ".bin") dump_model(quantized_model, str(xml_path), str(bin_path)) compare_nncf_graphs(quantized_model, path_ref_graph) + + +@pytest.mark.parametrize( + 
"model_creator_func", + [SYNTHETIC_MODELS.get("LinearModel"), SYNTHETIC_MODELS.get("ConvModel"), SYNTHETIC_MODELS.get("SharedConvModel")], +) +def test_synthetic_models_fc_placement(model_creator_func): + ov_major_version, ov_minor_version = get_openvino_major_minor_version() + if ov_major_version < 2023 or (ov_major_version == 2023 and ov_minor_version < 3): + pytest.xfail("FakeConvert is not supported until 2023.3") + model = model_creator_func() + quantized_model = quantize_model( + model.ov_model, + { + "preset": QuantizationPreset.PERFORMANCE, + "inplace_statistics": True, + "mode": QuantizationMode.FP8_E4M3, + "overflow_fix": OverflowFix.DISABLE, + }, + ) + + path_ref_graph = QUANTIZED_REF_GRAPHS_DIR / f"{model.ref_model_name}_FC.dot" + compare_nncf_graphs(quantized_model, path_ref_graph) diff --git a/tests/openvino/native/quantization/test_quantization_pipeline.py b/tests/openvino/native/quantization/test_quantization_pipeline.py index 34cc986e048..7395b4a0e28 100644 --- a/tests/openvino/native/quantization/test_quantization_pipeline.py +++ b/tests/openvino/native/quantization/test_quantization_pipeline.py @@ -21,7 +21,7 @@ from tests.openvino.native.models import ConvModel from tests.openvino.native.models import LinearModel from tests.openvino.native.models import MatMul2DModel -from tests.openvino.native.test_model_transformer import get_fq_nodes +from tests.openvino.native.test_model_transformer import get_nodes_by_type REF_FQ_NODES = [ (("MatMul", 1), ["Input/fq_output_0"]), @@ -44,7 +44,7 @@ def test_compress_weights(model_creator_func, ref_nodes): fast_bias_correction=True, ) - fq_nodes = get_fq_nodes(quantized_model) + fq_nodes = get_nodes_by_type(quantized_model, type_name="FakeQuantize") assert len(fq_nodes) == len(ref_fqs_names) for fq_name in fq_nodes: assert fq_name in ref_fqs_names @@ -72,7 +72,7 @@ def test_overflow_fix_applied(model_creator_func, ref_nodes): fast_bias_correction=True, ) - fq_nodes = get_fq_nodes(quantized_model) + fq_nodes = get_nodes_by_type(quantized_model, type_name="FakeQuantize") assert len(fq_nodes) == len(ref_fqs_names) for fq_name in fq_nodes: assert fq_name in ref_fqs_names diff --git a/tests/openvino/native/quantization/test_sanity.py b/tests/openvino/native/quantization/test_sanity.py index 911ce9a3854..088ce3f9c8d 100644 --- a/tests/openvino/native/quantization/test_sanity.py +++ b/tests/openvino/native/quantization/test_sanity.py @@ -66,8 +66,8 @@ def test_compression(data_dir, tmp_path, model, dataset, ref_metrics, advanced_p quantized_model = nncf.quantize( ov_model, calibration_dataset, - QuantizationPreset.PERFORMANCE, - TargetDevice.ANY, + preset=QuantizationPreset.PERFORMANCE, + target_device=TargetDevice.ANY, subset_size=300, fast_bias_correction=True, advanced_parameters=advanced_params, diff --git a/tests/openvino/native/test_model_transformer.py b/tests/openvino/native/test_model_transformer.py index 493c266ed14..84415f609aa 100644 --- a/tests/openvino/native/test_model_transformer.py +++ b/tests/openvino/native/test_model_transformer.py @@ -19,6 +19,7 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout +from nncf.experimental.tensor import Tensor from nncf.openvino.graph.model_transformer import OVModelTransformer from nncf.openvino.graph.node_utils import get_inplace_batch_mean_op from nncf.openvino.graph.node_utils import get_inplace_max_op @@ -29,6 +30,7 @@ from nncf.openvino.graph.node_utils import get_result_node_name from 
nncf.openvino.graph.transformations.commands import OVBiasCorrectionCommand from nncf.openvino.graph.transformations.commands import OVBiasInsertionCommand +from nncf.openvino.graph.transformations.commands import OVConvertInsertionCommand from nncf.openvino.graph.transformations.commands import OVFQNodeRemovingCommand from nncf.openvino.graph.transformations.commands import OVInplaceFnInsertionCommand from nncf.openvino.graph.transformations.commands import OVModelExtractionCommand @@ -36,9 +38,12 @@ from nncf.openvino.graph.transformations.commands import OVOutputInsertionCommand from nncf.openvino.graph.transformations.commands import OVQuantizerInsertionCommand from nncf.openvino.graph.transformations.commands import OVTargetPoint +from nncf.quantization.advanced_parameters import FP8Type +from nncf.quantization.fake_quantize import FakeConvertParameters from nncf.quantization.fake_quantize import FakeQuantizeParameters from tests.openvino.conftest import OPENVINO_NATIVE_TEST_ROOT from tests.openvino.native.common import compare_nncf_graphs +from tests.openvino.native.common import get_openvino_major_minor_version from tests.openvino.native.common import get_openvino_version from tests.openvino.native.models import ConvModel from tests.openvino.native.models import ConvNotBiasModel @@ -54,8 +59,11 @@ TARGET_INSERT_LAYERS = [["Add"], ["MatMul"], ["Add", "MatMul"]] TARGET_PRE_LAYER_FQS = [["Add/fq_input_0"], ["MatMul/fq_input_0"], ["Add/fq_input_0", "MatMul/fq_input_0"]] +TARGET_PRE_LAYER_FCS = [["Add/fc_input_0"], ["MatMul/fc_input_0"], ["Add/fc_input_0", "MatMul/fc_input_0"]] TARGET_POST_LAYER_FQS = [["Add/fq_output_0"], ["MatMul/fq_output_0"], ["Add/fq_output_0", "MatMul/fq_output_0"]] +TARGET_POST_LAYER_FCS = [["Add/fc_output_0"], ["MatMul/fc_output_0"], ["Add/fc_output_0", "MatMul/fc_output_0"]] TARGET_WEIGHTS_FQS = [["Add/fq_weights_1"], ["MatMul/fq_weights_1"], ["Add/fq_weights_1", "MatMul/fq_weights_1"]] +TARGET_WEIGHTS_FCS = [["Add/fc_weights_1"], ["MatMul/fc_weights_1"], ["Add/fc_weights_1", "MatMul/fc_weights_1"]] def create_transformed_model(model, target_layers, target_type, command_type, port_id=0, command_kwargs=None): @@ -84,15 +92,27 @@ def get_extra_outputs(original_model, transformed_model): return extra_outputs -def get_fq_nodes(model): +def get_nodes_by_type(model: ov.Model, type_name: str) -> List[str]: fq_nodes = [] for op in model.get_ops(): - if op.get_type_name() == "FakeQuantize": + if op.get_type_name() == type_name: fq_nodes.append(op.get_friendly_name()) return fq_nodes +def create_fake_quantize_params() -> FakeQuantizeParameters: + min_values = Tensor(np.zeros((1, 1, 1, 1)).astype(np.float32)) + max_values = Tensor(np.ones((1, 1, 1, 1)).astype(np.float32)) + return FakeQuantizeParameters(min_values, max_values, min_values, max_values, levels=256) + + +def create_fake_convert_params(destination_type: FP8Type) -> FakeConvertParameters: + scale = Tensor(np.ones((1)).astype(np.float32)) + shift = Tensor(np.zeros((1)).astype(np.float32)) + return FakeConvertParameters(scale, shift, destination_type) + + @dataclass class InplaceOpTestCase: name: str @@ -433,67 +453,121 @@ def test_node_removing(target_layers): def test_fq_insertion_pre_layer(target_layers, ref_fq_names): model = LinearModel().ov_model - min_values = np.zeros((1, 1, 1, 1)).astype(np.float32) - max_values = np.ones((1, 1, 1, 1)).astype(np.float32) - quantizer_parameters = FakeQuantizeParameters(min_values, max_values, min_values, max_values, levels=256) - transformed_model = 
create_transformed_model( model, target_layers, TargetType.PRE_LAYER_OPERATION, OVQuantizerInsertionCommand, - command_kwargs={"quantizer_parameters": quantizer_parameters}, + command_kwargs={"quantizer_parameters": create_fake_quantize_params()}, ) - fq_nodes = get_fq_nodes(transformed_model) + fq_nodes = get_nodes_by_type(transformed_model, type_name="FakeQuantize") assert len(fq_nodes) == len(ref_fq_names) for fq_name in fq_nodes: assert fq_name in ref_fq_names +@pytest.mark.parametrize("target_layers, ref_fc_names", zip(TARGET_INSERT_LAYERS, TARGET_PRE_LAYER_FCS)) +def test_fc_insertion_pre_layer(target_layers, ref_fc_names): + ov_major_version, ov_minor_version = get_openvino_major_minor_version() + if ov_major_version < 2023 or (ov_major_version == 2023 and ov_minor_version < 3): + pytest.xfail("FakeConvert is not supported until 2023.3") + model = LinearModel().ov_model + + transformed_model = create_transformed_model( + model, + target_layers, + TargetType.PRE_LAYER_OPERATION, + OVConvertInsertionCommand, + command_kwargs={"convert_parameters": create_fake_convert_params(destination_type=FP8Type.E4M3)}, + ) + fc_nodes = get_nodes_by_type(transformed_model, type_name="FakeConvert") + + assert len(fc_nodes) == len(ref_fc_names) + for fc_name in fc_nodes: + assert fc_name in ref_fc_names + + @pytest.mark.parametrize("target_layers, ref_fq_names", zip(TARGET_INSERT_LAYERS, TARGET_POST_LAYER_FQS)) def test_fq_insertion_post_layer(target_layers, ref_fq_names): model = LinearModel().ov_model - min_values = np.zeros((1, 1, 1, 1)).astype(np.float32) - max_values = np.ones((1, 1, 1, 1)).astype(np.float32) - quantizer_parameters = FakeQuantizeParameters(min_values, max_values, min_values, max_values, levels=256) transformed_model = create_transformed_model( model, target_layers, TargetType.POST_LAYER_OPERATION, OVQuantizerInsertionCommand, - command_kwargs={"quantizer_parameters": quantizer_parameters}, + command_kwargs={"quantizer_parameters": create_fake_quantize_params()}, ) - fq_nodes = get_fq_nodes(transformed_model) + fq_nodes = get_nodes_by_type(transformed_model, type_name="FakeQuantize") assert len(fq_nodes) == len(ref_fq_names) for fq_name in fq_nodes: assert fq_name in ref_fq_names +@pytest.mark.parametrize("target_layers, ref_fc_names", zip(TARGET_INSERT_LAYERS, TARGET_POST_LAYER_FCS)) +def test_fc_insertion_post_layer(target_layers, ref_fc_names): + ov_major_version, ov_minor_version = get_openvino_major_minor_version() + if ov_major_version < 2023 or (ov_major_version == 2023 and ov_minor_version < 3): + pytest.xfail("FakeConvert is not supported until 2023.3") + model = LinearModel().ov_model + + transformed_model = create_transformed_model( + model, + target_layers, + TargetType.POST_LAYER_OPERATION, + OVConvertInsertionCommand, + command_kwargs={"convert_parameters": create_fake_convert_params(destination_type=FP8Type.E4M3)}, + ) + fc_nodes = get_nodes_by_type(transformed_model, type_name="FakeConvert") + + assert len(fc_nodes) == len(ref_fc_names) + for fc_name in fc_nodes: + assert fc_name in ref_fc_names + + @pytest.mark.parametrize("target_layers, ref_fq_names", zip(TARGET_INSERT_LAYERS, TARGET_WEIGHTS_FQS)) def test_fq_insertion_weights(target_layers, ref_fq_names): model = LinearModel().ov_model - min_values = np.zeros((1, 1, 1, 1)).astype(np.float32) - max_values = np.ones((1, 1, 1, 1)).astype(np.float32) - quantizer_parameters = FakeQuantizeParameters(min_values, max_values, min_values, max_values, levels=256) transformed_model = create_transformed_model( model, 
target_layers, TargetType.OPERATION_WITH_WEIGHTS, OVQuantizerInsertionCommand, - 1, - {"quantizer_parameters": quantizer_parameters}, + port_id=1, + command_kwargs={"quantizer_parameters": create_fake_quantize_params()}, ) - fq_nodes = get_fq_nodes(transformed_model) + fq_nodes = get_nodes_by_type(transformed_model, type_name="FakeQuantize") assert len(fq_nodes) == len(ref_fq_names) for fq_name in fq_nodes: assert fq_name in ref_fq_names +@pytest.mark.parametrize("target_layers, ref_fc_names", zip(TARGET_INSERT_LAYERS, TARGET_WEIGHTS_FCS)) +def test_fc_insertion_weights(target_layers, ref_fc_names): + ov_major_version, ov_minor_version = get_openvino_major_minor_version() + if ov_major_version < 2023 or (ov_major_version == 2023 and ov_minor_version < 3): + pytest.xfail("FakeConvert is not supported until 2023.3") + model = LinearModel().ov_model + + transformed_model = create_transformed_model( + model, + target_layers, + TargetType.OPERATION_WITH_WEIGHTS, + OVConvertInsertionCommand, + port_id=1, + command_kwargs={"convert_parameters": create_fake_convert_params(destination_type=FP8Type.E4M3)}, + ) + fc_nodes = get_nodes_by_type(transformed_model, type_name="FakeConvert") + + assert len(fc_nodes) == len(ref_fc_names) + for fc_name in fc_nodes: + assert fc_name in ref_fc_names + + MODELS_WITH_PARAMETERS = [ { "model": ConvModel().ov_model, diff --git a/tests/openvino/tools/calibrate.py b/tests/openvino/tools/calibrate.py index 5e86a329caa..15200ff3b5c 100644 --- a/tests/openvino/tools/calibrate.py +++ b/tests/openvino/tools/calibrate.py @@ -37,14 +37,15 @@ import nncf from nncf.common.deprecation import warning_deprecated from nncf.common.logging.logger import set_log_file -from nncf.common.quantization.structs import QuantizationMode from nncf.common.quantization.structs import QuantizationPreset +from nncf.common.quantization.structs import QuantizationScheme from nncf.data.dataset import DataProvider from nncf.openvino.pot.quantization.quantize_model import ( quantize_with_accuracy_control_impl as pot_quantize_with_native_accuracy_control, ) from nncf.parameters import DropType from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedAccuracyRestorerParameters from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters @@ -105,7 +106,17 @@ def parse_args(): class CustomJSONEncoder(json.JSONEncoder): def default(self, o): if isinstance( - o, (TargetDevice, ModelType, QuantizationPreset, OverflowFix, StatisticsType, AggregatorType, DropType) + o, + ( + TargetDevice, + ModelType, + QuantizationPreset, + OverflowFix, + StatisticsType, + AggregatorType, + DropType, + QuantizationMode, + ), ): return o.value if isinstance(o, (IgnoredScope, AdvancedQuantizationParameters, AdvancedAccuracyRestorerParameters)): @@ -431,9 +442,9 @@ def update_quantization_parameters(quantization_params, pot_config): mode = pot_config.get("mode") if mode is not None: if mode == "symmetric": - quantization_params.mode = QuantizationScheme.SYMMETRIC elif mode == "asymmetric": - quantization_params.mode = QuantizationMode.ASYMMETRIC + quantization_params.mode = QuantizationScheme.ASYMMETRIC else: raise ValueError(f"mode = {mode} is not supported") granularity = pot_config.get("granularity") diff --git a/tests/post_training/test_templates/test_calculate_quantizer_parameters.py 
b/tests/post_training/test_templates/test_calculate_quantizer_parameters.py index ebcc5df7e75..9c643fb0a63 100644 --- a/tests/post_training/test_templates/test_calculate_quantizer_parameters.py +++ b/tests/post_training/test_templates/test_calculate_quantizer_parameters.py @@ -16,7 +16,7 @@ import numpy as np import pytest -from nncf.common.quantization.structs import QuantizationMode +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import QuantizerGroup from nncf.experimental.tensor import functions as fns diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 6d4ad6fe99d..ec8c2155da6 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -19,8 +19,8 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.operator_metatypes import OutputNoopMetatype from nncf.common.graph.transformations.commands import TargetType -from nncf.common.quantization.structs import QuantizationMode from nncf.common.quantization.structs import QuantizationPreset +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import QuantizerGroup from nncf.common.tensor_statistics.statistic_point import StatisticPoint diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index 9f026250769..b058d1cbe7c 100644 --- a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -21,8 +21,8 @@ from nncf.common.quantization.quantizer_setup import ActivationQuantizationInsertionPoint from nncf.common.quantization.quantizer_setup import SingleConfigQuantizationPoint from nncf.common.quantization.quantizer_setup import WeightQuantizationInsertionPoint -from nncf.common.quantization.structs import QuantizationMode from nncf.common.quantization.structs import QuantizationPreset +from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import QuantizerGroup from nncf.common.tensor_statistics.collectors import ReductionAxes diff --git a/tests/tensorflow/quantization/test_algorithm_quantization.py b/tests/tensorflow/quantization/test_algorithm_quantization.py index b26438ca564..2964efe7e88 100644 --- a/tests/tensorflow/quantization/test_algorithm_quantization.py +++ b/tests/tensorflow/quantization/test_algorithm_quantization.py @@ -17,7 +17,7 @@ # TODO(nlyalyus): WA for the bug 58886, QuantizationMode should be imported after nncf.tensorflow. 
# Otherwise test_quantize_inputs and test_quantize_outputs_removal will fail, because of invalid inputs quantization
-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.tensorflow.graph.metatypes.matcher import get_keras_layer_metatype
from nncf.tensorflow.layers.custom_objects import NNCF_QUANTIZATION_OPERATIONS
from nncf.tensorflow.layers.data_layout import get_channel_axis
diff --git a/tests/tensorflow/quantization/test_builder_state.py b/tests/tensorflow/quantization/test_builder_state.py
index 70b01b0221d..482b421ac5b 100644
--- a/tests/tensorflow/quantization/test_builder_state.py
+++ b/tests/tensorflow/quantization/test_builder_state.py
@@ -17,7 +17,7 @@
from examples.tensorflow.classification.main import load_compression_state
from nncf.common.graph.transformations.commands import TargetPoint
from nncf.common.graph.transformations.commands import TargetType
-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.tensorflow import create_compression_callbacks
from nncf.tensorflow import register_default_init_args
from nncf.tensorflow.callbacks.checkpoint_callback import CheckpointManagerCallback
diff --git a/tests/tensorflow/quantization/test_overflow_issue.py b/tests/tensorflow/quantization/test_overflow_issue.py
index 83cd2df8728..73e4832eb3e 100644
--- a/tests/tensorflow/quantization/test_overflow_issue.py
+++ b/tests/tensorflow/quantization/test_overflow_issue.py
@@ -13,7 +13,7 @@
import pytest
import tensorflow as tf

-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.tensorflow.layers.custom_objects import NNCF_QUANTIZATION_OPERATIONS
from nncf.tensorflow.layers.wrapper import NNCFWrapper
from nncf.tensorflow.quantization.quantizers import Quantizer
diff --git a/tests/tensorflow/quantization/test_ptq_params.py b/tests/tensorflow/quantization/test_ptq_params.py
index d42ff974815..acc38abf5a6 100644
--- a/tests/tensorflow/quantization/test_ptq_params.py
+++ b/tests/tensorflow/quantization/test_ptq_params.py
@@ -14,10 +14,10 @@

from nncf import NNCFConfig
from nncf.common.quantization.structs import QuantizationPreset
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.parameters import TargetDevice
from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
from nncf.quantization.advanced_parameters import OverflowFix
-from nncf.quantization.advanced_parameters import QuantizationMode
from nncf.quantization.advanced_parameters import QuantizationParameters
from nncf.quantization.range_estimator import RangeEstimatorParametersSet
from nncf.scopes import IgnoredScope
diff --git a/tests/tensorflow/quantization/test_range_init.py b/tests/tensorflow/quantization/test_range_init.py
index ad3cdf893e0..0de8330da62 100644
--- a/tests/tensorflow/quantization/test_range_init.py
+++ b/tests/tensorflow/quantization/test_range_init.py
@@ -16,7 +16,7 @@

from nncf.common.quantization.initialization.range import PerLayerRangeInitConfig
from nncf.common.quantization.initialization.range import RangeInitConfig
-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.common.quantization.structs import QuantizerConfig
from nncf.tensorflow.layers.operation import InputType
from nncf.tensorflow.layers.wrapper import NNCFWrapper
diff --git a/tests/tensorflow/test_transformations.py b/tests/tensorflow/test_transformations.py
index 40811afd1cc..08eec219511 100644
--- a/tests/tensorflow/test_transformations.py
+++ b/tests/tensorflow/test_transformations.py
@@ -20,7 +20,7 @@
from nncf.common.graph.transformations.commands import TargetType
from nncf.common.graph.transformations.commands import TransformationPriority
from nncf.common.graph.transformations.commands import TransformationType
-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.common.quantization.structs import QuantizerConfig
from nncf.common.utils.dot_file_rw import write_dot_graph
from nncf.tensorflow import tf_internals
diff --git a/tests/torch/conftest.py b/tests/torch/conftest.py
index 1a428014f57..203c014c32f 100644
--- a/tests/torch/conftest.py
+++ b/tests/torch/conftest.py
@@ -22,7 +22,7 @@
    import torch
except:  # noqa: E722
    torch = None
-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from tests.shared.case_collection import COMMON_SCOPE_MARKS_VS_OPTIONS
from tests.shared.case_collection import skip_marked_cases_if_options_not_specified
from tests.shared.install_fixtures import tmp_venv_with_nncf  # noqa: F401
diff --git a/tests/torch/ptq/test_calculation_quantizer_params.py b/tests/torch/ptq/test_calculation_quantizer_params.py
index 5b37790349f..811228b5d64 100644
--- a/tests/torch/ptq/test_calculation_quantizer_params.py
+++ b/tests/torch/ptq/test_calculation_quantizer_params.py
@@ -21,8 +21,8 @@
from nncf import Dataset
from nncf import NNCFConfig
from nncf.common.graph.transformations.commands import TargetType
-from nncf.common.quantization.structs import QuantizationMode
from nncf.common.quantization.structs import QuantizationPreset
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.common.quantization.structs import QuantizerConfig
from nncf.common.quantization.structs import QuantizerGroup
from nncf.experimental.tensor import Tensor
diff --git a/tests/torch/quantization/test_algo_quantization.py b/tests/torch/quantization/test_algo_quantization.py
index c3e42f11173..89a95d164c8 100644
--- a/tests/torch/quantization/test_algo_quantization.py
+++ b/tests/torch/quantization/test_algo_quantization.py
@@ -25,7 +25,7 @@
from nncf.api.compression import CompressionScheduler
from nncf.common.hardware.config import HWConfigType
from nncf.common.quantization.structs import NonWeightQuantizerId
-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.common.quantization.structs import QuantizerConfig
from nncf.common.quantization.structs import WeightQuantizerId
from nncf.common.utils.debug import nncf_debug
diff --git a/tests/torch/quantization/test_functions.py b/tests/torch/quantization/test_functions.py
index fa17bbe69fc..edec62bb1b7 100644
--- a/tests/torch/quantization/test_functions.py
+++ b/tests/torch/quantization/test_functions.py
@@ -16,7 +16,7 @@
from torch.autograd import Variable
from torch.distributions.uniform import Uniform

-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.torch.quantization.quantize_functions import asymmetric_quantize
from nncf.torch.quantization.quantize_functions import get_scale_zp_from_input_low_input_high
from nncf.torch.quantization.quantize_functions import symmetric_quantize
diff --git a/tests/torch/quantization/test_hw_config.py b/tests/torch/quantization/test_hw_config.py
index 4cdbdedaa54..ee91a648732 100644
--- a/tests/torch/quantization/test_hw_config.py
+++ b/tests/torch/quantization/test_hw_config.py
@@ -12,7 +12,7 @@
import torch

from nncf.common.quantization.quantizer_setup import DEFAULT_QUANTIZER_CONFIG
-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.torch.dynamic_graph.io_handling import FillerInputElement
from nncf.torch.dynamic_graph.io_handling import FillerInputInfo
from nncf.torch.hardware.config import PTHWConfig
diff --git a/tests/torch/quantization/test_onnx_export.py b/tests/torch/quantization/test_onnx_export.py
index dfae2ea966f..7b875573617 100644
--- a/tests/torch/quantization/test_onnx_export.py
+++ b/tests/torch/quantization/test_onnx_export.py
@@ -17,7 +17,7 @@
from torch import nn

from nncf import NNCFConfig
-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.torch.quantization.layers import QUANTIZATION_MODULES
from nncf.torch.quantization.layers import AsymmetricQuantizer
from nncf.torch.quantization.layers import BaseQuantizer
diff --git a/tests/torch/quantization/test_overflow_issue_export.py b/tests/torch/quantization/test_overflow_issue_export.py
index 04d24f9eea9..2c12987a205 100644
--- a/tests/torch/quantization/test_overflow_issue_export.py
+++ b/tests/torch/quantization/test_overflow_issue_export.py
@@ -16,10 +16,10 @@
import torch
from torch import nn

+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.torch.checkpoint_loading import load_state
from nncf.torch.quantization.layers import AsymmetricQuantizer
from nncf.torch.quantization.layers import PTQuantizerSpec
-from nncf.torch.quantization.layers import QuantizationMode
from nncf.torch.quantization.layers import SymmetricQuantizer
from tests.torch.helpers import TwoConvTestModel
from tests.torch.helpers import create_compressed_model_and_algo_for_test
diff --git a/tests/torch/quantization/test_range_init.py b/tests/torch/quantization/test_range_init.py
index 8d0152e0a63..84c20da89bb 100644
--- a/tests/torch/quantization/test_range_init.py
+++ b/tests/torch/quantization/test_range_init.py
@@ -30,7 +30,7 @@
from nncf.common.quantization.quantizer_setup import ActivationQuantizationInsertionPoint
from nncf.common.quantization.quantizer_setup import SingleConfigQuantizationPoint
from nncf.common.quantization.quantizer_setup import WeightQuantizationInsertionPoint
-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.common.quantization.structs import QuantizerConfig
from nncf.common.quantization.structs import QuantizerGroup
from nncf.config import NNCFConfig
diff --git a/tests/torch/quantization/test_strip.py b/tests/torch/quantization/test_strip.py
index 3b1f1940ce2..e1127572555 100644
--- a/tests/torch/quantization/test_strip.py
+++ b/tests/torch/quantization/test_strip.py
@@ -20,7 +20,7 @@
from nncf.common.quantization.quantizers import calculate_asymmetric_level_ranges
from nncf.common.quantization.quantizers import calculate_symmetric_level_ranges
from nncf.common.quantization.quantizers import get_num_levels
-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.config import NNCFConfig
from nncf.torch.nncf_network import ExtraCompressionModuleType
from nncf.torch.quantization.layers import AsymmetricQuantizer
diff --git a/tests/torch/quantization/test_tracing.py b/tests/torch/quantization/test_tracing.py
index 06068f6b5e4..d054c175a92 100644
--- a/tests/torch/quantization/test_tracing.py
+++ b/tests/torch/quantization/test_tracing.py
@@ -11,7 +11,7 @@
import torch
from torch import nn

-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.torch.quantization.layers import AsymmetricQuantizer
from nncf.torch.quantization.layers import PTQuantizerSpec
from nncf.torch.quantization.layers import SymmetricQuantizer
diff --git a/tests/torch/test_model_transformer.py b/tests/torch/test_model_transformer.py
index 7aab94c2b08..992a99ad914 100644
--- a/tests/torch/test_model_transformer.py
+++ b/tests/torch/test_model_transformer.py
@@ -31,7 +31,7 @@
from nncf.common.insertion_point_graph import InsertionPointGraphNodeType
from nncf.common.insertion_point_graph import PostHookInsertionPoint
from nncf.common.insertion_point_graph import PreHookInsertionPoint
-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.common.utils.backend import BackendType
from nncf.common.utils.dot_file_rw import get_graph_without_data
from nncf.common.utils.dot_file_rw import read_dot_graph
diff --git a/tools/benchmark_quantize_layers.py b/tools/benchmark_quantize_layers.py
index e1554e2a131..3ffeeb78c05 100644
--- a/tools/benchmark_quantize_layers.py
+++ b/tools/benchmark_quantize_layers.py
@@ -21,7 +21,7 @@
import torch.multiprocessing as mp
from tqdm import tqdm

-from nncf.common.quantization.structs import QuantizationMode
+from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
from nncf.torch.quantization.layers import AsymmetricQuantizer
from nncf.torch.quantization.layers import BaseQuantizer
from nncf.torch.quantization.layers import PTQuantizerSpec
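
Every hunk above follows the same mechanical pattern: the enum is renamed to QuantizationScheme in nncf.common.quantization.structs, and each consumer re-binds it to the old local name at import time, so no call site inside the module has to change. A minimal sketch of what downstream code sees after this patch (the QuantizerConfig call is illustrative, assuming its num_bits/mode parameters behave as before the rename):

    # Both spellings resolve to the same renamed class.
    from nncf.common.quantization.structs import QuantizationScheme
    from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
    from nncf.common.quantization.structs import QuantizerConfig

    # The alias is the identical class object, not a copy.
    assert QuantizationMode is QuantizationScheme

    # Code written against the old name keeps working unchanged.
    config = QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC)
    assert config.mode == QuantizationScheme.SYMMETRIC

Aliasing at the import line rather than renaming every use keeps the patch small and mechanical: only one line per consumer changes, and the diff stays easy to review.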