diff --git a/model_compression_toolkit/constants.py b/model_compression_toolkit/constants.py index 7d5817036..ec335a608 100644 --- a/model_compression_toolkit/constants.py +++ b/model_compression_toolkit/constants.py @@ -18,9 +18,6 @@ TENSORFLOW = 'tensorflow' PYTORCH = 'pytorch' -# Metadata fields -MCT_VERSION = 'mct_version' -TPC_VERSION = 'tpc_version' WEIGHTS_SIGNED = True # Minimal threshold to use for quantization ranges: diff --git a/model_compression_toolkit/core/common/graph/base_node.py b/model_compression_toolkit/core/common/graph/base_node.py index b90bc6a87..4b560f301 100644 --- a/model_compression_toolkit/core/common/graph/base_node.py +++ b/model_compression_toolkit/core/common/graph/base_node.py @@ -22,8 +22,9 @@ ACTIVATION_N_BITS_ATTRIBUTE, FP32_BYTES_PER_PARAMETER from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig from model_compression_toolkit.logger import Logger -from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationConfigOptions, \ - TargetPlatformCapabilities, LayerFilterParams, OpQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import QuantizationConfigOptions, \ + OpQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities, LayerFilterParams class BaseNode: diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py index ff3a27c87..b826b2c19 100644 --- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py @@ -23,8 +23,8 @@ from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner -from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities, \ - QuantizationConfigOptions +from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import QuantizationConfigOptions def compute_resource_utilization_data(in_model: Any, diff --git a/model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py b/model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py index 41c993e6a..0fe5c7c94 100644 --- a/model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +++ b/model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py @@ -17,8 +17,8 @@ from model_compression_toolkit.core import QuantizationConfig from model_compression_toolkit.core.common.quantization.node_quantization_config import BaseNodeQuantizationConfig, \ NodeWeightsQuantizationConfig, NodeActivationQuantizationConfig -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema 
import AttributeQuantizationConfig, \ + OpQuantizationConfig from model_compression_toolkit.logger import Logger diff --git a/model_compression_toolkit/core/common/quantization/node_quantization_config.py b/model_compression_toolkit/core/common/quantization/node_quantization_config.py index a790cbc77..cfc36698e 100644 --- a/model_compression_toolkit/core/common/quantization/node_quantization_config.py +++ b/model_compression_toolkit/core/common/quantization/node_quantization_config.py @@ -25,8 +25,8 @@ from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig, \ QuantizationErrorMethod -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import AttributeQuantizationConfig, \ + OpQuantizationConfig ########################################## diff --git a/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py b/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py index 8c9c759d2..ca3d7c733 100644 --- a/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +++ b/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py @@ -15,7 +15,8 @@ import numpy as np from typing import Dict, Union -from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod, Signedness +from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import Signedness from model_compression_toolkit.core.common.collectors.statistics_collector import BaseStatsCollector from model_compression_toolkit.core.common.quantization import quantization_params_generation from model_compression_toolkit.core.common.node_prior_info import NodePriorInfo diff --git a/model_compression_toolkit/core/common/quantization/set_node_quantization_config.py b/model_compression_toolkit/core/common/quantization/set_node_quantization_config.py index e005f1a2e..2e6cc8d9d 100644 --- a/model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +++ b/model_compression_toolkit/core/common/quantization/set_node_quantization_config.py @@ -33,7 +33,7 @@ from model_compression_toolkit.core.common.quantization.quantization_fn_selection import \ get_weights_quantization_fn from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import OpQuantizationConfig, \ +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import OpQuantizationConfig, \ QuantizationConfigOptions diff --git a/model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py b/model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py index 293e3dcce..5e3945ca1 100644 --- a/model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +++ 
b/model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py @@ -17,7 +17,7 @@ from model_compression_toolkit.core.common import BaseNode, Graph from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig -from model_compression_toolkit.target_platform_capabilities.target_platform import AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import AttributeQuantizationConfig def apply_activation_bias_correction_to_graph(graph: Graph, diff --git a/model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py b/model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py index 3a37a97d9..72dd834bd 100644 --- a/model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +++ b/model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py @@ -19,7 +19,7 @@ from model_compression_toolkit.core.common import Graph, BaseNode from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig -from model_compression_toolkit.target_platform_capabilities.target_platform import AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import AttributeQuantizationConfig def apply_bias_correction_to_graph(graph_to_apply_bias_correction: Graph, diff --git a/model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py b/model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py index 482fa4fce..256501365 100644 --- a/model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +++ b/model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py @@ -26,8 +26,8 @@ from model_compression_toolkit.core.common.graph.base_graph import Graph from model_compression_toolkit.core.common.graph.base_node import BaseNode from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher -from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod, \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import AttributeQuantizationConfig class BatchNormalizationReconstruction(common.BaseSubstitution): diff --git a/model_compression_toolkit/core/common/substitutions/shift_negative_activation.py b/model_compression_toolkit/core/common/substitutions/shift_negative_activation.py index f5f643295..73e216885 100644 --- a/model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +++ b/model_compression_toolkit/core/common/substitutions/shift_negative_activation.py @@ -22,8 +22,8 @@ from model_compression_toolkit.core.common import FrameworkInfo, Graph, BaseNode from model_compression_toolkit.constants import THRESHOLD, SIGNED, SHIFT_NEGATIVE_NON_LINEAR_NUM_BITS from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher -from 
model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod, \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import AttributeQuantizationConfig from model_compression_toolkit.core.common.quantization.set_node_quantization_config import create_node_activation_qc, \ set_quantization_configs_to_node from model_compression_toolkit.core.common.quantization.core_config import CoreConfig diff --git a/model_compression_toolkit/metadata.py b/model_compression_toolkit/metadata.py index bcbd73e76..192baad82 100644 --- a/model_compression_toolkit/metadata.py +++ b/model_compression_toolkit/metadata.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +from dataclasses import dataclass, asdict from typing import Dict, Any -from model_compression_toolkit.constants import MCT_VERSION, TPC_VERSION, OPERATORS_SCHEDULING, FUSED_NODES_MAPPING, \ - CUTS, MAX_CUT, OP_ORDER, OP_RECORD, SHAPE, NODE_OUTPUT_INDEX, NODE_NAME, TOTAL_SIZE, MEM_ELEMENTS +from model_compression_toolkit.constants import OPERATORS_SCHEDULING, FUSED_NODES_MAPPING, CUTS, MAX_CUT, OP_ORDER, \ + OP_RECORD, SHAPE, NODE_OUTPUT_INDEX, NODE_NAME, TOTAL_SIZE, MEM_ELEMENTS from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import SchedulerInfo from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities @@ -43,13 +44,21 @@ def create_model_metadata(tpc: TargetPlatformCapabilities, def get_versions_dict(tpc) -> Dict: """ - Returns: A dictionary with TPC and MCT versions. + Returns: A dictionary with TPC, MCT and TPC-Schema versions. """ # imported inside to avoid circular import error from model_compression_toolkit import __version__ as mct_version - tpc_version = f'{tpc.name}.{tpc.version}' - return {MCT_VERSION: mct_version, TPC_VERSION: tpc_version} + + @dataclass + class TPCVersions: + mct_version: str + tpc_minor_version: str = f'{tpc.tp_model.tpc_minor_version}' + tpc_patch_version: str = f'{tpc.tp_model.tpc_patch_version}' + tpc_platform_type: str = f'{tpc.tp_model.tpc_platform_type}' + tpc_schema: str = f'{tpc.tp_model.SCHEMA_VERSION}' + + return asdict(TPCVersions(mct_version)) def get_scheduler_metadata(scheduler_info: SchedulerInfo) -> Dict[str, Any]: diff --git a/model_compression_toolkit/qat/keras/quantizer/README.md b/model_compression_toolkit/qat/keras/quantizer/README.md index afa5ffcd6..7f0252cb6 100644 --- a/model_compression_toolkit/qat/keras/quantizer/README.md +++ b/model_compression_toolkit/qat/keras/quantizer/README.md @@ -5,7 +5,7 @@ Several training methods may be applied by the user to train the QAT ready model created by `keras_quantization_aware_training_init` method in [`keras/quantization_facade`](../quantization_facade.py). Each `TrainingMethod` (an enum defined in the [`qat_config`](../../common/qat_config.py)) -and [`QuantizationMethod`](../../../target_platform_capabilities/target_platform/op_quantization_config.py) +and `QuantizationMethod` selects a quantizer for weights and a quantizer for activations. Currently, only the STE (straight through estimator) training method is implemented by the MCT. 
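Side note on the get_versions_dict change above: the per-field version strings are now collected through a small dataclass and serialized with dataclasses.asdict. A minimal sketch of the pattern follows; the version values are hypothetical stand-ins for the real tpc.tp_model fields (tpc_minor_version, tpc_patch_version, tpc_platform_type, SCHEMA_VERSION):

from dataclasses import dataclass, asdict

@dataclass
class TPCVersions:
    mct_version: str
    tpc_minor_version: str = '1'       # stand-in for tpc.tp_model.tpc_minor_version
    tpc_patch_version: str = '0'       # stand-in for tpc.tp_model.tpc_patch_version
    tpc_platform_type: str = 'imx500'  # stand-in for tpc.tp_model.tpc_platform_type
    tpc_schema: str = '1'              # stand-in for tpc.tp_model.SCHEMA_VERSION

# Hypothetical MCT version string; asdict() flattens the dataclass to the metadata dict.
print(asdict(TPCVersions('2.2.0')))
# {'mct_version': '2.2.0', 'tpc_minor_version': '1', 'tpc_patch_version': '0', 'tpc_platform_type': 'imx500', 'tpc_schema': '1'}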
diff --git a/model_compression_toolkit/qat/pytorch/quantizer/README.md b/model_compression_toolkit/qat/pytorch/quantizer/README.md index 9a0d911d0..169e335c2 100644 --- a/model_compression_toolkit/qat/pytorch/quantizer/README.md +++ b/model_compression_toolkit/qat/pytorch/quantizer/README.md @@ -5,7 +5,7 @@ Several training methods may be applied by the user to train the QAT ready model created by `pytorch_quantization_aware_training_init` method in [`pytorch/quantization_facade`](../quantization_facade.py). Each [`TrainingMethod`](../../../trainable_infrastructure/common/training_method.py) -and [`QuantizationMethod`](../../../target_platform_capabilities/target_platform/op_quantization_config.py) +and `QuantizationMethod` selects a quantizer for weights and a quantizer for activations. ## Make your own training method diff --git a/model_compression_toolkit/target_platform_capabilities/schema/__init__.py b/model_compression_toolkit/target_platform_capabilities/schema/__init__.py new file mode 100644 index 000000000..ea3047f32 --- /dev/null +++ b/model_compression_toolkit/target_platform_capabilities/schema/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== \ No newline at end of file diff --git a/model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py b/model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py new file mode 100644 index 000000000..ee6c85ea1 --- /dev/null +++ b/model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py @@ -0,0 +1,11 @@ +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema + +Signedness = schema.Signedness +AttributeQuantizationConfig = schema.AttributeQuantizationConfig +OpQuantizationConfig = schema.OpQuantizationConfig +QuantizationConfigOptions = schema.QuantizationConfigOptions +OperatorsSetBase = schema.OperatorsSetBase +OperatorsSet = schema.OperatorsSet +OperatorSetConcat = schema.OperatorSetConcat +Fusing = schema.Fusing +TargetPlatformModel = schema.TargetPlatformModel diff --git a/model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py b/model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py new file mode 100644 index 000000000..105136647 --- /dev/null +++ b/model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py @@ -0,0 +1,37 @@ +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +import copy +from typing import Any, Dict + + +def clone_and_edit_object_params(obj: Any, **kwargs: Dict) -> Any: + """ + Clones the given object and edits some of its parameters. + + Args: + obj: An object to clone. + **kwargs: Keyword arguments to edit in the cloned object. + + Returns: + Edited copy of the given object. + """ + + obj_copy = copy.deepcopy(obj) + for k, v in kwargs.items(): + assert hasattr(obj_copy, + k), f'Edit parameter is possible only for existing parameters in the given object, ' \ + f'but {k} is not a parameter of {obj_copy}.' + setattr(obj_copy, k, v) + return obj_copy diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py b/model_compression_toolkit/target_platform_capabilities/schema/v1.py similarity index 53% rename from model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py rename to model_compression_toolkit/target_platform_capabilities/schema/v1.py index 1ee257ec3..019454642 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +++ b/model_compression_toolkit/target_platform_capabilities/schema/v1.py @@ -1,4 +1,4 @@ -# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved. +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,14 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - import copy -from typing import List, Dict, Union, Any, Tuple + from enum import Enum +import pprint + +from typing import Dict, Any, Union, Tuple, List, Optional + from mct_quantizers import QuantizationMethod from model_compression_toolkit.constants import FLOAT_BITWIDTH + from model_compression_toolkit.logger import Logger +from model_compression_toolkit.target_platform_capabilities.constants import OPS_SET_LIST +from model_compression_toolkit.target_platform_capabilities.immutable import ImmutableClass +from model_compression_toolkit.target_platform_capabilities.target_platform.current_tp_model import \ + get_current_tp_model, _current_tp_model +from model_compression_toolkit.target_platform_capabilities.schema.schema_functions import clone_and_edit_object_params class Signedness(Enum): @@ -35,27 +44,6 @@ class Signedness(Enum): UNSIGNED = 2 -def clone_and_edit_object_params(obj: Any, **kwargs: Dict) -> Any: - """ - Clones the given object and edit some of its parameters. - - Args: - obj: An object to clone. - **kwargs: Keyword arguments to edit in the cloned object. - - Returns: - Edited copy of the given object.
- """ - - obj_copy = copy.deepcopy(obj) - for k, v in kwargs.items(): - assert hasattr(obj_copy, - k), f'Edit parameter is possible only for existing parameters in the given object, ' \ - f'but {k} is not a parameter of {obj_copy}.' - setattr(obj_copy, k, v) - return obj_copy - - class AttributeQuantizationConfig: """ Hold the quantization configuration of a weight attribute of a layer. @@ -376,3 +364,368 @@ def __edit_quantization_configuration(self, qc, kwargs): def get_info(self): return {f'option {i}': cfg.get_info() for i, cfg in enumerate(self.quantization_config_list)} + +class TargetPlatformModelComponent: + """ + Component of TargetPlatformModel (Fusing, OperatorsSet, etc.) + """ + def __init__(self, name: str): + """ + + Args: + name: Name of component. + """ + self.name = name + _current_tp_model.get().append_component(self) + + def get_info(self) -> Dict[str, Any]: + """ + + Returns: Get information about the component to display (return an empty dictionary. + the actual component should fill it with info). + + """ + return {} + + +class OperatorsSetBase(TargetPlatformModelComponent): + """ + Base class to represent a set of operators. + """ + def __init__(self, name: str): + """ + + Args: + name: Name of OperatorsSet. + """ + super().__init__(name=name) + + +class OperatorsSet(OperatorsSetBase): + def __init__(self, + name: str, + qc_options: QuantizationConfigOptions = None): + """ + Set of operators that are represented by a unique label. + + Args: + name (str): Set's label (must be unique in a TargetPlatformModel). + qc_options (QuantizationConfigOptions): Configuration options to use for this set of operations. + """ + + super().__init__(name) + self.qc_options = qc_options + is_fusing_set = qc_options is None + self.is_default = _current_tp_model.get().default_qco == self.qc_options or is_fusing_set + + + def get_info(self) -> Dict[str,Any]: + """ + + Returns: Info about the set as a dictionary. + + """ + return {"name": self.name, + "is_default_qc": self.is_default} + + +class OperatorSetConcat(OperatorsSetBase): + """ + Concatenate a list of operator sets to treat them similarly in different places (like fusing). + """ + def __init__(self, *opsets: OperatorsSet): + """ + Group a list of operation sets. + + Args: + *opsets (OperatorsSet): List of operator sets to group. + """ + name = "_".join([a.name for a in opsets]) + super().__init__(name=name) + self.op_set_list = opsets + self.qc_options = None # Concat have no qc options + + def get_info(self) -> Dict[str,Any]: + """ + + Returns: Info about the sets group as a dictionary. + + """ + return {"name": self.name, + OPS_SET_LIST: [s.name for s in self.op_set_list]} + + +class Fusing(TargetPlatformModelComponent): + """ + Fusing defines a list of operators that should be combined and treated as a single operator, + hence no quantization is applied between them. + """ + + def __init__(self, + operator_groups_list: List[Union[OperatorsSet, OperatorSetConcat]], + name: str = None): + """ + Args: + operator_groups_list (List[Union[OperatorsSet, OperatorSetConcat]]): A list of operator groups, each being either an OperatorSetConcat or an OperatorsSet. + name (str): The name for the Fusing instance. If not provided, it's generated from the operator groups' names. 
+ """ + assert isinstance(operator_groups_list, + list), f'List of operator groups should be of type list but is {type(operator_groups_list)}' + assert len(operator_groups_list) >= 2, f'Fusing can not be created for a single operators group' + + # Generate a name from the operator groups if no name is provided + if name is None: + name = '_'.join([x.name for x in operator_groups_list]) + + super().__init__(name) + self.operator_groups_list = operator_groups_list + + def contains(self, other: Any) -> bool: + """ + Determines if the current Fusing instance contains another Fusing instance. + + Args: + other: The other Fusing instance to check against. + + Returns: + A boolean indicating whether the other instance is contained within this one. + """ + if not isinstance(other, Fusing): + return False + + # Check for containment by comparing operator groups + for i in range(len(self.operator_groups_list) - len(other.operator_groups_list) + 1): + for j in range(len(other.operator_groups_list)): + if self.operator_groups_list[i + j] != other.operator_groups_list[j] and not ( + isinstance(self.operator_groups_list[i + j], OperatorSetConcat) and ( + other.operator_groups_list[j] in self.operator_groups_list[i + j].op_set_list)): + break + else: + # If all checks pass, the other Fusing instance is contained + return True + # Other Fusing instance is not contained + return False + + def get_info(self): + """ + Retrieves information about the Fusing instance, including its name and the sequence of operator groups. + + Returns: + A dictionary with the Fusing instance's name as the key and the sequence of operator groups as the value, + or just the sequence of operator groups if no name is set. + """ + if self.name is not None: + return {self.name: ' -> '.join([x.name for x in self.operator_groups_list])} + return ' -> '.join([x.name for x in self.operator_groups_list]) + + +class TargetPlatformModel(ImmutableClass): + """ + Represents the hardware configuration used for quantized model inference. + + This model defines: + - The operators and their associated quantization configurations. + - Fusing patterns, enabling multiple operators to be combined into a single operator + for optimization during inference. + - Versioning support through minor and patch versions for backward compatibility. + + Attributes: + SCHEMA_VERSION (int): The schema version of the target platform model. + """ + SCHEMA_VERSION = 1 + def __init__(self, + default_qco: QuantizationConfigOptions, + tpc_minor_version: Optional[int], + tpc_patch_version: Optional[int], + tpc_platform_type: Optional[str], + add_metadata: bool = True, + name="default_tp_model"): + """ + + Args: + default_qco (QuantizationConfigOptions): Default QuantizationConfigOptions to use for operators that their QuantizationConfigOptions are not defined in the model. + tpc_minor_version (Optional[int]): The minor version of the target platform capabilities. + tpc_patch_version (Optional[int]): The patch version of the target platform capabilities. + tpc_platform_type (Optional[str]): The platform type of the target platform capabilities. + add_metadata (bool): Whether to add metadata to the model or not. + name (str): Name of the model. + + Raises: + AssertionError: If the provided `default_qco` does not contain exactly one quantization configuration. 
+ """ + + super().__init__() + self.tpc_minor_version = tpc_minor_version + self.tpc_patch_version = tpc_patch_version + self.tpc_platform_type = tpc_platform_type + self.add_metadata = add_metadata + self.name = name + self.operator_set = [] + assert isinstance(default_qco, QuantizationConfigOptions), \ + "default_qco must be an instance of QuantizationConfigOptions" + assert len(default_qco.quantization_config_list) == 1, \ + "Default QuantizationConfigOptions must contain exactly one option." + + self.default_qco = default_qco + self.fusing_patterns = [] + self.is_simd_padding = False + + def get_config_options_by_operators_set(self, + operators_set_name: str) -> QuantizationConfigOptions: + """ + Get the QuantizationConfigOptions of a OperatorsSet by the OperatorsSet name. + If the name is not in the model, the default QuantizationConfigOptions is returned. + + Args: + operators_set_name: Name of OperatorsSet to get. + + Returns: + QuantizationConfigOptions to use for ops in OperatorsSet named operators_set_name. + """ + for op_set in self.operator_set: + if operators_set_name == op_set.name: + return op_set.qc_options + return self.default_qco + + def get_default_op_quantization_config(self) -> OpQuantizationConfig: + """ + + Returns: The default OpQuantizationConfig of the TargetPlatformModel. + + """ + assert len(self.default_qco.quantization_config_list) == 1, \ + f'Default quantization configuration options must contain only one option,' \ + f' but found {len(get_current_tp_model().default_qco.quantization_config_list)} configurations.' + return self.default_qco.quantization_config_list[0] + + def is_opset_in_model(self, + opset_name: str) -> bool: + """ + Check whether an operators set is defined in the model or not. + + Args: + opset_name: Operators set name to check. + + Returns: + Whether an operators set is defined in the model or not. + """ + return opset_name in [x.name for x in self.operator_set] + + def get_opset_by_name(self, + opset_name: str) -> OperatorsSetBase: + """ + Get an OperatorsSet object from the model by its name. + If name is not in the model - None is returned. + + Args: + opset_name: OperatorsSet name to retrieve. + + Returns: + OperatorsSet object with the name opset_name, or None if opset_name is not in the model. + """ + + opset_list = [x for x in self.operator_set if x.name == opset_name] + assert len(opset_list) <= 1, f'Found more than one OperatorsSet in' \ + f' TargetPlatformModel with the name {opset_name}. ' \ + f'OperatorsSet name must be unique.' + if len(opset_list) == 0: # opset_name is not in the model. + return None + + return opset_list[0] # There's one opset with that name + + def append_component(self, + tp_model_component: TargetPlatformModelComponent): + """ + Attach a TargetPlatformModel component to the model. Components can be for example: + Fusing, OperatorsSet, etc. + + Args: + tp_model_component: Component to attach to the model. + + """ + if isinstance(tp_model_component, Fusing): + self.fusing_patterns.append(tp_model_component) + elif isinstance(tp_model_component, OperatorsSetBase): + self.operator_set.append(tp_model_component) + else: # pragma: no cover + Logger.critical(f'Attempted to append an unrecognized TargetPlatformModelComponent of type: {type(tp_model_component)}.') + + def __enter__(self): + """ + Start defining the TargetPlatformModel using 'with'. + + Returns: Initialized TargetPlatformModel object. 
+ + """ + _current_tp_model.set(self) + return self + + def __exit__(self, exc_type, exc_value, tb): + """ + Finish defining the TargetPlatformModel at the end of the 'with' clause. + Returns the final and immutable TargetPlatformModel instance. + """ + + if exc_value is not None: + print(exc_value, exc_value.args) + raise exc_value + self.__validate_model() # Assert that model is valid. + _current_tp_model.reset() + self.initialized_done() # Make model immutable. + return self + + def __validate_model(self): + """ + + Assert model is valid. + Model is invalid if, for example, it contains multiple operator sets with the same name, + as their names should be unique. + + """ + opsets_names = [op.name for op in self.operator_set] + if len(set(opsets_names)) != len(opsets_names): + Logger.critical(f'Operator Sets must have unique names.') + + def get_default_config(self) -> OpQuantizationConfig: + """ + + Returns: + + """ + assert len(self.default_qco.quantization_config_list) == 1, \ + f'Default quantization configuration options must contain only one option,' \ + f' but found {len(self.default_qco.quantization_config_list)} configurations.' + return self.default_qco.quantization_config_list[0] + + def get_info(self) -> Dict[str, Any]: + """ + + Returns: Dictionary that summarizes the TargetPlatformModel properties (for display purposes). + + """ + return {"Model name": self.name, + "Default quantization config": self.get_default_config().get_info(), + "Operators sets": [o.get_info() for o in self.operator_set], + "Fusing patterns": [f.get_info() for f in self.fusing_patterns] + } + + def show(self): + """ + + Display the TargetPlatformModel. + + """ + pprint.pprint(self.get_info(), sort_dicts=False) + + def set_simd_padding(self, + is_simd_padding: bool): + """ + Set flag is_simd_padding to indicate whether this TP model defines + that padding due to SIMD constrains occurs. + + Args: + is_simd_padding: Whether this TP model defines that padding due to SIMD constrains occurs. + + """ + self.is_simd_padding = is_simd_padding + diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py b/model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py index 7767a9a11..e01363b71 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py @@ -13,13 +13,11 @@ # limitations under the License. 
# ============================================================================== -from model_compression_toolkit.target_platform_capabilities.target_platform.fusing import Fusing from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attribute_filter import AttributeFilter from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities, OperationsSetToLayers, Smaller, SmallerEq, NotEq, Eq, GreaterEq, Greater, LayerFilterParams, OperationsToLayers, get_current_tpc -from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model import get_default_quantization_config_options, TargetPlatformModel -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - OpQuantizationConfig, QuantizationConfigOptions, AttributeQuantizationConfig, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorsSet, OperatorSetConcat +from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model import get_default_quantization_config_options +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, OperatorsSet, \ + OperatorSetConcat, Signedness, AttributeQuantizationConfig, OpQuantizationConfig, QuantizationConfigOptions, Fusing from mct_quantizers import QuantizationMethod diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py b/model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py deleted file mode 100644 index 6ceca6e3a..000000000 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -from typing import Any, List, Union - -from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorSetConcat, \ - OperatorsSet -from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model_component import TargetPlatformModelComponent - - -class Fusing(TargetPlatformModelComponent): - """ - Fusing defines a list of operators that should be combined and treated as a single operator, - hence no quantization is applied between them. - """ - - def __init__(self, - operator_groups_list: List[Union[OperatorsSet, OperatorSetConcat]], - name: str = None): - """ - Args: - operator_groups_list (List[Union[OperatorsSet, OperatorSetConcat]]): A list of operator groups, each being either an OperatorSetConcat or an OperatorsSet. - name (str): The name for the Fusing instance. If not provided, it's generated from the operator groups' names. 
- """ - assert isinstance(operator_groups_list, - list), f'List of operator groups should be of type list but is {type(operator_groups_list)}' - assert len(operator_groups_list) >= 2, f'Fusing can not be created for a single operators group' - - # Generate a name from the operator groups if no name is provided - if name is None: - name = '_'.join([x.name for x in operator_groups_list]) - - super().__init__(name) - self.operator_groups_list = operator_groups_list - - def contains(self, other: Any) -> bool: - """ - Determines if the current Fusing instance contains another Fusing instance. - - Args: - other: The other Fusing instance to check against. - - Returns: - A boolean indicating whether the other instance is contained within this one. - """ - if not isinstance(other, Fusing): - return False - - # Check for containment by comparing operator groups - for i in range(len(self.operator_groups_list) - len(other.operator_groups_list) + 1): - for j in range(len(other.operator_groups_list)): - if self.operator_groups_list[i + j] != other.operator_groups_list[j] and not ( - isinstance(self.operator_groups_list[i + j], OperatorSetConcat) and ( - other.operator_groups_list[j] in self.operator_groups_list[i + j].op_set_list)): - break - else: - # If all checks pass, the other Fusing instance is contained - return True - # Other Fusing instance is not contained - return False - - def get_info(self): - """ - Retrieves information about the Fusing instance, including its name and the sequence of operator groups. - - Returns: - A dictionary with the Fusing instance's name as the key and the sequence of operator groups as the value, - or just the sequence of operator groups if no name is set. - """ - if self.name is not None: - return {self.name: ' -> '.join([x.name for x in self.operator_groups_list])} - return ' -> '.join([x.name for x in self.operator_groups_list]) \ No newline at end of file diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/operators.py b/model_compression_toolkit/target_platform_capabilities/target_platform/operators.py deleted file mode 100644 index 1b737b3bd..000000000 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/operators.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -from typing import Dict, Any - -from model_compression_toolkit.target_platform_capabilities.constants import OPS_SET_LIST -from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model_component import TargetPlatformModelComponent -from model_compression_toolkit.target_platform_capabilities.target_platform.current_tp_model import _current_tp_model -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import QuantizationConfigOptions - - -class OperatorsSetBase(TargetPlatformModelComponent): - """ - Base class to represent a set of operators. - """ - def __init__(self, name: str): - """ - - Args: - name: Name of OperatorsSet. - """ - super().__init__(name=name) - - -class OperatorsSet(OperatorsSetBase): - def __init__(self, - name: str, - qc_options: QuantizationConfigOptions = None): - """ - Set of operators that are represented by a unique label. - - Args: - name (str): Set's label (must be unique in a TargetPlatformModel). - qc_options (QuantizationConfigOptions): Configuration options to use for this set of operations. - """ - - super().__init__(name) - self.qc_options = qc_options - is_fusing_set = qc_options is None - self.is_default = _current_tp_model.get().default_qco == self.qc_options or is_fusing_set - - - def get_info(self) -> Dict[str,Any]: - """ - - Returns: Info about the set as a dictionary. - - """ - return {"name": self.name, - "is_default_qc": self.is_default} - - -class OperatorSetConcat(OperatorsSetBase): - """ - Concatenate a list of operator sets to treat them similarly in different places (like fusing). - """ - def __init__(self, *opsets: OperatorsSet): - """ - Group a list of operation sets. - - Args: - *opsets (OperatorsSet): List of operator sets to group. - """ - name = "_".join([a.name for a in opsets]) - super().__init__(name=name) - self.op_set_list = opsets - self.qc_options = None # Concat have no qc options - - def get_info(self) -> Dict[str,Any]: - """ - - Returns: Info about the sets group as a dictionary. - - """ - return {"name": self.name, - OPS_SET_LIST: [s.name for s in self.op_set_list]} diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py b/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py index 4fbf68ea6..f2b6dec49 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py @@ -13,19 +13,8 @@ # limitations under the License. 
# ============================================================================== -import pprint -from typing import Any, Dict - -from model_compression_toolkit.target_platform_capabilities.target_platform.current_tp_model import _current_tp_model, \ - get_current_tp_model -from model_compression_toolkit.target_platform_capabilities.target_platform.fusing import Fusing -from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model_component import \ - TargetPlatformModelComponent -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import OpQuantizationConfig, \ - QuantizationConfigOptions -from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorsSetBase -from model_compression_toolkit.target_platform_capabilities.immutable import ImmutableClass -from model_compression_toolkit.logger import Logger +from model_compression_toolkit.target_platform_capabilities.target_platform.current_tp_model import get_current_tp_model +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import QuantizationConfigOptions def get_default_quantization_config_options() -> QuantizationConfigOptions: @@ -39,204 +28,3 @@ def get_default_quantization_config_options() -> QuantizationConfigOptions: return get_current_tp_model().default_qco -def get_default_quantization_config(): - """ - - Returns: The default OpQuantizationConfig of the model. This is the OpQuantizationConfig - to use when a layer's options is queried and it wasn't specified in the TargetPlatformCapabilities. - This OpQuantizationConfig is the single option in the default QuantizationConfigOptions. - - """ - - return get_current_tp_model().get_default_op_quantization_config() - - -class TargetPlatformModel(ImmutableClass): - """ - Modeling of the hardware the quantized model will use during inference. - The model contains definition of operators, quantization configurations of them, and - fusing patterns so that multiple operators will be combined into a single operator. - """ - - def __init__(self, - default_qco: QuantizationConfigOptions, - add_metadata: bool = False, - name="default_tp_model"): - """ - - Args: - default_qco (QuantizationConfigOptions): Default QuantizationConfigOptions to use for operators that their QuantizationConfigOptions are not defined in the model. - add_metadata (bool): Whether to add metadata to the model or not. - name (str): Name of the model. - """ - - super().__init__() - self.add_metadata = add_metadata - self.name = name - self.operator_set = [] - assert isinstance(default_qco, QuantizationConfigOptions) - assert len(default_qco.quantization_config_list) == 1, \ - f'Default QuantizationConfigOptions must contain only one option' - self.default_qco = default_qco - self.fusing_patterns = [] - self.is_simd_padding = False - - def get_config_options_by_operators_set(self, - operators_set_name: str) -> QuantizationConfigOptions: - """ - Get the QuantizationConfigOptions of a OperatorsSet by the OperatorsSet name. - If the name is not in the model, the default QuantizationConfigOptions is returned. - - Args: - operators_set_name: Name of OperatorsSet to get. - - Returns: - QuantizationConfigOptions to use for ops in OperatorsSet named operators_set_name. 
- """ - for op_set in self.operator_set: - if operators_set_name == op_set.name: - return op_set.qc_options - return self.default_qco - - def get_default_op_quantization_config(self) -> OpQuantizationConfig: - """ - - Returns: The default OpQuantizationConfig of the TargetPlatformModel. - - """ - assert len(self.default_qco.quantization_config_list) == 1, \ - f'Default quantization configuration options must contain only one option,' \ - f' but found {len(get_current_tp_model().default_qco.quantization_config_list)} configurations.' - return self.default_qco.quantization_config_list[0] - - def is_opset_in_model(self, - opset_name: str) -> bool: - """ - Check whether an operators set is defined in the model or not. - - Args: - opset_name: Operators set name to check. - - Returns: - Whether an operators set is defined in the model or not. - """ - return opset_name in [x.name for x in self.operator_set] - - def get_opset_by_name(self, - opset_name: str) -> OperatorsSetBase: - """ - Get an OperatorsSet object from the model by its name. - If name is not in the model - None is returned. - - Args: - opset_name: OperatorsSet name to retrieve. - - Returns: - OperatorsSet object with the name opset_name, or None if opset_name is not in the model. - """ - - opset_list = [x for x in self.operator_set if x.name == opset_name] - assert len(opset_list) <= 1, f'Found more than one OperatorsSet in' \ - f' TargetPlatformModel with the name {opset_name}. ' \ - f'OperatorsSet name must be unique.' - if len(opset_list) == 0: # opset_name is not in the model. - return None - - return opset_list[0] # There's one opset with that name - - def append_component(self, - tp_model_component: TargetPlatformModelComponent): - """ - Attach a TargetPlatformModel component to the model. Components can be for example: - Fusing, OperatorsSet, etc. - - Args: - tp_model_component: Component to attach to the model. - - """ - if isinstance(tp_model_component, Fusing): - self.fusing_patterns.append(tp_model_component) - elif isinstance(tp_model_component, OperatorsSetBase): - self.operator_set.append(tp_model_component) - else: # pragma: no cover - Logger.critical(f'Attempted to append an unrecognized TargetPlatformModelComponent of type: {type(tp_model_component)}.') - - def __enter__(self): - """ - Start defining the TargetPlatformModel using 'with'. - - Returns: Initialized TargetPlatformModel object. - - """ - _current_tp_model.set(self) - return self - - def __exit__(self, exc_type, exc_value, tb): - """ - Finish defining the TargetPlatformModel at the end of the 'with' clause. - Returns the final and immutable TargetPlatformModel instance. - """ - - if exc_value is not None: - print(exc_value, exc_value.args) - raise exc_value - self.__validate_model() # Assert that model is valid. - _current_tp_model.reset() - self.initialized_done() # Make model immutable. - return self - - def __validate_model(self): - """ - - Assert model is valid. - Model is invalid if, for example, it contains multiple operator sets with the same name, - as their names should be unique. 
- - """ - opsets_names = [op.name for op in self.operator_set] - if len(set(opsets_names)) != len(opsets_names): - Logger.critical(f'Operator Sets must have unique names.') - - def get_default_config(self) -> OpQuantizationConfig: - """ - - Returns: - - """ - assert len(self.default_qco.quantization_config_list) == 1, \ - f'Default quantization configuration options must contain only one option,' \ - f' but found {len(self.default_qco.quantization_config_list)} configurations.' - return self.default_qco.quantization_config_list[0] - - def get_info(self) -> Dict[str, Any]: - """ - - Returns: Dictionary that summarizes the TargetPlatformModel properties (for display purposes). - - """ - return {"Model name": self.name, - "Default quantization config": self.get_default_config().get_info(), - "Operators sets": [o.get_info() for o in self.operator_set], - "Fusing patterns": [f.get_info() for f in self.fusing_patterns] - } - - def show(self): - """ - - Display the TargetPlatformModel. - - """ - pprint.pprint(self.get_info(), sort_dicts=False) - - def set_simd_padding(self, - is_simd_padding: bool): - """ - Set flag is_simd_padding to indicate whether this TP model defines - that padding due to SIMD constrains occurs. - - Args: - is_simd_padding: Whether this TP model defines that padding due to SIMD constrains occurs. - - """ - self.is_simd_padding = is_simd_padding - diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model_component.py b/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model_component.py deleted file mode 100644 index fbe0ad4c0..000000000 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model_component.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from typing import Any, Dict - -from model_compression_toolkit.target_platform_capabilities.target_platform.current_tp_model import _current_tp_model - - -class TargetPlatformModelComponent: - """ - Component of TargetPlatformModel (Fusing, OperatorsSet, etc.) - """ - def __init__(self, name: str): - """ - - Args: - name: Name of component. - """ - self.name = name - _current_tp_model.get().append_component(self) - - def get_info(self) -> Dict[str, Any]: - """ - - Returns: Get information about the component to display (return an empty dictionary. - the actual component should fill it with info). 
- - """ - return {} diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py index f8607784e..669a068a7 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py @@ -18,8 +18,7 @@ from model_compression_toolkit.logger import Logger from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.current_tpc import _current_tpc from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.target_platform_capabilities_component import TargetPlatformCapabilitiesComponent -from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorSetConcat, \ - OperatorsSetBase +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import OperatorsSetBase, OperatorSetConcat from model_compression_toolkit import DefaultDict diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py index 80385553b..ef0cd5713 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py @@ -24,12 +24,9 @@ from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.target_platform_capabilities_component import TargetPlatformCapabilitiesComponent from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.layer_filter_params import LayerFilterParams from model_compression_toolkit.target_platform_capabilities.immutable import ImmutableClass -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import QuantizationConfigOptions, \ - OpQuantizationConfig -from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorsSetBase -from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model import TargetPlatformModel +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, OperatorsSetBase, \ + OpQuantizationConfig, QuantizationConfigOptions from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.current_tpc import _current_tpc -from model_compression_toolkit.constants import MCT_VERSION, TPC_VERSION class TargetPlatformCapabilities(ImmutableClass): @@ -38,14 +35,12 @@ class TargetPlatformCapabilities(ImmutableClass): """ def __init__(self, tp_model: TargetPlatformModel, - name: str = "base", - version: str = None): + name: str = "base"): """ Args: tp_model (TargetPlatformModel): Modeled hardware to attach framework information to. name (str): Name of the TargetPlatformCapabilities. - version (str): TPC version. 
""" super().__init__() @@ -57,7 +52,6 @@ def __init__(self, # Track the unused opsets for warning purposes. self.__tp_model_opsets_not_used = [s.name for s in tp_model.operator_set] self.remove_fusing_names_from_not_used_list() - self.version = version def get_layers_by_opset_name(self, opset_name: str) -> List[Any]: """ @@ -117,7 +111,9 @@ def get_info(self) -> Dict[str, Any]: """ return {"Target Platform Capabilities": self.name, - "Version": self.version, + "Minor version": self.tp_model.tpc_minor_version, + "Patch version": self.tp_model.tpc_patch_version, + "Platform type": self.tp_model.tpc_platform_type, "Target Platform Model": self.tp_model.get_info(), "Operations to layers": {op2layer.name:[l.__name__ for l in op2layer.layers] for op2layer in self.op_sets_to_layers.op_sets_to_layers}} diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py index 3502d4c10..27d032c29 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py @@ -15,12 +15,12 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH -from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ + IMX500_TP_MODEL +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -63,7 +63,8 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, weights_n_bits=8, weights_per_channel_threshold=False, - enable_weights_quantization=False, # TODO: this will changed to True once implementing multi-attributes quantization + enable_weights_quantization=False, + # TODO: this will changed to True once implementing multi-attributes quantization lut_values_bitwidth=None) # define a quantization config to quantize the kernel (for layers where there is a kernel attribute). @@ -88,7 +89,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -102,7 +103,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. 
- linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -152,12 +153,18 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name) + generated_tpc = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=1, + tpc_patch_version=0, + tpc_platform_type=IMX500_TP_MODEL, + name=name, + add_metadata=False) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -172,42 +179,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - sub = tp.OperatorsSet("Sub") - mul = tp.OperatorsSet("Mul") - div = tp.OperatorsSet("Div") - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add") + sub = schema.OperatorsSet("Sub") + mul = schema.OperatorsSet("Mul") + div = schema.OperatorsSet("Div") + prelu = schema.OperatorsSet("PReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") # Combine multiple operators into a single 
operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. # To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpc diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py index 72d9a363b..43ca2b4e0 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \ @@ -48,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -59,7 +60,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Reshape, tf.reshape, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py index 086f0cace..345e525a4 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py @@ -23,12 +23,12 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \ BIAS from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tp_model import get_tp_model import model_compression_toolkit as mct -from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1 import __version__ as TPC_VERSION tp = mct.target_platform @@ -42,7 +42,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -51,9 +51,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py index 09708edda..9da497022 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py @@ -15,13 +15,12 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ - WEIGHTS_QUANTIZATION_METHOD -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig + WEIGHTS_QUANTIZATION_METHOD, IMX500_TP_MODEL +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -84,7 +83,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -98,7 +97,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -151,12 +150,18 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. 
# This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name) + generated_tpc = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=1, + tpc_patch_version=0, + tpc_platform_type=IMX500_TP_MODEL, + add_metadata=False, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -169,42 +174,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - sub = tp.OperatorsSet("Sub") - mul = tp.OperatorsSet("Mul") - div = tp.OperatorsSet("Div") - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add") + sub = schema.OperatorsSet("Sub") + mul = schema.OperatorsSet("Mul") + div = schema.OperatorsSet("Div") + prelu = schema.OperatorsSet("PReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
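Before the concrete fusing declarations below, a paraphrased sketch of what they encode; this summarizes the pattern semantics and is not MCT's internal representation:

# Each Fusing([...]) declares a chain whose intermediate tensors are NOT
# quantized, e.g. Conv -> Swish is treated as a single fused operator.
fused_patterns = [
    ("Conv", {"AnyReLU", "Swish", "PReLU", "Sigmoid", "Tanh"}),
    ("FullyConnected", {"AnyReLU", "Swish", "Sigmoid"}),
    ({"Add", "Sub", "Mul", "Div"}, {"AnyReLU"}),
]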
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpc diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py index f11ca92d5..9bf658512 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \ @@ -48,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -59,7 +60,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Reshape, tf.reshape, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py index b50921dcd..9e03c497f 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -42,7 +43,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -51,9 +52,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py index f07ea4e58..24f3e6eae 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py @@ -15,13 +15,12 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH -from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig - +from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ + IMX500_TP_MODEL +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -84,7 +83,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, 
List[OpQuantiza # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -98,7 +97,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, @@ -147,12 +146,18 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name) + generated_tpc = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=1, + tpc_patch_version=0, + tpc_platform_type=IMX500_TP_MODEL, + add_metadata=False, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -165,42 +170,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
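The lines that follow build the "NoQuantization" set; as a reading aid, a sketch of the clone-and-edit chain involved, with method names as used in this diff and exact signatures assumed:

# Start from the model's default options, switch activation quantization off,
# then switch weights quantization off for every weight attribute.
no_quant_qco = (tp.get_default_quantization_config_options()
                .clone_and_edit(enable_activation_quantization=False)
                .clone_and_edit_weight_attribute(enable_weights_quantization=False))
schema.OperatorsSet("NoQuantization", no_quant_qco)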
default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - sub = tp.OperatorsSet("Sub") - mul = tp.OperatorsSet("Mul") - div = tp.OperatorsSet("Div") - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add") + sub = schema.OperatorsSet("Sub") + mul = schema.OperatorsSet("Mul") + div = schema.OperatorsSet("Div") + prelu = schema.OperatorsSet("PReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
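Looking back at mixed_precision_configuration_options above: mixed_precision_cfg_list itself comes from get_op_quantization_configs() earlier in the file. A hedged sketch of the usual derivation; the bit-widths are illustrative, and clone_and_edit with attr_to_edit follows the pattern used in the sibling tp_model files rather than anything shown in this hunk:

# Assumed derivation: lower-bit kernel variants cloned from the 8-bit config.
four_bits = linear_eight_bits.clone_and_edit(
    attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}})
two_bits = linear_eight_bits.clone_and_edit(
    attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}})
mixed_precision_cfg_list = [linear_eight_bits, four_bits, two_bits]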
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpc diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py index 0f8e63b2e..0bca7062f 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \ @@ -48,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_pot_tpc_keras_tpc', tp_model=imx500_pot_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -59,7 +60,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Reshape, tf.reshape, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py index 7f46dfe0c..7c5ac0769 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -43,7 +44,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_pot_tpc_pytorch_tpc', tp_model=imx500_pot_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -52,9 +53,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py index 2c10e5e08..947c1608f 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py @@ -15,12 +15,12 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH -from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ + IMX500_TP_MODEL +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -65,7 +65,8 @@ def get_op_quantization_configs() -> \ 
weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, weights_n_bits=8, weights_per_channel_threshold=False, - enable_weights_quantization=False, # TODO: this will changed to True once implementing multi-attributes quantization + enable_weights_quantization=False, + # TODO: this will be changed to True once multi-attribute quantization is implemented lut_values_bitwidth=None) # define a quantization config to quantize the kernel (for layers where there is a kernel attribute). @@ -90,7 +91,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -104,7 +105,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -154,12 +155,18 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = tp.TargetPlatformModel(default_configuration_options, add_metadata=True, name=name) + generated_tpm = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=2, + tpc_patch_version=0, + tpc_platform_type=IMX500_TP_MODEL, + add_metadata=True, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -174,42 +181,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc.
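As the comment above notes, the model's components are created under 'with' on the TargetPlatformModel instance; objects constructed inside the block are assumed to register themselves on the enclosing model, so nothing is attached explicitly. A minimal sketch:

with generated_tpm:
    # OperatorsSet registers itself on generated_tpm at construction time;
    # "ExampleSet" is a hypothetical name, not one defined in this diff.
    schema.OperatorsSet("ExampleSet")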
default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - sub = tp.OperatorsSet("Sub") - mul = tp.OperatorsSet("Mul") - div = tp.OperatorsSet("Div") - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add") + sub = schema.OperatorsSet("Sub") + mul = schema.OperatorsSet("Mul") + div = schema.OperatorsSet("Div") + prelu = schema.OperatorsSet("PReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
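On the OperatorSetConcat objects created just below: a concat is assumed to behave as a union of its member sets, so a single fusing rule can reference several activation types at once. A schematic sketch, not MCT's internal matcher:

concat_members = {"AnyReLU", "Swish", "Sigmoid"}  # OperatorSetConcat ~ set union

def matches_fc_fusion(producer: str, consumer: str) -> bool:
    # Fusing([fc, activations_after_fc_to_fuse]) matches exactly these pairs.
    return producer == "FullyConnected" and consumer in concat_members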
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py index 8845d929d..b84430911 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \ @@ -49,7 +50,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -60,7 +61,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Identity, tf.identity, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py index ad60efc53..471ed4ae0 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \ BIAS @@ -43,7 +44,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -52,9 +53,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py index b7565df45..31ba2d9ab 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py @@ -15,13 +15,12 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ - WEIGHTS_QUANTIZATION_METHOD -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig + WEIGHTS_QUANTIZATION_METHOD, IMX500_TP_MODEL +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -86,7 +85,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. 
# This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -100,7 +99,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -153,12 +152,18 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = tp.TargetPlatformModel(default_configuration_options, add_metadata=True, name=name) + generated_tpm = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=2, + tpc_patch_version=0, + tpc_platform_type=IMX500_TP_MODEL, + add_metadata=True, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -171,42 +176,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - sub = tp.OperatorsSet("Sub") - mul = tp.OperatorsSet("Mul") - div = tp.OperatorsSet("Div") - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add") + sub = schema.OperatorsSet("Sub") + mul = schema.OperatorsSet("Mul") + div = schema.OperatorsSet("Div") + prelu = schema.OperatorsSet("PReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
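For completeness, a sketch of how the set names declared above are later bound to concrete framework layers in the tpc_keras/tpc_pytorch files changed below. The layer lists and attribute mappings are abbreviated, and OperationsSetToLayers is assumed to keep its existing signature:

from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, Dense

with keras_tpc:
    # Bind opset names from the tp_model to real Keras layer classes.
    tp.OperationsSetToLayers("Conv", [Conv2D, DepthwiseConv2D])
    tp.OperationsSetToLayers("FullyConnected", [Dense])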
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py index 08b791f4f..192464bac 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \ @@ -48,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -59,7 +60,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Identity, tf.identity, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py index 59189d306..6d05fe910 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -42,7 +43,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -51,9 +52,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py index 84662cd23..b053ea9eb 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py @@ -15,12 +15,12 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH -from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ + IMX500_TP_MODEL +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -65,7 +65,8 @@ def get_op_quantization_configs() -> \ 
weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, weights_n_bits=8, weights_per_channel_threshold=False, - enable_weights_quantization=False, # TODO: this will changed to True once implementing multi-attributes quantization + enable_weights_quantization=False, + # TODO: this will be changed to True once multi-attribute quantization is implemented lut_values_bitwidth=None) # define a quantization config to quantize the kernel (for layers where there is a kernel attribute). @@ -90,7 +91,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -104,7 +105,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -154,7 +155,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a QuantizationConfigOptions for quantizing constants in functional ops. # Constant configuration is similar to the default eight bit configuration except for PoT @@ -165,7 +166,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=True, weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)) - const_configuration_options = tp.QuantizationConfigOptions([const_config]) + const_configuration_options = schema.QuantizationConfigOptions([const_config]) # 16 bits inputs and outputs. Currently, only defined for consts since they are used in operators that # support 16 bit as input and output. @@ -173,14 +174,20 @@ def generate_tp_model(default_config: OpQuantizationConfig, supported_input_activation_n_bits=(8, 16)) const_config_input16_output16 = const_config_input16.clone_and_edit( activation_n_bits=16, signedness=Signedness.SIGNED) - const_configuration_options_inout16 = tp.QuantizationConfigOptions([const_config_input16_output16, - const_config_input16], - base_config=const_config_input16) + const_configuration_options_inout16 = schema.QuantizationConfigOptions([const_config_input16_output16, + const_config_input16], + base_config=const_config_input16) # Create a TargetPlatformModel and set its default quantization config.
# This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = tp.TargetPlatformModel(default_configuration_options, add_metadata=True, name=name) + generated_tpm = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=3, + tpc_patch_version=0, + tpc_platform_type=IMX500_TP_MODEL, + add_metadata=True, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -195,44 +202,44 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False, - supported_input_activation_n_bits=(8, 16)) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) - tp.OperatorsSet("Default16BitInout", const_configuration_options_inout16) + schema.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False, + supported_input_activation_n_bits=(8, 16)) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet("Default16BitInout", const_configuration_options_inout16) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add", const_configuration_options_inout16) - sub = tp.OperatorsSet("Sub", const_configuration_options_inout16) - mul = tp.OperatorsSet("Mul", const_configuration_options_inout16) - div = tp.OperatorsSet("Div", const_configuration_options) - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add", const_configuration_options_inout16) + sub = schema.OperatorsSet("Sub", const_configuration_options_inout16) + mul = schema.OperatorsSet("Mul", const_configuration_options_inout16) + div = schema.OperatorsSet("Div", const_configuration_options) + prelu = schema.OperatorsSet("PReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
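A brief recap of the v3-specific options wired up above, as a data sketch; the mapping is paraphrased from the hunks and is not an MCT data structure:

# Which option set each opset receives in v3 (per the hunks above):
opset_options = {
    "Add": "const_configuration_options_inout16",  # 8/16-bit inputs, signed 16-bit output candidate
    "Sub": "const_configuration_options_inout16",
    "Mul": "const_configuration_options_inout16",
    "Div": "const_configuration_options",          # 8-bit const options only
    "Default16BitInout": "const_configuration_options_inout16",
}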
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py index 439e4321a..27f79c562 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \ @@ -49,7 +50,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -60,7 +61,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Identity, tf.identity, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py index 6abedcbe5..3f52dfe1e 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \ BIAS @@ -43,7 +44,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -52,9 +53,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py index 0f1901846..9102fcc02 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py @@ -15,13 +15,12 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ - WEIGHTS_QUANTIZATION_METHOD -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig + WEIGHTS_QUANTIZATION_METHOD, IMX500_TP_MODEL +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -86,7 +85,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. 
# This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -100,7 +99,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -153,7 +152,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a QuantizationConfigOptions for quantizing constants in functional ops. # Constant configuration is similar to the default eight bit configuration except for PoT @@ -164,12 +163,18 @@ def generate_tp_model(default_config: OpQuantizationConfig, default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=True, weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)) - const_configuration_options = tp.QuantizationConfigOptions([const_config]) + const_configuration_options = schema.QuantizationConfigOptions([const_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = tp.TargetPlatformModel(default_configuration_options, add_metadata=True, name=name) + generated_tpm = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=3, + tpc_patch_version=0, + tpc_platform_type=IMX500_TP_MODEL, + add_metadata=True, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -182,42 +187,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
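Since a "NoQuantization" set like the one in the following hunk appears in every tp_model this patch touches, a minimal sketch of the pattern, assuming the schema alias and an open with generated_tpm: block:

    # Sketch: a float opset derived from the model's default options;
    # neither activations nor weights are quantized.
    default_qco = tp.get_default_quantization_config_options()
    schema.OperatorsSet(
        "NoQuantization",
        default_qco.clone_and_edit(enable_activation_quantization=False)
                   .clone_and_edit_weight_attribute(enable_weights_quantization=False))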
default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add", const_configuration_options) - sub = tp.OperatorsSet("Sub", const_configuration_options) - mul = tp.OperatorsSet("Mul", const_configuration_options) - div = tp.OperatorsSet("Div", const_configuration_options) - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add", const_configuration_options) + sub = schema.OperatorsSet("Sub", const_configuration_options) + mul = schema.OperatorsSet("Mul", const_configuration_options) + div = schema.OperatorsSet("Div", const_configuration_options) + prelu = schema.OperatorsSet("PReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py index b5c8bd213..ffd712a8c 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \ @@ -48,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -59,7 +60,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Identity, tf.identity, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py index 8e0326499..458470bea 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -42,7 +43,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -51,9 +52,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py index 424bccd17..493190b12 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py @@ -15,12 +15,12 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH -from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ + IMX500_TP_MODEL +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -112,7 +112,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation 
without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -126,7 +126,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -176,12 +176,13 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) default_config_input16 = default_config.clone_and_edit(supported_input_activation_n_bits=(8, 16)) - default_config_options_16bit = tp.QuantizationConfigOptions([default_config_input16, - default_config_input16.clone_and_edit(activation_n_bits=16, - signedness=Signedness.SIGNED)], - base_config=default_config_input16) + default_config_options_16bit = schema.QuantizationConfigOptions([default_config_input16, + default_config_input16.clone_and_edit( + activation_n_bits=16, + signedness=Signedness.SIGNED)], + base_config=default_config_input16) # Create a QuantizationConfigOptions for quantizing constants in functional ops. # Constant configuration is similar to the default eight bit configuration except for PoT @@ -192,7 +193,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=True, weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)) - const_configuration_options = tp.QuantizationConfigOptions([const_config]) + const_configuration_options = schema.QuantizationConfigOptions([const_config]) # 16 bits inputs and outputs. Currently, only defined for consts since they are used in operators that # support 16 bit as input and output. 
@@ -200,9 +201,9 @@ def generate_tp_model(default_config: OpQuantizationConfig, supported_input_activation_n_bits=(8, 16)) const_config_input16_output16 = const_config_input16.clone_and_edit( activation_n_bits=16, signedness=Signedness.SIGNED) - const_configuration_options_inout16 = tp.QuantizationConfigOptions([const_config_input16_output16, - const_config_input16], - base_config=const_config_input16) + const_configuration_options_inout16 = schema.QuantizationConfigOptions([const_config_input16_output16, + const_config_input16], + base_config=const_config_input16) const_config_input16_per_tensor = const_config.clone_and_edit( supported_input_activation_n_bits=(8, 16), @@ -212,15 +213,16 @@ def generate_tp_model(default_config: OpQuantizationConfig, ) const_config_input16_output16_per_tensor = const_config_input16_per_tensor.clone_and_edit( activation_n_bits=16, signedness=Signedness.SIGNED) - const_configuration_options_inout16_per_tensor = tp.QuantizationConfigOptions([const_config_input16_output16_per_tensor, - const_config_input16_per_tensor], - base_config=const_config_input16_per_tensor) + const_configuration_options_inout16_per_tensor = schema.QuantizationConfigOptions( + [const_config_input16_output16_per_tensor, + const_config_input16_per_tensor], + base_config=const_config_input16_per_tensor) qpreserving_const_config = const_config.clone_and_edit(enable_activation_quantization=False, quantization_preserving=True, default_weight_attr_config=const_config.default_weight_attr_config.clone_and_edit( weights_per_channel_threshold=False)) - qpreserving_const_config_options = tp.QuantizationConfigOptions([qpreserving_const_config]) + qpreserving_const_config_options = schema.QuantizationConfigOptions([qpreserving_const_config]) mp_cfg_list_16bit = [mp_cfg.clone_and_edit(activation_n_bits=16, signedness=Signedness.SIGNED) for mp_cfg in mixed_precision_cfg_list] @@ -228,7 +230,12 @@ def generate_tp_model(default_config: OpQuantizationConfig, # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = tp.TargetPlatformModel(default_configuration_options, add_metadata=True, name=name) + generated_tpm = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=4, + tpc_patch_version=0, + tpc_platform_type=IMX500_TP_MODEL, + add_metadata=True, name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -243,60 +250,61 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
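A note on the quantization-preserving options defined above: these are intended for ops that pass their input's quantization through unchanged (the dimension-manipulation sets registered below), so no new activation quantizer is attached. Restated as a minimal sketch:

    # Sketch: quantization-preserving constant config; activation quantization
    # is off because the op keeps its input's quantization parameters, and the
    # weight threshold is switched to per-tensor.
    qpreserving_const_config = const_config.clone_and_edit(
        enable_activation_quantization=False,
        quantization_preserving=True,
        default_weight_attr_config=const_config.default_weight_attr_config.clone_and_edit(
            weights_per_channel_threshold=False))
    qpreserving_const_config_options = schema.QuantizationConfigOptions(
        [qpreserving_const_config])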
default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet(OPSET_NO_QUANTIZATION, - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) - tp.OperatorsSet(OPSET_QUANTIZATION_PRESERVING, - default_qco.clone_and_edit(enable_activation_quantization=False, - quantization_preserving=True) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) - tp.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, qpreserving_const_config_options) - tp.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS, - default_qco.clone_and_edit(enable_activation_quantization=False, - quantization_preserving=True, - supported_input_activation_n_bits=(8, 16)) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) - tp.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor) + schema.OperatorsSet(OPSET_NO_QUANTIZATION, + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet(OPSET_QUANTIZATION_PRESERVING, + default_qco.clone_and_edit(enable_activation_quantization=False, + quantization_preserving=True) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, qpreserving_const_config_options) + schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS, + default_qco.clone_and_edit(enable_activation_quantization=False, + quantization_preserving=True, + supported_input_activation_n_bits=(8, 16)) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list + mp_cfg_list_16bit, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions( + mixed_precision_cfg_list + mp_cfg_list_16bit, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options) - fc = tp.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options) + conv = schema.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options) + fc = schema.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options) - tp.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit) + schema.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit) # Note: Operations sets without quantization configuration are useful for creating fusing patterns - any_relu = tp.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit) - add = tp.OperatorsSet(OPSET_ADD, const_configuration_options_inout16) - sub = tp.OperatorsSet(OPSET_SUB, const_configuration_options_inout16) - mul = tp.OperatorsSet(OPSET_MUL, const_configuration_options_inout16) - div = tp.OperatorsSet(OPSET_DIV, const_configuration_options) - tp.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16) - prelu = tp.OperatorsSet(OPSET_PRELU, default_config_options_16bit) - swish = tp.OperatorsSet(OPSET_SWISH, default_config_options_16bit) - sigmoid = tp.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit) - tanh = tp.OperatorsSet(OPSET_TANH, default_config_options_16bit) - gelu = 
tp.OperatorsSet(OPSET_GELU, default_config_options_16bit) - hardsigmoid = tp.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit) - hardswish = tp.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit) + any_relu = schema.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit) + add = schema.OperatorsSet(OPSET_ADD, const_configuration_options_inout16) + sub = schema.OperatorsSet(OPSET_SUB, const_configuration_options_inout16) + mul = schema.OperatorsSet(OPSET_MUL, const_configuration_options_inout16) + div = schema.OperatorsSet(OPSET_DIV, const_configuration_options) + schema.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16) + prelu = schema.OperatorsSet(OPSET_PRELU, default_config_options_16bit) + swish = schema.OperatorsSet(OPSET_SWISH, default_config_options_16bit) + sigmoid = schema.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit) + tanh = schema.OperatorsSet(OPSET_TANH, default_config_options_16bit) + gelu = schema.OperatorsSet(OPSET_GELU, default_config_options_16bit) + hardsigmoid = schema.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit) + hardswish = schema.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit) # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. # To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, - tanh, gelu, hardswish, hardsigmoid) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid, tanh, gelu, - hardswish, hardsigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, + tanh, gelu, hardswish, hardsigmoid) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid, tanh, gelu, + hardswish, hardsigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py index 656d57116..419a52c11 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \ @@ -53,7 +54,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, 
tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -64,7 +65,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [tf.quantization.fake_quant_with_min_max_vars, tf.math.argmax, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py index 0fa7bda97..6a39a854a 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py @@ -25,6 +25,7 @@ import torch.nn.functional as F from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, hardsigmoid, leaky_relu, gelu +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \ BIAS @@ -50,7 +51,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -59,9 +60,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py index df1fca09b..232630f30 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py @@ -15,12 +15,11 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH -from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, QNNPACK_TP_MODEL +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -85,7 +84,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -99,7 +98,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( activation_quantization_method=tp.QuantizationMethod.UNIFORM, default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, @@ -139,12 +138,18 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. 
# This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name) + generated_tpc = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=1, + tpc_patch_version=0, + tpc_platform_type=QNNPACK_TP_MODEL, + add_metadata=False, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the target platform model instance, and create them as below: @@ -153,17 +158,17 @@ def generate_tp_model(default_config: OpQuantizationConfig, # Pytorch supports the next fusing patterns: # [Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu] # Source: # https://pytorch.org/docs/stable/quantization.html#model-preparation-for-quantization-eager-mode - conv = tp.OperatorsSet("Conv") - batchnorm = tp.OperatorsSet("BatchNorm") - relu = tp.OperatorsSet("Relu") - linear = tp.OperatorsSet("Linear") + conv = schema.OperatorsSet("Conv") + batchnorm = schema.OperatorsSet("BatchNorm") + relu = schema.OperatorsSet("Relu") + linear = schema.OperatorsSet("Linear") # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, batchnorm, relu]) - tp.Fusing([conv, batchnorm]) - tp.Fusing([conv, relu]) - tp.Fusing([linear, relu]) + schema.Fusing([conv, batchnorm, relu]) + schema.Fusing([conv, batchnorm]) + schema.Fusing([conv, relu]) + schema.Fusing([linear, relu]) return generated_tpc diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py index e212f46d7..73c6dd1f8 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py @@ -16,6 +16,7 @@ from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \ KERAS_DEPTHWISE_KERNEL, BIAS @@ -41,7 +42,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='qnnpack_keras', tp_model=qnnpack_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -52,9 +53,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) with keras_tpc: tp.OperationsSetToLayers("Conv", diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py index 3c059bd42..f1a0e39ef 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py @@ -16,6 +16,7 @@ from torch.nn import Conv2d, Linear, BatchNorm2d, ConvTranspose2d, Hardtanh, ReLU, ReLU6 from torch.nn.functional import relu, relu6, hardtanh +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -35,7 +36,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='qnnpack_pytorch', tp_model=qnnpack_pytorch) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -44,9 +45,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py index bc766d2f9..b0a69c6e7 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py @@ -15,12 +15,11 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH -from model_compression_toolkit.target_platform_capabilities.constants import BIAS_ATTR, KERNEL_ATTR -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - QuantizationMethod, AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.constants import BIAS_ATTR, KERNEL_ATTR, TFLITE_TP_MODEL +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -83,7 +82,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza # We define a default config for operation without kernel attribute. 
# This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -97,8 +96,8 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( - activation_quantization_method=QuantizationMethod.UNIFORM, + linear_eight_bits = schema.OpQuantizationConfig( + activation_quantization_method=tp.QuantizationMethod.UNIFORM, default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_n_bits=8, @@ -137,12 +136,18 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name) + generated_tpc = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=1, + tpc_patch_version=0, + tpc_platform_type=TFLITE_TP_MODEL, + add_metadata=False, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -150,52 +155,52 @@ def generate_tp_model(default_config: OpQuantizationConfig, # In TFLite, the quantized operator specifications constraint operators quantization # differently. 
For more details: # https://www.tensorflow.org/lite/performance/quantization_spec#int8_quantized_operator_specifications - tp.OperatorsSet("NoQuantization", - tp.get_default_quantization_config_options().clone_and_edit( - quantization_preserving=True)) + schema.OperatorsSet("NoQuantization", + tp.get_default_quantization_config_options().clone_and_edit( + quantization_preserving=True)) fc_qco = tp.get_default_quantization_config_options() - fc = tp.OperatorsSet("FullyConnected", - fc_qco.clone_and_edit_weight_attribute(weights_per_channel_threshold=False)) - - tp.OperatorsSet("L2Normalization", - tp.get_default_quantization_config_options().clone_and_edit( - fixed_zero_point=0, fixed_scale=1 / 128)) - tp.OperatorsSet("LogSoftmax", - tp.get_default_quantization_config_options().clone_and_edit( - fixed_zero_point=127, fixed_scale=16 / 256)) - tp.OperatorsSet("Tanh", - tp.get_default_quantization_config_options().clone_and_edit( - fixed_zero_point=0, fixed_scale=1 / 128)) - tp.OperatorsSet("Softmax", - tp.get_default_quantization_config_options().clone_and_edit( - fixed_zero_point=-128, fixed_scale=1 / 256)) - tp.OperatorsSet("Logistic", - tp.get_default_quantization_config_options().clone_and_edit( - fixed_zero_point=-128, fixed_scale=1 / 256)) - - conv2d = tp.OperatorsSet("Conv2d") - kernel = tp.OperatorSetConcat(conv2d, fc) - - relu = tp.OperatorsSet("Relu") - elu = tp.OperatorsSet("Elu") - activations_to_fuse = tp.OperatorSetConcat(relu, elu) - - batch_norm = tp.OperatorsSet("BatchNorm") - bias_add = tp.OperatorsSet("BiasAdd") - add = tp.OperatorsSet("Add") - squeeze = tp.OperatorsSet("Squeeze", - qc_options=tp.get_default_quantization_config_options().clone_and_edit( - quantization_preserving=True)) + fc = schema.OperatorsSet("FullyConnected", + fc_qco.clone_and_edit_weight_attribute(weights_per_channel_threshold=False)) + + schema.OperatorsSet("L2Normalization", + tp.get_default_quantization_config_options().clone_and_edit( + fixed_zero_point=0, fixed_scale=1 / 128)) + schema.OperatorsSet("LogSoftmax", + tp.get_default_quantization_config_options().clone_and_edit( + fixed_zero_point=127, fixed_scale=16 / 256)) + schema.OperatorsSet("Tanh", + tp.get_default_quantization_config_options().clone_and_edit( + fixed_zero_point=0, fixed_scale=1 / 128)) + schema.OperatorsSet("Softmax", + tp.get_default_quantization_config_options().clone_and_edit( + fixed_zero_point=-128, fixed_scale=1 / 256)) + schema.OperatorsSet("Logistic", + tp.get_default_quantization_config_options().clone_and_edit( + fixed_zero_point=-128, fixed_scale=1 / 256)) + + conv2d = schema.OperatorsSet("Conv2d") + kernel = schema.OperatorSetConcat(conv2d, fc) + + relu = schema.OperatorsSet("Relu") + elu = schema.OperatorsSet("Elu") + activations_to_fuse = schema.OperatorSetConcat(relu, elu) + + batch_norm = schema.OperatorsSet("BatchNorm") + bias_add = schema.OperatorsSet("BiasAdd") + add = schema.OperatorsSet("Add") + squeeze = schema.OperatorsSet("Squeeze", + qc_options=tp.get_default_quantization_config_options().clone_and_edit( + quantization_preserving=True)) # ------------------- # # Fusions # ------------------- # # Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/grappler/optimizers/remapper - tp.Fusing([kernel, bias_add]) - tp.Fusing([kernel, bias_add, activations_to_fuse]) - tp.Fusing([conv2d, batch_norm, activations_to_fuse]) - tp.Fusing([conv2d, squeeze, activations_to_fuse]) - tp.Fusing([batch_norm, activations_to_fuse]) - tp.Fusing([batch_norm, add, activations_to_fuse]) + 
schema.Fusing([kernel, bias_add]) + schema.Fusing([kernel, bias_add, activations_to_fuse]) + schema.Fusing([conv2d, batch_norm, activations_to_fuse]) + schema.Fusing([conv2d, squeeze, activations_to_fuse]) + schema.Fusing([batch_norm, activations_to_fuse]) + schema.Fusing([batch_norm, add, activations_to_fuse]) return generated_tpc diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py index bc1ba09b4..a5101f819 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, BIAS @@ -46,7 +47,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='tflite_keras', tp_model=tflite_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -57,9 +58,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - keras_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) with keras_tpc: tp.OperationsSetToLayers("NoQuantization", [AveragePooling2D, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py index 3f7f85d32..6af810100 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py @@ -16,6 +16,7 @@ from torch.nn import AvgPool2d, MaxPool2d from torch.nn.functional import avg_pool2d, max_pool2d, interpolate +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -37,7 +38,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='tflite_torch', tp_model=tflite_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -46,9 +47,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) with pytorch_tpc: tp.OperationsSetToLayers("NoQuantization", [AvgPool2d, diff --git a/tests/common_tests/helpers/generate_test_tp_model.py b/tests/common_tests/helpers/generate_test_tp_model.py index a1436d8f7..83faaa43e 100644 --- a/tests/common_tests/helpers/generate_test_tp_model.py +++ b/tests/common_tests/helpers/generate_test_tp_model.py @@ -15,11 +15,13 @@ import copy from typing import Dict, List, Any +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH, ACTIVATION_N_BITS_ATTRIBUTE, \ SUPPORTED_INPUT_ACTIVATION_NBITS_ATTRIBUTE from model_compression_toolkit.target_platform_capabilities.constants import OPS_SET_LIST, KERNEL_ATTR, BIAS_ATTR, \ WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, QuantizationConfigOptions, Signedness +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import Signedness, OpQuantizationConfig, \ + QuantizationConfigOptions from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs, generate_tp_model import model_compression_toolkit as mct @@ -37,7 +39,8 @@ def generate_test_tp_model(edit_params_dict, name=""): base_config, op_cfg_list, default_config = get_op_quantization_configs() # separate weights attribute parameters from the requested param to edit - weights_params_names = [name for name in tp.AttributeQuantizationConfig.__init__.__code__.co_varnames if name != 'self'] + weights_params_names = [name for name in schema.AttributeQuantizationConfig.__init__.__code__.co_varnames if + name != 'self'] weights_params = {k: v for k, v in edit_params_dict.items() if k in weights_params_names} rest_params = {k: v for k, v in edit_params_dict.items() if k not in list(weights_params.keys())} @@ -104,8 +107,8 @@ def generate_tp_model_with_activation_mp(base_cfg, default_config, mp_bitwidth_c mixed_precision_cfg_list=mp_op_cfg_list, name=name) - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mp_op_cfg_list, - base_config=base_cfg) + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mp_op_cfg_list, + base_config=base_cfg) operator_sets_dict = {op_set.name: mixed_precision_configuration_options for op_set in base_tp_model.operator_set if op_set.name is not "NoQuantization"} @@ -121,12 +124,17 @@ def generate_tp_model_with_activation_mp(base_cfg, default_config, mp_bitwidth_c def generate_custom_test_tp_model(name: str, base_cfg: OpQuantizationConfig, - base_tp_model: tp.TargetPlatformModel, + base_tp_model: schema.TargetPlatformModel, operator_sets_dict: Dict[str, QuantizationConfigOptions] = None): + default_configuration_options = schema.QuantizationConfigOptions([base_cfg]) - default_configuration_options = tp.QuantizationConfigOptions([base_cfg]) - - custom_tp_model = tp.TargetPlatformModel(default_configuration_options, name=name) + custom_tp_model = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, + name=name) with custom_tp_model: for op_set in base_tp_model.operator_set: @@ -136,27 +144,26 @@ def generate_custom_test_tp_model(name: str, else: qc_options = op_set.qc_options - 
tp.OperatorsSet(op_set.name, qc_options) + schema.OperatorsSet(op_set.name, qc_options) existing_op_sets_names = [op_set.name for op_set in base_tp_model.operator_set] for op_set_name, op_set_qc_options in operator_sets_dict.items(): # Add new OperatorSets from the given operator_sets_dict if op_set_name not in existing_op_sets_names: - tp.OperatorsSet(op_set_name, op_set_qc_options) + schema.OperatorsSet(op_set_name, op_set_qc_options) for fusion in base_tp_model.fusing_patterns: - tp.Fusing(fusion.operator_groups_list) + schema.Fusing(fusion.operator_groups_list) return custom_tp_model def generate_test_tpc(name: str, - tp_model: tp.TargetPlatformModel, + tp_model: schema.TargetPlatformModel, base_tpc: tp.TargetPlatformCapabilities, op_sets_to_layer_add: Dict[str, List[Any]] = None, op_sets_to_layer_drop: Dict[str, List[Any]] = None, attr_mapping: Dict[str, Dict] = {}): - op_set_to_layers_list = base_tpc.op_sets_to_layers.op_sets_to_layers op_set_to_layers_dict = {op_set.name: op_set.layers for op_set in op_set_to_layers_list} @@ -174,7 +181,7 @@ def generate_test_tpc(name: str, # Remove empty op sets merged_dict = {op_set_name: layers for op_set_name, layers in merged_dict.items() if len(layers) > 0} - tpc = tp.TargetPlatformCapabilities(tp_model, name=name) + tpc = tp.TargetPlatformCapabilities(tp_model) with tpc: for op_set_name, layers in merged_dict.items(): @@ -190,22 +197,21 @@ def generate_test_attr_configs(default_cfg_nbits: int = 8, kernel_cfg_quantizatiom_method: tp.QuantizationMethod = tp.QuantizationMethod.POWER_OF_TWO, enable_kernel_weights_quantization: bool = True, kernel_lut_values_bitwidth: int = None): - - default_weight_attr_config = tp.AttributeQuantizationConfig( + default_weight_attr_config = schema.AttributeQuantizationConfig( weights_quantization_method=default_cfg_quantizatiom_method, weights_n_bits=default_cfg_nbits, weights_per_channel_threshold=False, enable_weights_quantization=False, lut_values_bitwidth=None) - kernel_base_config = tp.AttributeQuantizationConfig( + kernel_base_config = schema.AttributeQuantizationConfig( weights_quantization_method=kernel_cfg_quantizatiom_method, weights_n_bits=kernel_cfg_nbits, weights_per_channel_threshold=True, enable_weights_quantization=enable_kernel_weights_quantization, lut_values_bitwidth=kernel_lut_values_bitwidth) - bias_config = tp.AttributeQuantizationConfig( + bias_config = schema.AttributeQuantizationConfig( weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, weights_n_bits=FLOAT_BITWIDTH, weights_per_channel_threshold=False, @@ -217,22 +223,21 @@ def generate_test_attr_configs(default_cfg_nbits: int = 8, BIAS_CONFIG: bias_config} -def generate_test_op_qc(default_weight_attr_config: tp.AttributeQuantizationConfig, - kernel_base_config: tp.AttributeQuantizationConfig, - bias_config: tp.AttributeQuantizationConfig, +def generate_test_op_qc(default_weight_attr_config: schema.AttributeQuantizationConfig, + kernel_base_config: schema.AttributeQuantizationConfig, + bias_config: schema.AttributeQuantizationConfig, enable_activation_quantization: bool = True, activation_n_bits: int = 8, activation_quantization_method: tp.QuantizationMethod = tp.QuantizationMethod.POWER_OF_TWO): - - return tp.OpQuantizationConfig(enable_activation_quantization=enable_activation_quantization, - default_weight_attr_config=default_weight_attr_config, - attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, - BIAS_ATTR: bias_config}, - activation_n_bits=activation_n_bits, -
supported_input_activation_n_bits=activation_n_bits, - activation_quantization_method=activation_quantization_method, - quantization_preserving=False, - fixed_scale=None, - fixed_zero_point=None, - simd_size=32, - signedness=Signedness.AUTO) + return schema.OpQuantizationConfig(enable_activation_quantization=enable_activation_quantization, + default_weight_attr_config=default_weight_attr_config, + attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, + BIAS_ATTR: bias_config}, + activation_n_bits=activation_n_bits, + supported_input_activation_n_bits=activation_n_bits, + activation_quantization_method=activation_quantization_method, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO) diff --git a/tests/common_tests/test_tp_model.py b/tests/common_tests/test_tp_model.py index e84d0936d..ed8a52b59 100644 --- a/tests/common_tests/test_tp_model.py +++ b/tests/common_tests/test_tp_model.py @@ -16,16 +16,18 @@ import unittest import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.core.common import BaseNode from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR -from model_compression_toolkit.target_platform_capabilities.target_platform import get_default_quantization_config_options +from model_compression_toolkit.target_platform_capabilities.target_platform import \ + get_default_quantization_config_options from tests.common_tests.helpers.generate_test_tp_model import generate_test_attr_configs, generate_test_op_qc tp = mct.target_platform TEST_QC = generate_test_op_qc(**generate_test_attr_configs()) -TEST_QCO = tp.QuantizationConfigOptions([TEST_QC]) +TEST_QCO = schema.QuantizationConfigOptions([TEST_QC]) class TargetPlatformModelingTest(unittest.TestCase): @@ -36,27 +38,43 @@ def test_not_initialized_tp(self): self.assertEqual('Target platform model is not initialized.', str(e.exception)) def test_get_default_options(self): - with tp.TargetPlatformModel(TEST_QCO): + with schema.TargetPlatformModel(TEST_QCO, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False): self.assertEqual(tp.get_default_quantization_config_options(), TEST_QCO) def test_immutable_tp(self): - model = tp.TargetPlatformModel(TEST_QCO) + model = schema.TargetPlatformModel(TEST_QCO, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with self.assertRaises(Exception) as e: with model: - tp.OperatorsSet("opset") + schema.OperatorsSet("opset") model.operator_set = [] self.assertEqual('Immutable class. 
Can\'t edit attributes.', str(e.exception)) def test_default_options_more_than_single_qc(self): - test_qco = tp.QuantizationConfigOptions([TEST_QC, TEST_QC], base_config=TEST_QC) + test_qco = schema.QuantizationConfigOptions([TEST_QC, TEST_QC], base_config=TEST_QC) with self.assertRaises(Exception) as e: - tp.TargetPlatformModel(test_qco) - self.assertEqual('Default QuantizationConfigOptions must contain only one option', str(e.exception)) + schema.TargetPlatformModel(test_qco, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) + self.assertEqual('Default QuantizationConfigOptions must contain exactly one option.', str(e.exception)) def test_tp_model_show(self): - tpm = tp.TargetPlatformModel(TEST_QCO) + tpm = schema.TargetPlatformModel(TEST_QCO, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with tpm: - a = tp.OperatorsSet("opA") + a = schema.OperatorsSet("opA") tpm.show() @@ -64,11 +82,16 @@ def test_tp_model_show(self): class OpsetTest(unittest.TestCase): def test_opset_qco(self): - hm = tp.TargetPlatformModel(TEST_QCO, name='test') + hm = schema.TargetPlatformModel(TEST_QCO, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, + name='test') opset_name = "ops_3bit" with hm: qco_3bit = get_default_quantization_config_options().clone_and_edit(activation_n_bits=3) - tp.OperatorsSet(opset_name, qco_3bit) + schema.OperatorsSet(opset_name, qco_3bit) for op_qc in hm.get_config_options_by_operators_set(opset_name).quantization_config_list: self.assertEqual(op_qc.activation_n_bits, 3) @@ -80,23 +103,33 @@ def test_opset_qco(self): hm.default_qco) def test_opset_concat(self): - hm = tp.TargetPlatformModel(TEST_QCO, name='test') + hm = schema.TargetPlatformModel(TEST_QCO, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, + name='test') with hm: - a = tp.OperatorsSet('opset_A') - b = tp.OperatorsSet('opset_B', - get_default_quantization_config_options().clone_and_edit(activation_n_bits=2)) - tp.OperatorsSet('opset_C') # Just add it without using it in concat - tp.OperatorSetConcat(a, b) + a = schema.OperatorsSet('opset_A') + b = schema.OperatorsSet('opset_B', + get_default_quantization_config_options().clone_and_edit(activation_n_bits=2)) + schema.OperatorsSet('opset_C') # Just add it without using it in concat + schema.OperatorSetConcat(a, b) self.assertEqual(len(hm.operator_set), 4) self.assertTrue(hm.is_opset_in_model("opset_A_opset_B")) self.assertTrue(hm.get_config_options_by_operators_set('opset_A_opset_B') is None) def test_non_unique_opset(self): - hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with self.assertRaises(Exception) as e: with hm: - tp.OperatorsSet("conv") - tp.OperatorsSet("conv") + schema.OperatorsSet("conv") + schema.OperatorsSet("conv") self.assertEqual('Operator Sets must have unique names.', str(e.exception)) @@ -104,28 +137,31 @@ class QCOptionsTest(unittest.TestCase): def test_empty_qc_options(self): with self.assertRaises(AssertionError) as e: - tp.QuantizationConfigOptions([]) - self.assertEqual("'QuantizationConfigOptions' requires at least one 'OpQuantizationConfig'. 
The provided list is empty.", - str(e.exception)) + schema.QuantizationConfigOptions([]) + self.assertEqual( + "'QuantizationConfigOptions' requires at least one 'OpQuantizationConfig'. The provided list is empty.", + str(e.exception)) def test_list_of_no_qc(self): with self.assertRaises(AssertionError) as e: - tp.QuantizationConfigOptions([TEST_QC, 3]) + schema.QuantizationConfigOptions([TEST_QC, 3]) self.assertEqual( 'Each option must be an instance of \'OpQuantizationConfig\', but found an object of type: .', str(e.exception)) def test_clone_and_edit_options(self): - modified_options = TEST_QCO.clone_and_edit(activation_n_bits=3).clone_and_edit_weight_attribute(attrs=[KERNEL_ATTR], - weights_n_bits=5) + modified_options = TEST_QCO.clone_and_edit(activation_n_bits=3).clone_and_edit_weight_attribute( + attrs=[KERNEL_ATTR], + weights_n_bits=5) self.assertEqual(modified_options.quantization_config_list[0].activation_n_bits, 3) - self.assertEqual(modified_options.quantization_config_list[0].attr_weights_configs_mapping[KERNEL_ATTR].weights_n_bits, 5) + self.assertEqual( + modified_options.quantization_config_list[0].attr_weights_configs_mapping[KERNEL_ATTR].weights_n_bits, 5) def test_qco_without_base_config(self): - tp.QuantizationConfigOptions([TEST_QC]) # Should work fine as it has only one qc. + schema.QuantizationConfigOptions([TEST_QC]) # Should work fine as it has only one qc. with self.assertRaises(Exception) as e: - tp.QuantizationConfigOptions([TEST_QC, TEST_QC]) # Should raise exception as base_config was not passed + schema.QuantizationConfigOptions([TEST_QC, TEST_QC]) # Should raise exception as base_config was not passed self.assertEqual( 'For multiple configurations, a \'base_config\' is required for non-mixed-precision optimization.', str(e.exception)) @@ -140,21 +176,31 @@ def test_get_qco_for_none_tpc(self): class FusingTest(unittest.TestCase): def test_fusing_single_opset(self): - hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with hm: - add = tp.OperatorsSet("add") + add = schema.OperatorsSet("add") with self.assertRaises(Exception) as e: - tp.Fusing([add]) + schema.Fusing([add]) self.assertEqual('Fusing can not be created for a single operators group', str(e.exception)) def test_fusing_contains(self): - hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with hm: - conv = tp.OperatorsSet("conv") - add = tp.OperatorsSet("add") - tanh = tp.OperatorsSet("tanh") - tp.Fusing([conv, add]) - tp.Fusing([conv, add, tanh]) + conv = schema.OperatorsSet("conv") + add = schema.OperatorsSet("add") + tanh = schema.OperatorsSet("tanh") + schema.Fusing([conv, add]) + schema.Fusing([conv, add, tanh]) self.assertEqual(len(hm.fusing_patterns), 2) f0, f1 = hm.fusing_patterns[0], hm.fusing_patterns[1] @@ -164,15 +210,20 @@ def test_fusing_contains(self): self.assertTrue(f1.contains(f1)) def test_fusing_contains_with_opset_concat(self): - hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with hm: - 
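
The assertions above pin down `QuantizationConfigOptions` validation after the move to `schema`. A short sketch of the rules they exercise (`op_cfg` stands for any `OpQuantizationConfig`, e.g. `TEST_QC` defined at the top of this test module):

op_cfg = TEST_QC
schema.QuantizationConfigOptions([op_cfg])                              # valid: a single option
schema.QuantizationConfigOptions([op_cfg, op_cfg], base_config=op_cfg)  # valid: base_config given
# Each of the following raises, matching the assertions above:
# schema.QuantizationConfigOptions([])                # needs at least one OpQuantizationConfig
# schema.QuantizationConfigOptions([op_cfg, 3])       # every option must be an OpQuantizationConfig
# schema.QuantizationConfigOptions([op_cfg, op_cfg])  # multiple options require a base_config
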
conv = tp.OperatorsSet("conv") - add = tp.OperatorsSet("add") - tanh = tp.OperatorsSet("tanh") - add_tanh = tp.OperatorSetConcat(add, tanh) - tp.Fusing([conv, add]) - tp.Fusing([conv, add_tanh]) - tp.Fusing([conv, add, tanh]) + conv = schema.OperatorsSet("conv") + add = schema.OperatorsSet("add") + tanh = schema.OperatorsSet("tanh") + add_tanh = schema.OperatorSetConcat(add, tanh) + schema.Fusing([conv, add]) + schema.Fusing([conv, add_tanh]) + schema.Fusing([conv, add, tanh]) self.assertEqual(len(hm.fusing_patterns), 3) f0, f1, f2 = hm.fusing_patterns[0], hm.fusing_patterns[1], hm.fusing_patterns[2] diff --git a/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py b/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py index 038dfc769..8e8f2eac4 100644 --- a/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py +++ b/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py @@ -17,6 +17,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, BIAS, \ KERAS_DEPTHWISE_KERNEL, WEIGHTS_N_BITS @@ -32,8 +33,7 @@ Conv2DTranspose import model_compression_toolkit as mct -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, OpQuantizationConfig tp = mct.target_platform @@ -66,35 +66,41 @@ def generate_tp_model(default_config: OpQuantizationConfig, base_config: OpQuantizationConfig, mixed_precision_cfg_list: List[OpQuantizationConfig], name: str) -> TargetPlatformModel: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) - generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name) + default_configuration_options = schema.QuantizationConfigOptions( + [default_config]) + generated_tpc = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, name=name) with generated_tpc: - tp.OperatorsSet("NoQuantization", - tp.get_default_quantization_config_options() - .clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) - - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) - - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) - - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - sub = tp.OperatorsSet("Sub") - mul = tp.OperatorsSet("Mul") - div = tp.OperatorsSet("Div") - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + 
schema.OperatorsSet("NoQuantization", + tp.get_default_quantization_config_options() + .clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) + + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add") + sub = schema.OperatorsSet("Sub") + mul = schema.OperatorsSet("Mul") + div = schema.OperatorsSet("Div") + prelu = schema.OperatorsSet("PReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpc @@ -104,8 +110,8 @@ def get_int8_tpc(edit_weights_params_dict={}, edit_act_params_dict={}) -> tp.Tar return generate_keras_tpc(name='int8_tpc', tp_model=default_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version='v1') +def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel): + keras_tpc = tp.TargetPlatformCapabilities(tp_model) with keras_tpc: tp.OperationsSetToLayers("NoQuantization", [Reshape, diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py index 95ea6d019..8051b7154 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py @@ -16,10 +16,11 @@ import numpy as np import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from mct_quantizers import QuantizationMethod, KerasQuantizationWrapper from model_compression_toolkit import DefaultDict from model_compression_toolkit.core.keras.constants import GAMMA, BETA -from model_compression_toolkit.target_platform_capabilities.target_platform import Signedness +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import Signedness from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS, BIAS_ATTR from tests.common_tests.helpers.generate_test_tp_model import generate_test_attr_configs, \ DEFAULT_WEIGHT_ATTR_CONFIG, KERNEL_BASE_CONFIG, generate_test_op_qc, BIAS_CONFIG @@ -52,46 +53,50 @@ def _generate_bn_quantized_tpm(quantize_linear): bias_config=attr_cfgs_dict[BIAS_CONFIG], enable_activation_quantization=False) - bn_op_qc = tp.OpQuantizationConfig(enable_activation_quantization=False, - default_weight_attr_config=default_attr_cfg, - attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg}, - activation_n_bits=8, - supported_input_activation_n_bits=8, - activation_quantization_method=QuantizationMethod.POWER_OF_TWO, - 
quantization_preserving=False, - fixed_scale=None, - fixed_zero_point=None, - simd_size=32, - signedness=Signedness.AUTO) - - default_op_qc = tp.OpQuantizationConfig(enable_activation_quantization=False, - default_weight_attr_config=default_attr_cfg, - attr_weights_configs_mapping={}, - activation_n_bits=8, - supported_input_activation_n_bits=8, - activation_quantization_method=QuantizationMethod.POWER_OF_TWO, - quantization_preserving=False, - fixed_scale=None, - fixed_zero_point=None, - simd_size=32, - signedness=Signedness.AUTO) - - default_configuration_options = tp.QuantizationConfigOptions([default_op_qc]) - linear_configuration_options = tp.QuantizationConfigOptions([linear_op_qc]) - bn_configuration_options = tp.QuantizationConfigOptions([bn_op_qc]) - - generated_tpm = tp.TargetPlatformModel(default_configuration_options, name='bn_quantized_tpm') + bn_op_qc = schema.OpQuantizationConfig(enable_activation_quantization=False, + default_weight_attr_config=default_attr_cfg, + attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg}, + activation_n_bits=8, + supported_input_activation_n_bits=8, + activation_quantization_method=QuantizationMethod.POWER_OF_TWO, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO) + + default_op_qc = schema.OpQuantizationConfig(enable_activation_quantization=False, + default_weight_attr_config=default_attr_cfg, + attr_weights_configs_mapping={}, + activation_n_bits=8, + supported_input_activation_n_bits=8, + activation_quantization_method=QuantizationMethod.POWER_OF_TWO, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO) + + default_configuration_options = schema.QuantizationConfigOptions([default_op_qc]) + linear_configuration_options = schema.QuantizationConfigOptions([linear_op_qc]) + bn_configuration_options = schema.QuantizationConfigOptions([bn_op_qc]) + + generated_tpm = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, name='bn_quantized_tpm') with generated_tpm: - - tp.OperatorsSet("Conv", linear_configuration_options) - tp.OperatorsSet("BN", bn_configuration_options) + schema.OperatorsSet("Conv", linear_configuration_options) + schema.OperatorsSet("BN", bn_configuration_options) return generated_tpm def _generate_bn_quantized_tpc(tp_model): - tpc = tp.TargetPlatformCapabilities(tp_model, name='bn_quantized_tpc') + tpc = tp.TargetPlatformCapabilities(tp_model) with tpc: tp.OperationsSetToLayers("Conv", [layers.Conv2D], @@ -134,11 +139,13 @@ def create_networks(self): return tf.keras.models.Model(inputs=inputs, outputs=x) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): - float_bn_layer = get_layers_from_model_by_type(float_model, layers.BatchNormalization, include_wrapped_layers=False) + float_bn_layer = get_layers_from_model_by_type(float_model, layers.BatchNormalization, + include_wrapped_layers=False) self.unit_test.assertTrue(len(float_bn_layer) == 1, "Expecting the float model to have exactly 1 BN layer") float_bn_layer = float_bn_layer[0] - quant_bn_layer = get_layers_from_model_by_type(quantized_model, layers.BatchNormalization, include_wrapped_layers=True) + quant_bn_layer = get_layers_from_model_by_type(quantized_model, layers.BatchNormalization, + include_wrapped_layers=True) self.unit_test.assertTrue(len(quant_bn_layer) == 1, "Expecting 
the quantized model to have exactly 1 BN layer") quant_bn_layer = quant_bn_layer[0] @@ -155,7 +162,8 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= f_beta = f_beta[0] q_beta = q_bn_weights.get(BETA) self.unit_test.assertTrue(q_beta is not None, "Expecting quantized model BN layer to have a BETA attribute") - self.unit_test.assertTrue(np.any(f_beta != q_beta), "Float and quantized BETA attributes are expected to have different values") + self.unit_test.assertTrue(np.any(f_beta != q_beta), + "Float and quantized BETA attributes are expected to have different values") f_gamma = [w for w in f_bn_weights if GAMMA in w.name] self.unit_test.assertTrue(len(f_gamma) == 1, "Expecting float model BN layer to have a GAMMA attribute") diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/const_quantization_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/const_quantization_test.py index df8929552..5d61c9d12 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/const_quantization_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/const_quantization_test.py @@ -17,6 +17,7 @@ import numpy as np import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.core import MixedPrecisionQuantizationConfig from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v4.tp_model import generate_tp_model, \ get_op_quantization_configs @@ -48,11 +49,11 @@ def create_const_quant_tpc(qmethod): default_weight_attr_config=default_cfg.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=True, weights_n_bits=16, weights_quantization_method=qmethod)) - const_configuration_options = tp.QuantizationConfigOptions([const_config]) + const_configuration_options = schema.QuantizationConfigOptions([const_config]) const_merge_config = default_cfg.clone_and_edit( default_weight_attr_config=default_cfg.default_weight_attr_config.clone_and_edit( weights_per_channel_threshold=False)) - const_merge_configuration_options = tp.QuantizationConfigOptions([const_merge_config]) + const_merge_configuration_options = schema.QuantizationConfigOptions([const_merge_config]) operator_sets_dict = {} operator_sets_dict["Add"] = const_configuration_options @@ -187,9 +188,10 @@ def create_networks(self): x1 = layers.Add()([np.random.random((1, x.shape[-1])), x, np.random.random((1, x.shape[-1]))]) x2 = layers.Multiply()([x, np.random.random((1, x.shape[-1])), x, np.random.random((1, x.shape[-1]))]) x3 = tf.add_n([x1, as_const(x), x2]) - x1 = tf.reshape(tf.stack([as_const(x1), x1, as_const(x1)], axis=1), (-1, 3*x1.shape[1], x1.shape[2], x1.shape[3])) + x1 = tf.reshape(tf.stack([as_const(x1), x1, as_const(x1)], axis=1), + (-1, 3 * x1.shape[1], x1.shape[2], x1.shape[3])) x = tf.concat([x1, x2, as_const(x3), x3], 1) - ind_select_const = np.zeros((192*32, 38)) + ind_select_const = np.zeros((192 * 32, 38)) ind_select_const[4, :] = 100 x1 = tf.add(x, ind_select_const.reshape((192, 32, 38))) inds = tf.argmax(tf.reshape(x1, (-1, 192 * 32, 38)), axis=1) @@ -208,7 +210,8 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= self.unit_test.assertTrue(np.isclose(cs, 1, atol=0.01), msg=f'fail cosine similarity check:{cs}') # check quantization layers: - for op in [tf.concat, tf.stack, layers.Add, layers.Multiply, layers.Concatenate, tf.gather, 
tf.compat.v1.gather]: + for op in [tf.concat, tf.stack, layers.Add, layers.Multiply, layers.Concatenate, tf.gather, + tf.compat.v1.gather]: for qlayer in get_layers_from_model_by_type(quantized_model, op): self.unit_test.assertTrue(isinstance(qlayer, KerasQuantizationWrapper), msg=f"{op} should be quantized.") diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py b/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py index acafc9dd6..0d8bae6e5 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py @@ -18,6 +18,7 @@ import tensorflow as tf from keras.activations import sigmoid, softmax +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from mct_quantizers import KerasActivationQuantizationHolder from model_compression_toolkit import DefaultDict from model_compression_toolkit.core.keras.constants import SIGMOID, SOFTMAX, BIAS @@ -28,7 +29,8 @@ from keras import backend as K import model_compression_toolkit as mct -from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \ + ResourceUtilization from model_compression_toolkit.core.common.user_info import UserInformation from tests.keras_tests.tpc_keras import get_tpc_with_activation_mp_keras from tests.keras_tests.utils import get_layers_from_model_by_type @@ -94,14 +96,15 @@ def verify_quantization(self, quantized_model, input_x, weights_layers_idx, weig activation_layers_idx, unique_tensor_values): # verify weights quantization conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - for conv_layer, num_channels in zip(conv_layers,weights_layers_channels_size): + for conv_layer, num_channels in zip(conv_layers, weights_layers_channels_size): for j in range(num_channels): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layer.get_quantized_weights()['kernel'][:, :, :, j]).flatten().shape[ 0] <= unique_tensor_values) # verify activation quantization - holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder)[1:] # skip the input layer + holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder)[ + 1:] # skip the input layer inp = quantized_model.input # input placeholder out = [layer.output for layer in holder_layers] # all layer outputs get_outputs = K.function([inp], out) @@ -135,7 +138,7 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= class MixedPrecisionActivationSearch4BitsAvgTest(MixedPrecisionActivationBaseTest): def __init__(self, unit_test): - super().__init__(unit_test, activation_layers_idx=[2,4]) + super().__init__(unit_test, activation_layers_idx=[2, 4]) def get_resource_utilization(self): # resource utilization is for 4 bits on average @@ -260,7 +263,7 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= # activation bitwidth for each layer would be 4-bit, this assertion tests the expected result for this specific # test with its current setup (therefore, we don't check the relu layer's bitwidth) holder_layer = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder)[0] - 
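
The mixed-precision hunks keep the same structure and only re-source the classes from `schema`: a list of candidate configs plus a mandatory `base_config`. Sketch, assuming `mixed_precision_cfg_list` as returned by `get_op_quantization_configs()` above, and assuming `clone_and_edit` copies a config with the given fields overridden (as it is used throughout this patch):

cfg = mixed_precision_cfg_list[0]  # the 8-bit base, as in the test above
act_candidates = [cfg,
                  cfg.clone_and_edit(activation_n_bits=4),
                  cfg.clone_and_edit(activation_n_bits=2)]
act_mixed_cfg = schema.QuantizationConfigOptions(act_candidates, base_config=cfg)
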
self.unit_test.assertTrue(holder_layer.activation_holder_quantizer.get_config()['num_bits']==4) + self.unit_test.assertTrue(holder_layer.activation_holder_quantizer.get_config()['num_bits'] == 4) class MixedPrecisionActivationSplitLayerTest(MixedPrecisionActivationBaseTest): @@ -640,24 +643,29 @@ def get_tpc(self): [c.clone_and_edit(enable_activation_quantization=False) for c in mixed_precision_cfg_list] cfg = mixed_precision_cfg_list[0] - act_mixed_cfg = tp.QuantizationConfigOptions( + act_mixed_cfg = schema.QuantizationConfigOptions( [act_eight_bit_cfg, act_four_bit_cfg, act_two_bit_cfg], base_config=act_eight_bit_cfg, ) - weight_mixed_cfg = tp.QuantizationConfigOptions( + weight_mixed_cfg = schema.QuantizationConfigOptions( mixed_precision_cfg_list, base_config=cfg, ) - tp_model = tp.TargetPlatformModel(tp.QuantizationConfigOptions([cfg], cfg), - name="mp_activation_conf_weights_test") + tp_model = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([cfg], cfg), + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, + name="mp_activation_conf_weights_test") with tp_model: - tp.OperatorsSet("Activations", act_mixed_cfg) - tp.OperatorsSet("Weights", weight_mixed_cfg) + schema.OperatorsSet("Activations", act_mixed_cfg) + schema.OperatorsSet("Weights", weight_mixed_cfg) - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name="mp_activation_conf_weights_test") + keras_tpc = tp.TargetPlatformCapabilities(tp_model) with keras_tpc: tp.OperationsSetToLayers( diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/slicing_op_lambda_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/slicing_op_lambda_test.py index 315884be0..865151f8d 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/slicing_op_lambda_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/slicing_op_lambda_test.py @@ -13,8 +13,8 @@ # limitations under the License. # ============================================================================== import tensorflow as tf +from mct_quantizers.common.quant_info import QuantizationMethod -import model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config from tests.keras_tests.feature_networks_tests.base_keras_feature_test import BaseKerasFeatureNetworkTest from packaging import version @@ -39,8 +39,8 @@ def __init__(self, unit_test): def get_quantization_config(self): return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE, - model_compression_toolkit.target_platform_capabilities.target_platform.QuantizationMethod.POWER_OF_TWO, - model_compression_toolkit.target_platform_capabilities.target_platform.QuantizationMethod.POWER_OF_TWO, + QuantizationMethod.POWER_OF_TWO, + QuantizationMethod.POWER_OF_TWO, 16, 16, False, False, True) def create_networks(self): diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/tanh_activation_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/tanh_activation_test.py index d15847890..bdd70fffd 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/tanh_activation_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/tanh_activation_test.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
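
Both test files above drop the deep `target_platform_capabilities.target_platform` path for `QuantizationMethod` in favor of the `mct_quantizers` import; the resulting call reads (positional arguments exactly as in the tests above):

from mct_quantizers.common.quant_info import QuantizationMethod
import model_compression_toolkit as mct

qc = mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
                                 mct.core.QuantizationErrorMethod.MSE,
                                 QuantizationMethod.POWER_OF_TWO,
                                 QuantizationMethod.POWER_OF_TWO,
                                 16, 16, False, False, True)
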
# ============================================================================== -import model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config +from mct_quantizers.common.quant_info import QuantizationMethod + from tests.common_tests.base_feature_test import BaseFeatureNetworkTest import model_compression_toolkit as mct import tensorflow as tf @@ -30,8 +31,8 @@ def __init__(self, unit_test): def get_quantization_config(self): return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE, - model_compression_toolkit.target_platform_capabilities.target_platform.QuantizationMethod.POWER_OF_TWO, - model_compression_toolkit.target_platform_capabilities.target_platform.QuantizationMethod.POWER_OF_TWO, + QuantizationMethod.POWER_OF_TWO, + QuantizationMethod.POWER_OF_TWO, 16, 16, True, True, True) def create_networks(self): diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py index e1fb7c5b1..1e6f06deb 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py @@ -17,6 +17,7 @@ import numpy as np import tensorflow as tf +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from mct_quantizers import KerasQuantizationWrapper from model_compression_toolkit.core.keras.constants import KERNEL from model_compression_toolkit.defaultdict import DefaultDict @@ -177,22 +178,28 @@ def get_tpc(self): two_bit_cfg = mixed_precision_cfg_list[2] - weight_mixed_cfg = tp.QuantizationConfigOptions( + weight_mixed_cfg = schema.QuantizationConfigOptions( mixed_precision_cfg_list, base_config=cfg, ) - weight_fixed_cfg = tp.QuantizationConfigOptions( + weight_fixed_cfg = schema.QuantizationConfigOptions( [two_bit_cfg], base_config=two_bit_cfg, ) - tp_model = tp.TargetPlatformModel(weight_fixed_cfg, name="mp_part_weights_layers_test") + tp_model = schema.TargetPlatformModel( + weight_fixed_cfg, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, + name="mp_part_weights_layers_test") with tp_model: - tp.OperatorsSet("Weights_mp", weight_mixed_cfg) - tp.OperatorsSet("Weights_fixed", weight_fixed_cfg) + schema.OperatorsSet("Weights_mp", weight_mixed_cfg) + schema.OperatorsSet("Weights_fixed", weight_fixed_cfg) - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name="mp_part_weights_layers_test") + keras_tpc = tp.TargetPlatformCapabilities(tp_model) with keras_tpc: tp.OperationsSetToLayers( @@ -505,24 +512,29 @@ def get_tpc(self): [c.clone_and_edit(enable_activation_quantization=False) for c in mixed_precision_cfg_list] cfg = mixed_precision_cfg_list[0] - act_mixed_cfg = tp.QuantizationConfigOptions( + act_mixed_cfg = schema.QuantizationConfigOptions( [act_eight_bit_cfg, act_four_bit_cfg, act_two_bit_cfg], base_config=act_eight_bit_cfg, ) - weight_mixed_cfg = tp.QuantizationConfigOptions( + weight_mixed_cfg = schema.QuantizationConfigOptions( mixed_precision_cfg_list, base_config=cfg, ) - tp_model = tp.TargetPlatformModel(tp.QuantizationConfigOptions([cfg], cfg), - name="mp_weights_conf_act_test") + tp_model = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([cfg], cfg), + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + 
add_metadata=False, + name="mp_weights_conf_act_test") with tp_model: - tp.OperatorsSet("Activations", act_mixed_cfg) - tp.OperatorsSet("Weights", weight_mixed_cfg) + schema.OperatorsSet("Activations", act_mixed_cfg) + schema.OperatorsSet("Weights", weight_mixed_cfg) - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name="mp_weights_conf_act_test") + keras_tpc = tp.TargetPlatformCapabilities(tp_model) with keras_tpc: tp.OperationsSetToLayers( diff --git a/tests/keras_tests/function_tests/test_custom_layer.py b/tests/keras_tests/function_tests/test_custom_layer.py index 33ea3be4e..b56f1828b 100644 --- a/tests/keras_tests/function_tests/test_custom_layer.py +++ b/tests/keras_tests/function_tests/test_custom_layer.py @@ -18,7 +18,8 @@ import tensorflow as tf import model_compression_toolkit as mct -from model_compression_toolkit.target_platform_capabilities.target_platform import Signedness +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import Signedness from model_compression_toolkit.target_platform_capabilities.constants import BIAS_ATTR, KERNEL_ATTR from tests.common_tests.helpers.generate_test_tp_model import generate_test_attr_configs, DEFAULT_WEIGHT_ATTR_CONFIG, \ KERNEL_BASE_CONFIG, BIAS_CONFIG @@ -63,31 +64,35 @@ def get_tpc(): """ tp = mct.target_platform attr_cfg = generate_test_attr_configs(kernel_lut_values_bitwidth=0) - base_cfg = tp.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, - enable_activation_quantization=True, - activation_n_bits=32, - supported_input_activation_n_bits=32, - default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG], - attr_weights_configs_mapping={}, - quantization_preserving=False, - fixed_scale=1.0, - fixed_zero_point=0, - simd_size=32, - signedness=Signedness.AUTO) - - default_configuration_options = tp.QuantizationConfigOptions([base_cfg]) - tp_model = tp.TargetPlatformModel(default_configuration_options) + base_cfg = schema.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, + enable_activation_quantization=True, + activation_n_bits=32, + supported_input_activation_n_bits=32, + default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG], + attr_weights_configs_mapping={}, + quantization_preserving=False, + fixed_scale=1.0, + fixed_zero_point=0, + simd_size=32, + signedness=Signedness.AUTO) + + default_configuration_options = schema.QuantizationConfigOptions([base_cfg]) + tp_model = schema.TargetPlatformModel(default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with tp_model: default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) tpc = tp.TargetPlatformCapabilities(tp_model) with tpc: # No need to quantize Flatten and Dropout layers tp.OperationsSetToLayers("NoQuantization", [CustomIdentity, - tp.LayerFilterParams(CustomIdentityWithArg, dummy_arg=0),]) + tp.LayerFilterParams(CustomIdentityWithArg, dummy_arg=0), ]) return tpc @@ -106,7 +111,8 @@ def test_custom_layer_in_tpc(self): # verify the custom 
layer is in the quantized model self.assertTrue(isinstance(q_model.layers[2], CustomIdentity), 'Custom layer should be in the quantized model') - self.assertTrue(isinstance(q_model.layers[3], CustomIdentityWithArg), 'Custom layer should be in the quantized model') + self.assertTrue(isinstance(q_model.layers[3], CustomIdentityWithArg), + 'Custom layer should be in the quantized model') # verify the custom layer isn't quantized self.assertTrue(len(q_model.layers) == 4, 'Quantized model should have only 3 layers: Input, KerasActivationQuantizationHolder, CustomIdentity & CustomIdentityWithArg') diff --git a/tests/keras_tests/function_tests/test_hmse_error_method.py b/tests/keras_tests/function_tests/test_hmse_error_method.py index 24b7eff49..6d1f0f586 100644 --- a/tests/keras_tests/function_tests/test_hmse_error_method.py +++ b/tests/keras_tests/function_tests/test_hmse_error_method.py @@ -19,6 +19,7 @@ from tensorflow.keras import layers import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit import DefaultDict from model_compression_toolkit.core import QuantizationConfig from model_compression_toolkit.constants import THRESHOLD, RANGE_MAX, NUM_QPARAM_HESSIAN_SAMPLES @@ -29,7 +30,7 @@ calculate_quantization_params from model_compression_toolkit.core.keras.constants import KERNEL, GAMMA from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, KERAS_KERNEL, BIAS -from model_compression_toolkit.target_platform_capabilities.target_platform import AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import AttributeQuantizationConfig from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import generate_keras_tpc from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation @@ -86,7 +87,8 @@ def _setup_with_args(self, quant_method, per_channel, running_gptq=True, tpc_fn= representative_dataset, lambda name, _tp: tpc_fn(quant_method, per_channel), qc=self.qc, - running_gptq=running_gptq # to enable HMSE in params calculation if needed + running_gptq=running_gptq + # to enable HMSE in params calculation if needed ) self.his = HessianInfoService(graph=self.graph, fw_impl=self.keras_impl) @@ -123,42 +125,36 @@ def _run_node_verification(node_type): _run_node_verification(layers.Dense) def test_pot_threshold_selection_hmse_per_channel(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.POWER_OF_TWO, per_channel=True) calculate_quantization_params(self.graph, fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) self._verify_params_calculation_execution(THRESHOLD) def test_pot_threshold_selection_hmse_per_tensor(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.POWER_OF_TWO, per_channel=False) calculate_quantization_params(self.graph, fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) self._verify_params_calculation_execution(THRESHOLD) def test_symmetric_threshold_selection_hmse_per_channel(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.SYMMETRIC, per_channel=True) 
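
The custom-layer test above illustrates the recurring "NoQuantization" idiom after the migration: clone the model's default options with quantization disabled, declare the opset through `schema`, then map concrete layers via the framework-side `tp` objects. A trimmed sketch (assumes `tp_model` was built as in the test, with `tp = mct.target_platform`; `CustomIdentity` is the test's own layer class):

with tp_model:
    default_qco = tp.get_default_quantization_config_options()
    schema.OperatorsSet("NoQuantization",
                        default_qco.clone_and_edit(enable_activation_quantization=False)
                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
tpc = tp.TargetPlatformCapabilities(tp_model)
with tpc:
    # Any layer that should pass through unquantized is attached here.
    tp.OperationsSetToLayers("NoQuantization", [CustomIdentity])
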
calculate_quantization_params(self.graph, fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) self._verify_params_calculation_execution(THRESHOLD) def test_symmetric_threshold_selection_hmse_per_tensor(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.SYMMETRIC, per_channel=False) calculate_quantization_params(self.graph, fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) self._verify_params_calculation_execution(THRESHOLD) def test_usniform_threshold_selection_hmse_per_channel(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.UNIFORM, per_channel=True) calculate_quantization_params(self.graph, fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) self._verify_params_calculation_execution(RANGE_MAX) def test_uniform_threshold_selection_hmse_per_tensor(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.UNIFORM, per_channel=False) calculate_quantization_params(self.graph, fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) @@ -169,23 +165,28 @@ def test_threshold_selection_hmse_no_gptq(self): self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.SYMMETRIC, per_channel=True, running_gptq=False) self.assertTrue('The HMSE error method for parameters selection is only supported when running GPTQ ' - 'optimization due to long execution time that is not suitable for basic PTQ.' in e.exception.args[0]) + 'optimization due to long execution time that is not suitable for basic PTQ.' 
in + e.exception.args[0]) def test_threshold_selection_hmse_no_kernel_attr(self): def _generate_bn_quantization_tpc(quant_method, per_channel): cfg, _, _ = get_op_quantization_configs() - conv_qco = tp.QuantizationConfigOptions([cfg], base_config=cfg) + conv_qco = schema.QuantizationConfigOptions([cfg], base_config=cfg) # enable BN attributes quantization using the bn_qco = conv_qco.clone_and_edit(attr_weights_configs_mapping= {GAMMA: AttributeQuantizationConfig(weights_n_bits=8, enable_weights_quantization=True)}) - tp_model = tp.TargetPlatformModel(conv_qco) + tp_model = schema.TargetPlatformModel(conv_qco, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with tp_model: - tp.OperatorsSet("Linear", conv_qco) - tp.OperatorsSet("BN", bn_qco) + schema.OperatorsSet("Linear", conv_qco) + schema.OperatorsSet("BN", bn_qco) tpc = tp.TargetPlatformCapabilities(tp_model) diff --git a/tests/keras_tests/function_tests/test_layer_fusing.py b/tests/keras_tests/function_tests/test_layer_fusing.py index 8bf3c5c5e..0c8a5b2e6 100644 --- a/tests/keras_tests/function_tests/test_layer_fusing.py +++ b/tests/keras_tests/function_tests/test_layer_fusing.py @@ -2,6 +2,7 @@ import numpy as np import tensorflow as tf +import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema from model_compression_toolkit.core import DEFAULTCONFIG from model_compression_toolkit.core.common.fusion.layer_fusing import fusion from model_compression_toolkit.core.common.quantization.set_node_quantization_config import \ @@ -9,7 +10,8 @@ from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation -from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs +from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import \ + get_op_quantization_configs import model_compression_toolkit as mct from tests.common_tests.helpers.prep_graph_for_func_test import prepare_graph_with_configs @@ -79,10 +81,16 @@ def create_network_4(input_shape): def generate_base_tpc(): base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs() - default_configuration_options = tp.QuantizationConfigOptions([default_config]) - generated_tp = tp.TargetPlatformModel(default_configuration_options, name='layer_fusing_test') - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + default_configuration_options = schema.QuantizationConfigOptions( + [default_config]) + generated_tp = schema.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, name='layer_fusing_test') + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) return generated_tp, mixed_precision_configuration_options @@ -90,12 +98,12 @@ def generate_base_tpc(): def get_tpc_1(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - any_relu = tp.OperatorsSet("AnyReLU") + conv = schema.OperatorsSet("Conv", 
mixed_precision_configuration_options) + any_relu = schema.OperatorsSet("AnyReLU") # Define fusions - tp.Fusing([conv, any_relu]) + schema.Fusing([conv, any_relu]) - keras_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test') + keras_tpc = tp.TargetPlatformCapabilities(generated_tp) with keras_tpc: tp.OperationsSetToLayers("Conv", [Conv2D]) tp.OperationsSetToLayers("AnyReLU", [tf.nn.relu, @@ -107,16 +115,16 @@ def get_tpc_1(): def get_tpc_2(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - any_relu = tp.OperatorsSet("AnyReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid, tanh) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + any_relu = schema.OperatorsSet("AnyReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid, tanh) # Define fusions - tp.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([conv, activations_after_conv_to_fuse]) - keras_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test') + keras_tpc = tp.TargetPlatformCapabilities(generated_tp) with keras_tpc: tp.OperationsSetToLayers("Conv", [Conv2D, DepthwiseConv2D]) tp.OperationsSetToLayers("AnyReLU", [tf.nn.relu, @@ -131,12 +139,12 @@ def get_tpc_2(): def get_tpc_3(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - any_relu = tp.OperatorsSet("AnyReLU") + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + any_relu = schema.OperatorsSet("AnyReLU") # Define fusions - tp.Fusing([conv, any_relu]) + schema.Fusing([conv, any_relu]) - keras_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test') + keras_tpc = tp.TargetPlatformCapabilities(generated_tp) with keras_tpc: tp.OperationsSetToLayers("Conv", [Conv2D]) tp.OperationsSetToLayers("AnyReLU", [tf.nn.relu, @@ -148,19 +156,19 @@ def get_tpc_3(): def get_tpc_4(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - swish = tp.OperatorsSet("Swish") - activations_to_fuse = tp.OperatorSetConcat(any_relu, swish) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add") + swish = schema.OperatorsSet("Swish") + activations_to_fuse = schema.OperatorSetConcat(any_relu, swish) # Define fusions - tp.Fusing([conv, activations_to_fuse]) - tp.Fusing([conv, add, activations_to_fuse]) - tp.Fusing([conv, activations_to_fuse, add]) - tp.Fusing([fc, activations_to_fuse]) + schema.Fusing([conv, activations_to_fuse]) + schema.Fusing([conv, add, activations_to_fuse]) + schema.Fusing([conv, activations_to_fuse, add]) + schema.Fusing([fc, activations_to_fuse]) - keras_tpc = tp.TargetPlatformCapabilities(generated_tp, 
name='layer_fusing_test')
+        keras_tpc = tp.TargetPlatformCapabilities(generated_tp)
         with keras_tpc:
             tp.OperationsSetToLayers("Conv", [Conv2D])
             tp.OperationsSetToLayers("FullyConnected", [Dense])
@@ -216,7 +224,8 @@ def test_layer_fusing_3(self):
         self._compare(fusion_graph.fused_nodes, expected_fusions)
 
     def test_layer_fusing_4(self):
-        expected_fusions = [[Conv2D, Activation, Add], [Conv2D, Activation, Add], [Conv2D, Activation], [Conv2D, ReLU, Add], [Dense, tf.nn.silu], [Dense, Activation]]
+        expected_fusions = [[Conv2D, Activation, Add], [Conv2D, Activation, Add], [Conv2D, Activation],
+                            [Conv2D, ReLU, Add], [Dense, tf.nn.silu], [Dense, Activation]]
         model = create_network_4(INPUT_SHAPE)
 
         fusion_graph = prepare_graph_with_configs(model, KerasImplementation(), DEFAULT_KERAS_INFO,
diff --git a/tests/keras_tests/function_tests/test_node_quantization_configurations.py b/tests/keras_tests/function_tests/test_node_quantization_configurations.py
index 462a9b0e8..82f2f4069 100644
--- a/tests/keras_tests/function_tests/test_node_quantization_configurations.py
+++ b/tests/keras_tests/function_tests/test_node_quantization_configurations.py
@@ -23,7 +23,7 @@
     power_of_two_selection_histogram
 from model_compression_toolkit.core.common.quantization.quantizers.uniform_quantizers import power_of_two_quantizer
 from model_compression_toolkit.core.keras.constants import KERNEL, BIAS
-from model_compression_toolkit.target_platform_capabilities.target_platform import AttributeQuantizationConfig
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import AttributeQuantizationConfig
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs
diff --git a/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py b/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py
index 61642d29a..6f4478aff 100644
--- a/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py
+++ b/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py
@@ -22,6 +22,7 @@
 from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
 
+import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.core.common import BaseNode
 from tests.common_tests.helpers.generate_test_tp_model import generate_test_op_qc, generate_test_attr_configs
@@ -47,9 +48,8 @@
 
 tp = mct.target_platform
 
-
 TEST_QC = generate_test_op_qc(**generate_test_attr_configs())
-TEST_QCO = tp.QuantizationConfigOptions([TEST_QC])
+TEST_QCO = schema.QuantizationConfigOptions([TEST_QC])
 
 
 def get_node(layer) -> BaseNode:
@@ -104,9 +104,14 @@ def test_keras_layers_with_params(self):
         self.assertFalse(get_node(conv).is_match_filter_params(conv_filter_contains))
 
     def test_get_layers_by_op(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = schema.TargetPlatformModel(
+            schema.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            tpc_platform_type=None,
+            add_metadata=False)
         with hm:
-            op_obj = tp.OperatorsSet('opsetA')
+            op_obj = schema.OperatorsSet('opsetA')
         fw_tp = TargetPlatformCapabilities(hm)
         with fw_tp:
             opset_layers = [Conv2D, LayerFilterParams(ReLU, max_value=2)]
@@ -116,11 +121,16 @@ def test_get_layers_by_op(self):
         self.assertEqual(fw_tp.get_layers_by_opset_name('nonExistingOpsetName'), None)
 
     def test_get_layers_by_opconcat(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = schema.TargetPlatformModel(
+            schema.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            tpc_platform_type=None,
+            add_metadata=False)
         with hm:
-            op_obj_a = tp.OperatorsSet('opsetA')
-            op_obj_b = tp.OperatorsSet('opsetB')
-            op_concat = tp.OperatorSetConcat(op_obj_a, op_obj_b)
+            op_obj_a = schema.OperatorsSet('opsetA')
+            op_obj_b = schema.OperatorsSet('opsetB')
+            op_concat = schema.OperatorSetConcat(op_obj_a, op_obj_b)
 
         fw_tp = TargetPlatformCapabilities(hm)
         with fw_tp:
@@ -133,10 +143,15 @@ def test_get_layers_by_opconcat(self):
         self.assertEqual(fw_tp.get_layers_by_opset(op_concat), opset_layers_a + opset_layers_b)
 
     def test_layer_attached_to_multiple_opsets(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = schema.TargetPlatformModel(
+            schema.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            tpc_platform_type=None,
+            add_metadata=False)
         with hm:
-            tp.OperatorsSet('opsetA')
-            tp.OperatorsSet('opsetB')
+            schema.OperatorsSet('opsetA')
+            schema.OperatorsSet('opsetB')
 
         fw_tp = TargetPlatformCapabilities(hm)
         with self.assertRaises(Exception) as e:
@@ -146,10 +161,15 @@ def test_layer_attached_to_multiple_opsets(self):
         self.assertEqual('Found layer Conv2D in more than one OperatorsSet', str(e.exception))
 
     def test_filter_layer_attached_to_multiple_opsets(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = schema.TargetPlatformModel(
+            schema.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            tpc_platform_type=None,
+            add_metadata=False)
         with hm:
-            tp.OperatorsSet('opsetA')
-            tp.OperatorsSet('opsetB')
+            schema.OperatorsSet('opsetA')
+            schema.OperatorsSet('opsetB')
 
         fw_tp = TargetPlatformCapabilities(hm)
         with self.assertRaises(Exception) as e:
@@ -159,23 +179,28 @@ def test_filter_layer_attached_to_multiple_opsets(self):
         self.assertEqual('Found layer Activation(activation=relu) in more than one OperatorsSet', str(e.exception))
 
     def test_qco_by_keras_layer(self):
-        default_qco = tp.QuantizationConfigOptions([TEST_QC])
+        default_qco = schema.QuantizationConfigOptions([TEST_QC])
         default_qco = default_qco.clone_and_edit(attr_weights_configs_mapping={})
-        tpm = tp.TargetPlatformModel(default_qco, name='test')
+        tpm = schema.TargetPlatformModel(default_qco,
+                                         tpc_minor_version=None,
+                                         tpc_patch_version=None,
+                                         tpc_platform_type=None,
+                                         add_metadata=False,
+                                         name='test')
         with tpm:
-            mixed_precision_configuration_options = tp.QuantizationConfigOptions(
+            mixed_precision_configuration_options = schema.QuantizationConfigOptions(
                 quantization_config_list=[TEST_QC,
                                           TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}}),
                                           TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}})],
                 base_config=TEST_QC)
-            tp.OperatorsSet("conv", mixed_precision_configuration_options)
+            schema.OperatorsSet("conv", mixed_precision_configuration_options)
             sevenbit_qco = TEST_QCO.clone_and_edit(activation_n_bits=7,
                                                    attr_weights_configs_mapping={})
-            tp.OperatorsSet("tanh", sevenbit_qco)
-            tp.OperatorsSet("relu")
+            schema.OperatorsSet("tanh", sevenbit_qco)
+            schema.OperatorsSet("relu")
 
-        tpc_keras = tp.TargetPlatformCapabilities(tpm, name='fw_test')
+        tpc_keras = tp.TargetPlatformCapabilities(tpm)
         with tpc_keras:
             tp.OperationsSetToLayers("conv", [Conv2D],
                                      attr_mapping={KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL),
@@ -195,13 +220,18 @@ def test_qco_by_keras_layer(self):
                          len(mixed_precision_configuration_options.quantization_config_list))
         for i in range(len(conv_qco.quantization_config_list)):
             self.assertEqual(conv_qco.quantization_config_list[i].attr_weights_configs_mapping[KERAS_KERNEL],
-                             mixed_precision_configuration_options.quantization_config_list[i].attr_weights_configs_mapping[KERNEL_ATTR])
+                             mixed_precision_configuration_options.quantization_config_list[
+                                 i].attr_weights_configs_mapping[KERNEL_ATTR])
         self.assertEqual(tanh_qco, sevenbit_qco)
         self.assertEqual(relu_qco, default_qco)
 
     def test_opset_not_in_tp(self):
-        default_qco = tp.QuantizationConfigOptions([TEST_QC])
-        hm = tp.TargetPlatformModel(default_qco)
+        default_qco = schema.QuantizationConfigOptions([TEST_QC])
+        hm = schema.TargetPlatformModel(default_qco,
+                                        tpc_minor_version=None,
+                                        tpc_patch_version=None,
+                                        tpc_platform_type=None,
+                                        add_metadata=False)
         hm_keras = tp.TargetPlatformCapabilities(hm)
         with self.assertRaises(Exception) as e:
             with hm_keras:
@@ -211,14 +241,18 @@ def test_opset_not_in_tp(self):
                          str(e.exception))
 
     def test_keras_fusing_patterns(self):
-        default_qco = tp.QuantizationConfigOptions([TEST_QC])
-        hm = tp.TargetPlatformModel(default_qco)
+        default_qco = schema.QuantizationConfigOptions([TEST_QC])
+        hm = schema.TargetPlatformModel(default_qco,
+                                        tpc_minor_version=None,
+                                        tpc_patch_version=None,
+                                        tpc_platform_type=None,
+                                        add_metadata=False)
         with hm:
-            a = tp.OperatorsSet("opA")
-            b = tp.OperatorsSet("opB")
-            c = tp.OperatorsSet("opC")
-            tp.Fusing([a, b, c])
-            tp.Fusing([a, c])
+            a = schema.OperatorsSet("opA")
+            b = schema.OperatorsSet("opB")
+            c = schema.OperatorsSet("opC")
+            schema.Fusing([a, b, c])
+            schema.Fusing([a, c])
 
         hm_keras = tp.TargetPlatformCapabilities(hm)
         with hm_keras:
@@ -240,10 +274,14 @@ def test_keras_fusing_patterns(self):
         self.assertEqual(p1[1], LayerFilterParams(ReLU, Greater("max_value", 7), negative_slope=0))
 
     def test_get_default_op_qc(self):
-        default_qco = tp.QuantizationConfigOptions([TEST_QC])
-        tpm = tp.TargetPlatformModel(default_qco)
+        default_qco = schema.QuantizationConfigOptions([TEST_QC])
+        tpm = schema.TargetPlatformModel(default_qco,
+                                         tpc_minor_version=None,
+                                         tpc_patch_version=None,
+                                         tpc_platform_type=None,
+                                         add_metadata=False)
         with tpm:
-            a = tp.OperatorsSet("opA")
+            a = schema.OperatorsSet("opA")
 
         tpc = tp.TargetPlatformCapabilities(tpm)
         with tpc:
@@ -269,47 +307,49 @@ def rep_data():
         quantized_model, _ = mct.ptq.keras_post_training_quantization(model,
                                                                       rep_data,
                                                                       target_platform_capabilities=tpc)
 
-        core_config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=2,
-                                                                                                           use_hessian_based_scores=False))
+        core_config = mct.core.CoreConfig(
+            mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=2,
+                                                                             use_hessian_based_scores=False))
         quantized_model, _ = mct.ptq.keras_post_training_quantization(model,
                                                                       rep_data,
                                                                       core_config=core_config,
-                                                                      target_resource_utilization=mct.core.ResourceUtilization(np.inf),
+                                                                      target_resource_utilization=mct.core.ResourceUtilization(
+                                                                          np.inf),
                                                                       target_platform_capabilities=tpc)
 
     def test_get_keras_supported_version(self):
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL)  # Latest
-        self.assertTrue(tpc.version == 'v1')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
 
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL, 'v1_pot')
-        self.assertTrue(tpc.version == 'v1_pot')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL, 'v1_lut')
-        self.assertTrue(tpc.version == 'v1_lut')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL, 'v1')
-        self.assertTrue(tpc.version == 'v1')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL, 'v2_lut')
-        self.assertTrue(tpc.version == 'v2_lut')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 2)
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL, 'v2')
-        self.assertTrue(tpc.version == 'v2')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 2)
 
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, IMX500_TP_MODEL, "v1")
-        self.assertTrue(tpc.version == 'v1')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, IMX500_TP_MODEL, "v2")
-        self.assertTrue(tpc.version == 'v2')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 2)
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, IMX500_TP_MODEL, "v1_lut")
-        self.assertTrue(tpc.version == 'v1_lut')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, IMX500_TP_MODEL, "v2_lut")
-        self.assertTrue(tpc.version == 'v2_lut')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 2)
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, IMX500_TP_MODEL, "v1_pot")
-        self.assertTrue(tpc.version == 'v1_pot')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
 
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, TFLITE_TP_MODEL, "v1")
-        self.assertTrue(tpc.version == 'v1')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
 
         tpc = mct.get_target_platform_capabilities(TENSORFLOW, QNNPACK_TP_MODEL, "v1")
-        self.assertTrue(tpc.version == 'v1')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
 
     def test_get_keras_not_supported_platform(self):
         with self.assertRaises(Exception) as e:
diff --git a/tests/pytorch_tests/function_tests/layer_fusing_test.py b/tests/pytorch_tests/function_tests/layer_fusing_test.py
index ee24b3324..ccf131ddd 100644
--- a/tests/pytorch_tests/function_tests/layer_fusing_test.py
+++ b/tests/pytorch_tests/function_tests/layer_fusing_test.py
@@ -17,14 +17,17 @@
 from torch.nn import Conv2d, ReLU, SiLU, Sigmoid, Linear, Hardtanh
 from torch.nn.functional import relu, relu6
 
+import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
 from model_compression_toolkit.target_platform_capabilities.target_platform import LayerFilterParams
 from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation
-from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs
+from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import \
+    get_op_quantization_configs
 from tests.common_tests.helpers.prep_graph_for_func_test import prepare_graph_with_configs
 from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest
 import model_compression_toolkit as mct
 
+
 tp = mct.target_platform
@@ -47,16 +50,22 @@ def get_type(self, fusion):
 
     def get_tpc(self):
         base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
-        default_configuration_options = tp.QuantizationConfigOptions([default_config])
-        generated_tp = tp.TargetPlatformModel(default_configuration_options, name='layer_fusing_test')
-        mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list,
-                                                                             base_config=base_config)
+        default_configuration_options = schema.QuantizationConfigOptions([default_config])
+        generated_tp = schema.TargetPlatformModel(default_configuration_options,
+                                                  tpc_minor_version=None,
+                                                  tpc_patch_version=None,
+                                                  tpc_platform_type=None,
+                                                  name='layer_fusing_test')
+        mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list,
+                                                                                 base_config=base_config)
         return generated_tp, mixed_precision_configuration_options
 
     def _compare(self, fused_nodes):
-        self.unit_test.assertTrue(len(fused_nodes) == len(self.expected_fusions), msg=f'Number of fusions is not as expected!')
+        self.unit_test.assertTrue(len(fused_nodes) == len(self.expected_fusions),
+                                  msg=f'Number of fusions is not as expected!')
         for i, fusion in enumerate(fused_nodes):
-            self.unit_test.assertTrue(self.get_type(fusion) == self.expected_fusions[i], msg=f'Miss-match fusion compared to expected!')
+            self.unit_test.assertTrue(self.get_type(fusion) == self.expected_fusions[i],
+                                      msg=f'Mismatched fusion compared to expected!')
 
 
 class LayerFusingTest1(BaseLayerFusingTest):
@@ -67,12 +76,12 @@ def __init__(self, unit_test):
     def get_tpc(self):
         generated_tp, mixed_precision_configuration_options = super().get_tpc()
         with generated_tp:
-            conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options)
-            any_relu = tp.OperatorsSet("AnyReLU")
+            conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
+            any_relu = schema.OperatorsSet("AnyReLU")
             # Define fusions
-            tp.Fusing([conv, any_relu])
+            schema.Fusing([conv, any_relu])
 
-        pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test')
+        pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp)
         with pytorch_tpc:
             tp.OperationsSetToLayers("Conv", [nn.Conv2d])
             tp.OperationsSetToLayers("AnyReLU", [torch.relu,
@@ -109,15 +118,16 @@ def __init__(self, unit_test):
     def get_tpc(self):
         generated_tp, mixed_precision_configuration_options = super().get_tpc()
         with generated_tp:
-            conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options)
-            any_act = tp.OperatorsSet("AnyAct")
+            conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
+            any_act = schema.OperatorsSet("AnyAct")
             # Define fusions
-            tp.Fusing([conv, any_act])
+            schema.Fusing([conv, any_act])
 
-        pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test')
+        pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp)
         with pytorch_tpc:
             tp.OperationsSetToLayers("Conv", [Conv2d])
-            tp.OperationsSetToLayers("AnyAct", [ReLU,relu6,relu,SiLU,Sigmoid, LayerFilterParams(Hardtanh, min_val=0)])
+            tp.OperationsSetToLayers("AnyAct",
+                                     [ReLU, relu6, relu, SiLU, Sigmoid, LayerFilterParams(Hardtanh, min_val=0)])
         return pytorch_tpc
 
     def run_test(self, seed=0):
@@ -130,11 +140,11 @@ def run_test(self, seed=0):
     class LayerFusingNetTest(nn.Module):
         def __init__(self):
             super().__init__()
-            self.conv1 = nn.Conv2d(3, 32, kernel_size=(3,3))
-            self.conv2 = nn.Conv2d(32, 32, kernel_size=(1,1))
-            self.conv3 = nn.Conv2d(32, 32, kernel_size=(3,3))
-            self.conv4 = nn.Conv2d(32, 64, kernel_size=(1,1))
-            self.conv5 = nn.Conv2d(64, 64, kernel_size=(2,2))
+            self.conv1 = nn.Conv2d(3, 32, kernel_size=(3, 3))
+            self.conv2 = nn.Conv2d(32, 32, kernel_size=(1, 1))
+            self.conv3 = nn.Conv2d(32, 32, kernel_size=(3, 3))
+            self.conv4 = nn.Conv2d(32, 64, kernel_size=(1, 1))
+            self.conv5 = nn.Conv2d(64, 64, kernel_size=(2, 2))
             self.relu = nn.ReLU()
             self.tanh = Hardtanh(min_val=0)
             self.swish = nn.SiLU()
@@ -161,15 +171,15 @@ def __init__(self, unit_test):
     def get_tpc(self):
         generated_tp, mixed_precision_configuration_options = super().get_tpc()
         with generated_tp:
-            conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options)
-            any_act = tp.OperatorsSet("AnyAct")
+            conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
+            any_act = schema.OperatorsSet("AnyAct")
             # Define fusions
-            tp.Fusing([conv, any_act])
+            schema.Fusing([conv, any_act])
 
-        pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test')
+        pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp)
         with pytorch_tpc:
             tp.OperationsSetToLayers("Conv", [Conv2d])
-            tp.OperationsSetToLayers("AnyAct", [ReLU,relu6,relu])
+            tp.OperationsSetToLayers("AnyAct", [ReLU, relu6, relu])
         return pytorch_tpc
 
     def run_test(self, seed=0):
@@ -182,11 +192,11 @@ def run_test(self, seed=0):
     class LayerFusingNetTest(nn.Module):
         def __init__(self):
             super().__init__()
-            self.conv1 = nn.Conv2d(3, 32, kernel_size=(3,3))
-            self.conv2 = nn.Conv2d(32, 32, kernel_size=(1,1))
-            self.conv3 = nn.Conv2d(32, 32, kernel_size=(3,3))
-            self.conv4 = nn.Conv2d(32, 64, kernel_size=(1,1))
-            self.conv5 = nn.Conv2d(64, 64, kernel_size=(2,2))
+            self.conv1 = nn.Conv2d(3, 32, kernel_size=(3, 3))
+            self.conv2 = nn.Conv2d(32, 32, kernel_size=(1, 1))
+            self.conv3 = nn.Conv2d(32, 32, kernel_size=(3, 3))
+            self.conv4 = nn.Conv2d(32, 64, kernel_size=(1, 1))
+            self.conv5 = nn.Conv2d(64, 64, kernel_size=(2, 2))
             self.relu = nn.ReLU()
             self.tanh = nn.Tanh()
             self.swish = nn.SiLU()
@@ -208,24 +218,25 @@ def forward(self, x):
 
 class LayerFusingTest4(BaseLayerFusingTest):
     def __init__(self, unit_test):
         super().__init__(unit_test)
-        self.expected_fusions = [[Conv2d, SiLU, torch.add], [Conv2d, SiLU, torch.add], [Conv2d, ReLU], [Conv2d, ReLU, torch.add], [Linear, SiLU], [Linear, SiLU]]
+        self.expected_fusions = [[Conv2d, SiLU, torch.add], [Conv2d, SiLU, torch.add], [Conv2d, ReLU],
+                                 [Conv2d, ReLU, torch.add], [Linear, SiLU], [Linear, SiLU]]
 
     def get_tpc(self):
         generated_tp, mixed_precision_configuration_options = super().get_tpc()
         with generated_tp:
-            conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options)
-            fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
-            any_relu = tp.OperatorsSet("AnyReLU")
-            add = tp.OperatorsSet("Add")
-            swish = tp.OperatorsSet("Swish")
-            activations_to_fuse = tp.OperatorSetConcat(any_relu, swish)
+            conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
+            fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
+            any_relu = schema.OperatorsSet("AnyReLU")
+            add = schema.OperatorsSet("Add")
+            swish = schema.OperatorsSet("Swish")
+            activations_to_fuse = schema.OperatorSetConcat(any_relu, swish)
             # Define fusions
-            tp.Fusing([conv, activations_to_fuse])
-            tp.Fusing([conv, add, activations_to_fuse])
-            tp.Fusing([conv, activations_to_fuse, add])
-            tp.Fusing([fc, activations_to_fuse])
+            schema.Fusing([conv, activations_to_fuse])
+            schema.Fusing([conv, add, activations_to_fuse])
+            schema.Fusing([conv, activations_to_fuse, add])
+            schema.Fusing([fc, activations_to_fuse])
 
-        pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test')
+        pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp)
         with pytorch_tpc:
             tp.OperationsSetToLayers("Conv", [Conv2d])
             tp.OperationsSetToLayers("FullyConnected", [Linear])
@@ -244,12 +255,12 @@ def run_test(self, seed=0):
     class LayerFusingNetTest(nn.Module):
         def __init__(self):
             super().__init__()
-            self.conv1 = nn.Conv2d(3, 3, kernel_size=(3,3), padding='same')
-            self.conv2 = nn.Conv2d(3, 3, kernel_size=(1,1), padding='same')
-            self.conv3 = nn.Conv2d(3, 3, kernel_size=(3,3), padding='same')
-            self.conv4 = nn.Conv2d(3, 3, kernel_size=(1,1), padding='same')
-            self.conv5 = nn.Conv2d(3, 3, kernel_size=(3,3), padding='same')
-            self.conv6 = nn.Conv2d(3, 3, kernel_size=(1,1), padding='same')
+            self.conv1 = nn.Conv2d(3, 3, kernel_size=(3, 3), padding='same')
+            self.conv2 = nn.Conv2d(3, 3, kernel_size=(1, 1), padding='same')
+            self.conv3 = nn.Conv2d(3, 3, kernel_size=(3, 3), padding='same')
+            self.conv4 = nn.Conv2d(3, 3, kernel_size=(1, 1), padding='same')
+            self.conv5 = nn.Conv2d(3, 3, kernel_size=(3, 3), padding='same')
+            self.conv6 = nn.Conv2d(3, 3, kernel_size=(1, 1), padding='same')
             self.relu = nn.ReLU()
             self.swish = nn.SiLU()
             self.flatten = nn.Flatten()
diff --git a/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py b/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py
index 1ce79dd82..cb7c7647d 100644
--- a/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py
+++ b/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py
@@ -23,13 +23,16 @@
 from torchvision.models import mobilenet_v2
 
 import model_compression_toolkit as mct
+import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
 from model_compression_toolkit.core import MixedPrecisionQuantizationConfig
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.constants import PYTORCH
 from model_compression_toolkit.core.common import BaseNode
 from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
-from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import LayerFilterParams
-from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attribute_filter import Greater, Smaller, Eq
+from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import \
+    LayerFilterParams
+from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attribute_filter import \
+    Greater, Smaller, Eq
 from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL, IMX500_TP_MODEL, \
     TFLITE_TP_MODEL, QNNPACK_TP_MODEL, KERNEL_ATTR, WEIGHTS_N_BITS, PYTORCH_KERNEL, BIAS_ATTR, BIAS
 from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation
@@ -38,9 +41,8 @@
 
 tp = mct.target_platform
 
-
 TEST_QC = generate_test_op_qc(**generate_test_attr_configs())
-TEST_QCO = tp.QuantizationConfigOptions([TEST_QC])
+TEST_QCO = schema.QuantizationConfigOptions([TEST_QC])
 
 
 class TestPytorchTPModel(unittest.TestCase):
@@ -66,42 +68,50 @@ def test_pytorch_layers_with_params(self):
         hardtanh_with_params = LayerFilterParams(hardtanh, Greater("max_val", 2) & Smaller("min_val", 1))
         self.assertTrue(get_node(partial(hardtanh, max_val=3, min_val=0)).is_match_filter_params(hardtanh_with_params))
         self.assertFalse(get_node(partial(hardtanh, max_val=3, min_val=1)).is_match_filter_params(hardtanh_with_params))
-        self.assertFalse(get_node(partial(hardtanh, max_val=2, min_val=0.5)).is_match_filter_params(hardtanh_with_params))
+        self.assertFalse(
+            get_node(partial(hardtanh, max_val=2, min_val=0.5)).is_match_filter_params(hardtanh_with_params))
         self.assertFalse(get_node(partial(hardtanh, max_val=2)).is_match_filter_params(hardtanh_with_params))
-        self.assertFalse(get_node(partial(hardtanh, max_val=1, min_val=0.5)).is_match_filter_params(hardtanh_with_params))
-
-        l2norm_tflite_opset = LayerFilterParams(torch.nn.functional.normalize, Eq('p',2) | Eq('p',None))
-        self.assertTrue(get_node(partial(torch.nn.functional.normalize, p=2)).is_match_filter_params(l2norm_tflite_opset))
-        self.assertTrue(get_node(partial(torch.nn.functional.normalize, p=2.0)).is_match_filter_params(l2norm_tflite_opset))
+        self.assertFalse(
+            get_node(partial(hardtanh, max_val=1, min_val=0.5)).is_match_filter_params(hardtanh_with_params))
+
+        l2norm_tflite_opset = LayerFilterParams(torch.nn.functional.normalize, Eq('p', 2) | Eq('p', None))
+        self.assertTrue(
+            get_node(partial(torch.nn.functional.normalize, p=2)).is_match_filter_params(l2norm_tflite_opset))
+        self.assertTrue(
+            get_node(partial(torch.nn.functional.normalize, p=2.0)).is_match_filter_params(l2norm_tflite_opset))
         self.assertTrue(get_node(torch.nn.functional.normalize).is_match_filter_params(l2norm_tflite_opset))
-        self.assertFalse(get_node(partial(torch.nn.functional.normalize, p=3.0)).is_match_filter_params(l2norm_tflite_opset))
-
-
+        self.assertFalse(
+            get_node(partial(torch.nn.functional.normalize, p=3.0)).is_match_filter_params(l2norm_tflite_opset))
 
     def test_qco_by_pytorch_layer(self):
-        default_qco = tp.QuantizationConfigOptions([TEST_QC])
+        default_qco = schema.QuantizationConfigOptions([TEST_QC])
         default_qco = default_qco.clone_and_edit(attr_weights_configs_mapping={})
-        tpm = tp.TargetPlatformModel(default_qco, name='test')
+        tpm = schema.TargetPlatformModel(default_qco,
+                                         tpc_minor_version=None,
+                                         tpc_patch_version=None,
+                                         tpc_platform_type=None,
+                                         add_metadata=False,
+                                         name='test')
         with tpm:
-            mixed_precision_configuration_options = tp.QuantizationConfigOptions(
+            mixed_precision_configuration_options = schema.QuantizationConfigOptions(
                 [TEST_QC,
                  TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}}),
                  TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}})],
                 base_config=TEST_QC)
-            tp.OperatorsSet("conv", mixed_precision_configuration_options)
+            schema.OperatorsSet("conv", mixed_precision_configuration_options)
             sevenbit_qco = TEST_QCO.clone_and_edit(activation_n_bits=7,
                                                    attr_weights_configs_mapping={})
-            tp.OperatorsSet("tanh", sevenbit_qco)
+            schema.OperatorsSet("tanh", sevenbit_qco)
             sixbit_qco = TEST_QCO.clone_and_edit(activation_n_bits=6,
                                                  attr_weights_configs_mapping={})
-            tp.OperatorsSet("avg_pool2d_kernel_2", sixbit_qco)
-            tp.OperatorsSet("avg_pool2d")
+            schema.OperatorsSet("avg_pool2d_kernel_2", sixbit_qco)
+            schema.OperatorsSet("avg_pool2d")
 
-        tpc_pytorch = tp.TargetPlatformCapabilities(tpm, name='fw_test')
+        tpc_pytorch = tp.TargetPlatformCapabilities(tpm)
         with tpc_pytorch:
             tp.OperationsSetToLayers("conv", [torch.nn.Conv2d],
                                      attr_mapping={KERNEL_ATTR: DefaultDict(default_value=PYTORCH_KERNEL),
@@ -133,9 +143,14 @@ def test_qco_by_pytorch_layer(self):
         self.assertEqual(avg_pool2d_qco, default_qco)
 
     def test_get_layers_by_op(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = schema.TargetPlatformModel(
+            schema.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            tpc_platform_type=None,
+            add_metadata=False)
         with hm:
-            op_obj = tp.OperatorsSet('opsetA')
+            op_obj = schema.OperatorsSet('opsetA')
         fw_tp = TargetPlatformCapabilities(hm)
         with fw_tp:
             opset_layers = [torch.nn.Conv2d, LayerFilterParams(torch.nn.Softmax, dim=1)]
@@ -144,11 +159,16 @@ def test_get_layers_by_op(self):
         self.assertEqual(fw_tp.get_layers_by_opset(op_obj), opset_layers)
 
     def test_get_layers_by_opconcat(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = schema.TargetPlatformModel(
+            schema.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            tpc_platform_type=None,
+            add_metadata=False)
         with hm:
-            op_obj_a = tp.OperatorsSet('opsetA')
-            op_obj_b = tp.OperatorsSet('opsetB')
-            op_concat = tp.OperatorSetConcat(op_obj_a, op_obj_b)
+            op_obj_a = schema.OperatorsSet('opsetA')
+            op_obj_b = schema.OperatorsSet('opsetB')
+            op_concat = schema.OperatorSetConcat(op_obj_a, op_obj_b)
 
         fw_tp = TargetPlatformCapabilities(hm)
         with fw_tp:
@@ -161,10 +181,15 @@ def test_get_layers_by_opconcat(self):
         self.assertEqual(fw_tp.get_layers_by_opset(op_concat), opset_layers_a + opset_layers_b)
 
     def test_layer_attached_to_multiple_opsets(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = schema.TargetPlatformModel(
+            schema.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            tpc_platform_type=None,
+            add_metadata=False)
         with hm:
-            tp.OperatorsSet('opsetA')
-            tp.OperatorsSet('opsetB')
+            schema.OperatorsSet('opsetA')
+            schema.OperatorsSet('opsetB')
 
         fw_tp = TargetPlatformCapabilities(hm)
         with self.assertRaises(Exception) as e:
@@ -174,10 +199,15 @@ def test_layer_attached_to_multiple_opsets(self):
         self.assertEqual('Found layer Conv2d in more than one OperatorsSet', str(e.exception))
 
     def test_filter_layer_attached_to_multiple_opsets(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = schema.TargetPlatformModel(
+            schema.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            tpc_platform_type=None,
+            add_metadata=False)
         with hm:
-            tp.OperatorsSet('opsetA')
-            tp.OperatorsSet('opsetB')
+            schema.OperatorsSet('opsetA')
+            schema.OperatorsSet('opsetB')
 
         fw_tp = TargetPlatformCapabilities(hm)
         with self.assertRaises(Exception) as e:
@@ -187,8 +217,12 @@ def test_filter_layer_attached_to_multiple_opsets(self):
         self.assertEqual('Found layer Softmax(dim=2) in more than one OperatorsSet', str(e.exception))
 
     def test_opset_not_in_tp(self):
-        default_qco = tp.QuantizationConfigOptions([TEST_QC])
-        hm = tp.TargetPlatformModel(default_qco)
+        default_qco = schema.QuantizationConfigOptions([TEST_QC])
+        hm = schema.TargetPlatformModel(default_qco,
+                                        tpc_minor_version=None,
+                                        tpc_patch_version=None,
+                                        tpc_platform_type=None,
+                                        add_metadata=False)
         hm_pytorch = tp.TargetPlatformCapabilities(hm)
         with self.assertRaises(Exception) as e:
             with hm_pytorch:
@@ -198,14 +232,19 @@ def test_opset_not_in_tp(self):
                          str(e.exception))
 
     def test_pytorch_fusing_patterns(self):
-        default_qco = tp.QuantizationConfigOptions([TEST_QC])
-        hm = tp.TargetPlatformModel(default_qco)
+        default_qco = schema.QuantizationConfigOptions(
+            [TEST_QC])
+        hm = schema.TargetPlatformModel(default_qco,
+                                        tpc_minor_version=None,
+                                        tpc_patch_version=None,
+                                        tpc_platform_type=None,
+                                        add_metadata=False)
         with hm:
-            a = tp.OperatorsSet("opA")
-            b = tp.OperatorsSet("opB")
-            c = tp.OperatorsSet("opC")
-            tp.Fusing([a, b, c])
-            tp.Fusing([a, c])
+            a = schema.OperatorsSet("opA")
+            b = schema.OperatorsSet("opB")
+            c = schema.OperatorsSet("opC")
+            schema.Fusing([a, b, c])
+            schema.Fusing([a, c])
 
         hm_keras = tp.TargetPlatformCapabilities(hm)
         with hm_keras:
@@ -246,29 +285,30 @@ def rep_data():
                                                                         mixed_precision_config=mp_qc)
         quantized_model, _ = mct.ptq.pytorch_post_training_quantization(model,
                                                                         rep_data,
-                                                                        target_resource_utilization=mct.core.ResourceUtilization(np.inf),
+                                                                        target_resource_utilization=mct.core.ResourceUtilization(
+                                                                            np.inf),
                                                                         target_platform_capabilities=tpc,
                                                                         core_config=core_config)
 
     def test_get_pytorch_supported_version(self):
         tpc = mct.get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)  # Latest
-        self.assertTrue(tpc.version == 'v1')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
 
         tpc = mct.get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL, 'v1')
-        self.assertTrue(tpc.version == 'v1')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
         tpc = mct.get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL, 'v2')
-        self.assertTrue(tpc.version == 'v2')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 2)
 
         tpc = mct.get_target_platform_capabilities(PYTORCH, IMX500_TP_MODEL, "v1")
-        self.assertTrue(tpc.version == 'v1')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
         tpc = mct.get_target_platform_capabilities(PYTORCH, IMX500_TP_MODEL, "v2")
-        self.assertTrue(tpc.version == 'v2')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 2)
 
         tpc = mct.get_target_platform_capabilities(PYTORCH, TFLITE_TP_MODEL, "v1")
-        self.assertTrue(tpc.version == 'v1')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
 
         tpc = mct.get_target_platform_capabilities(PYTORCH, QNNPACK_TP_MODEL, "v1")
-        self.assertTrue(tpc.version == 'v1')
+        self.assertTrue(tpc.tp_model.tpc_minor_version == 1)
 
     def test_get_pytorch_not_supported_platform(self):
         with self.assertRaises(Exception) as e:
diff --git a/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py b/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py
index ebc308cfe..e51ead220 100644
--- a/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py
+++ b/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py
@@ -16,13 +16,15 @@
 from torch import nn
 
 import model_compression_toolkit as mct
+import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
 from mct_quantizers import QuantizationMethod, PytorchQuantizationWrapper
 from model_compression_toolkit import DefaultDict
 from model_compression_toolkit.core.pytorch.constants import GAMMA, BETA
-from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS, BIAS_ATTR
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS, \
+    BIAS_ATTR
 from tests.common_tests.helpers.generate_test_tp_model import generate_test_attr_configs, \
     DEFAULT_WEIGHT_ATTR_CONFIG, KERNEL_BASE_CONFIG, generate_test_op_qc, BIAS_CONFIG
-from model_compression_toolkit.target_platform_capabilities.target_platform import Signedness
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import Signedness
 from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest
 from tests.pytorch_tests.utils import get_layers_from_model_by_type
@@ -50,46 +52,50 @@ def _generate_bn_quantized_tpm(quantize_linear):
                                            bias_config=attr_cfgs_dict[BIAS_CONFIG],
                                            enable_activation_quantization=False)
 
-    bn_op_qc = tp.OpQuantizationConfig(enable_activation_quantization=False,
-                                       default_weight_attr_config=default_attr_cfg,
-                                       attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg},
-                                       activation_n_bits=8,
-                                       supported_input_activation_n_bits=8,
-                                       activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
-                                       quantization_preserving=False,
-                                       fixed_scale=None,
-                                       fixed_zero_point=None,
-                                       simd_size=32,
-                                       signedness=Signedness.AUTO)
-
-    default_op_qc = tp.OpQuantizationConfig(enable_activation_quantization=False,
-                                            default_weight_attr_config=default_attr_cfg,
-                                            attr_weights_configs_mapping={},
-                                            activation_n_bits=8,
-                                            supported_input_activation_n_bits=8,
-                                            activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
-                                            quantization_preserving=False,
-                                            fixed_scale=None,
-                                            fixed_zero_point=None,
-                                            simd_size=32,
-                                            signedness=Signedness.AUTO)
-
-    default_configuration_options = tp.QuantizationConfigOptions([default_op_qc])
-    linear_configuration_options = tp.QuantizationConfigOptions([linear_op_qc])
-    bn_configuration_options = tp.QuantizationConfigOptions([bn_op_qc])
-
-    generated_tpm = tp.TargetPlatformModel(default_configuration_options, name='bn_quantized_tpm')
+    bn_op_qc = schema.OpQuantizationConfig(enable_activation_quantization=False,
+                                           default_weight_attr_config=default_attr_cfg,
+                                           attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg},
+                                           activation_n_bits=8,
+                                           supported_input_activation_n_bits=8,
+                                           activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
+                                           quantization_preserving=False,
+                                           fixed_scale=None,
+                                           fixed_zero_point=None,
+                                           simd_size=32,
+                                           signedness=Signedness.AUTO)
+
+    default_op_qc = schema.OpQuantizationConfig(enable_activation_quantization=False,
+                                                default_weight_attr_config=default_attr_cfg,
+                                                attr_weights_configs_mapping={},
+                                                activation_n_bits=8,
+                                                supported_input_activation_n_bits=8,
+                                                activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
+                                                quantization_preserving=False,
+                                                fixed_scale=None,
+                                                fixed_zero_point=None,
+                                                simd_size=32,
+                                                signedness=Signedness.AUTO)
+
+    default_configuration_options = schema.QuantizationConfigOptions([default_op_qc])
+    linear_configuration_options = schema.QuantizationConfigOptions([linear_op_qc])
+    bn_configuration_options = schema.QuantizationConfigOptions([bn_op_qc])
+
+    generated_tpm = schema.TargetPlatformModel(
+        default_configuration_options,
+        tpc_minor_version=None,
+        tpc_patch_version=None,
+        tpc_platform_type=None,
+        add_metadata=False, name='bn_quantized_tpm')
 
     with generated_tpm:
-
-        tp.OperatorsSet("Conv", linear_configuration_options)
-        tp.OperatorsSet("BN", bn_configuration_options)
+        schema.OperatorsSet("Conv", linear_configuration_options)
+        schema.OperatorsSet("BN", bn_configuration_options)
 
     return generated_tpm
 
 
 def _generate_bn_quantized_tpc(tp_model):
-    tpc = tp.TargetPlatformCapabilities(tp_model, name='bn_quantized_tpc')
+    tpc = tp.TargetPlatformCapabilities(tp_model)
 
     with tpc:
         tp.OperationsSetToLayers("Conv", [nn.Conv2d],
diff --git a/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py b/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py
index 099b74f7c..fea672a49 100644
--- a/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py
+++ b/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py
@@ -17,7 +17,8 @@
 import torch.nn as nn
 import numpy as np
 import model_compression_toolkit as mct
-from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import Signedness
+import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import Signedness
 from model_compression_toolkit.core import MixedPrecisionQuantizationConfig
 from model_compression_toolkit.core.pytorch.utils import to_torch_tensor, torch_tensor_to_numpy, set_model
 from tests.pytorch_tests.model_tests.base_pytorch_feature_test import BasePytorchFeatureNetworkTest
@@ -60,7 +61,7 @@ def __init__(self, unit_test, func, const, input_reverse_order=False):
         self.input_reverse_order = input_reverse_order
 
     def generate_inputs(self):
-        return [np.random.random(in_shape)+1 for in_shape in self.get_input_shapes()]
+        return [np.random.random(in_shape) + 1 for in_shape in self.get_input_shapes()]
 
     def get_tpc(self):
         return mct.get_target_platform_capabilities(PYTORCH, IMX500_TP_MODEL, "v3")
@@ -117,7 +118,7 @@ def get_resource_utilization(self):
         return mct.core.ResourceUtilization(9e3)
 
     def generate_inputs(self):
-        return [np.random.random(in_shape)+1 for in_shape in self.get_input_shapes()]
+        return [np.random.random(in_shape) + 1 for in_shape in self.get_input_shapes()]
 
     def get_tpc(self):
         return mct.get_target_platform_capabilities(PYTORCH, IMX500_TP_MODEL, "v3")
@@ -154,7 +155,7 @@ def __init__(self):
         self.register_buffer('concatenate_const_3', to_torch_tensor(np.random.randint(-128, 127, size=(1, 3, 36, 36))))
         self.register_buffer('stack_const_1', to_torch_tensor(np.random.randint(-128, 127, size=(1, 39, 36, 36))))
         self.register_buffer('stack_const_2', to_torch_tensor(np.random.randint(-128, 127, size=(1, 39, 36, 36))))
-        self.register_buffer('gather_const', to_torch_tensor(np.random.randint(-128, 127, size=(1, 2*36*36))))
+        self.register_buffer('gather_const', to_torch_tensor(np.random.randint(-128, 127, size=(1, 2 * 36 * 36))))
 
     def forward(self, x):
         x = torch.cat([self.cat_const_1, x, self.cat_const_2], dim=2)
@@ -163,9 +164,9 @@ def forward(self, x):
                                self.concatenate_const_2, x, self.concatenate_const_3, self.concatenate_const_1],
                               dim=1)
         x = torch.stack([self.stack_const_1, x, self.stack_const_2], dim=1)
-        x = torch.reshape(x, (1, 3*39, 36, 36))
+        x = torch.reshape(x, (1, 3 * 39, 36, 36))
 
-        inds = torch.argmax(torch.reshape(x, (-1, 117, 36*36)), dim=2)
+        inds = torch.argmax(torch.reshape(x, (-1, 117, 36 * 36)), dim=2)
         b = torch.reshape(torch.gather(self.gather_const, 1, inds), (-1, 117, 1, 1))
         return x + b
 
@@ -203,9 +204,11 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info=
 class ExpandConstQuantizationNet(nn.Module):
     def __init__(self, batch_size):
         super().__init__()
-        self.register_buffer('cat_const', to_torch_tensor(np.random.randint(-128, 127, size=(batch_size, 3, 32, 32)).astype(np.float32)))
+        self.register_buffer('cat_const', to_torch_tensor(
+            np.random.randint(-128, 127, size=(batch_size, 3, 32, 32)).astype(np.float32)))
         self.register_parameter('expand_const',
-                                nn.Parameter(to_torch_tensor(np.random.randint(-128, 127, size=(1, 2, 32, 1)).astype(np.float32)),
+                                nn.Parameter(to_torch_tensor(
+                                    np.random.randint(-128, 127, size=(1, 2, 32, 1)).astype(np.float32)),
                                              requires_grad=False))
 
     def forward(self, x):
@@ -225,29 +228,35 @@ def generate_inputs(self):
 
     def get_tpc(self):
         tp = mct.target_platform
         attr_cfg = generate_test_attr_configs()
-        base_cfg = tp.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
-                                           enable_activation_quantization=True,
-                                           activation_n_bits=32,
-                                           supported_input_activation_n_bits=32,
-                                           default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG],
-                                           attr_weights_configs_mapping={},
-                                           quantization_preserving=False,
-                                           fixed_scale=1.0,
-                                           fixed_zero_point=0,
-                                           simd_size=32,
-                                           signedness=Signedness.AUTO)
-
-        default_configuration_options = tp.QuantizationConfigOptions([base_cfg])
+        base_cfg = schema.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+                                               enable_activation_quantization=True,
+                                               activation_n_bits=32,
+                                               supported_input_activation_n_bits=32,
+                                               default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG],
+                                               attr_weights_configs_mapping={},
+                                               quantization_preserving=False,
+                                               fixed_scale=1.0,
+                                               fixed_zero_point=0,
+                                               simd_size=32,
+                                               signedness=Signedness.AUTO)
+
+        default_configuration_options = schema.QuantizationConfigOptions([base_cfg])
 
         const_config = base_cfg.clone_and_edit(enable_activation_quantization=False,
                                                default_weight_attr_config=base_cfg.default_weight_attr_config.clone_and_edit(
-                                                   enable_weights_quantization=True, weights_per_channel_threshold=False,
+                                                   enable_weights_quantization=True,
+                                                   weights_per_channel_threshold=False,
                                                    weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO))
-        const_configuration_options = tp.QuantizationConfigOptions([const_config])
-
-        tp_model = tp.TargetPlatformModel(default_configuration_options)
+        const_configuration_options = schema.QuantizationConfigOptions([const_config])
+
+        tp_model = schema.TargetPlatformModel(
+            default_configuration_options,
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            tpc_platform_type=None,
+            add_metadata=False)
         with tp_model:
-            tp.OperatorsSet("WeightQuant", const_configuration_options)
+            schema.OperatorsSet("WeightQuant", const_configuration_options)
 
         tpc = tp.TargetPlatformCapabilities(tp_model)
         with tpc:
diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py
index 426e71f83..881bfda44 100644
--- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py
+++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py
@@ -22,8 +22,9 @@
 from model_compression_toolkit.core.common.user_info import UserInformation
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \
     BIAS
-from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationConfigOptions, \
-    TargetPlatformModel, OperatorsSet, TargetPlatformCapabilities, OperationsSetToLayers
+from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities, OperationsSetToLayers
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, OperatorsSet, \
+    QuantizationConfigOptions
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs
 from tests.common_tests.helpers.generate_test_tp_model import generate_tp_model_with_activation_mp
 from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest
@@ -301,13 +302,17 @@ def get_tpc(self):
         )
 
         tp_model = TargetPlatformModel(QuantizationConfigOptions([cfg], cfg),
+                                       tpc_minor_version=None,
+                                       tpc_patch_version=None,
+                                       tpc_platform_type=None,
+                                       add_metadata=False,
                                        name="mp_activation_conf_weights_test")
 
         with tp_model:
             OperatorsSet("Activations", act_mixed_cfg)
             OperatorsSet("Weights", weight_mixed_cfg)
 
-        torch_tpc = TargetPlatformCapabilities(tp_model, name="mp_activation_conf_weights_test")
+        torch_tpc = TargetPlatformCapabilities(tp_model)
 
         with torch_tpc:
             OperationsSetToLayers(
diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py
index f88b9270f..38a112550 100644
--- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py
+++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py
@@ -16,15 +16,19 @@
 import numpy as np
 from torch.nn import Conv2d
 
+import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.core import ResourceUtilization
 from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting
 from model_compression_toolkit.core.common.user_info import UserInformation
 from model_compression_toolkit.core.pytorch.constants import BIAS
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR
-from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationConfigOptions, \
-    TargetPlatformModel, OperatorsSet, TargetPlatformCapabilities, OperationsSetToLayers
-from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_tp_model, get_op_quantization_configs
+from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities, \
+    OperationsSetToLayers
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, OperatorsSet, \
+    QuantizationConfigOptions
+from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_tp_model, \
+    get_op_quantization_configs
 from tests.common_tests.helpers.generate_test_tp_model import generate_mixed_precision_test_tp_model
 from tests.pytorch_tests.tpc_pytorch import get_pytorch_test_tpc_dict
 from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest
@@ -137,22 +141,27 @@ def get_tpc(self):
 
         two_bit_cfg = mixed_precision_cfg_list[2]
 
-        weight_mixed_cfg = tp.QuantizationConfigOptions(
+        weight_mixed_cfg = schema.QuantizationConfigOptions(
             mixed_precision_cfg_list,
             base_config=cfg,
         )
 
-        weight_fixed_cfg = tp.QuantizationConfigOptions(
+        weight_fixed_cfg = schema.QuantizationConfigOptions(
             [two_bit_cfg],
             base_config=two_bit_cfg,
         )
 
-        tp_model = tp.TargetPlatformModel(weight_fixed_cfg, name="mp_part_weights_layers_test")
+        tp_model = schema.TargetPlatformModel(
+            weight_fixed_cfg,
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            tpc_platform_type=None,
+            name="mp_part_weights_layers_test")
         with tp_model:
-            tp.OperatorsSet("Weights_mp", weight_mixed_cfg)
-            tp.OperatorsSet("Weights_fixed", weight_fixed_cfg)
+            schema.OperatorsSet("Weights_mp", weight_mixed_cfg)
+            schema.OperatorsSet("Weights_fixed", weight_fixed_cfg)
 
-        pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, name="mp_part_weights_layers_test")
+        pytorch_tpc = tp.TargetPlatformCapabilities(tp_model)
 
         with pytorch_tpc:
             tp.OperationsSetToLayers(
@@ -203,6 +212,7 @@ def compare(self, quantized_models, float_model, input_x=None, quantization_info
             self.unit_test.assertTrue(
                 np.unique(q_weights[i, :]).flatten().shape[0] <= 4)
 
+
 class MixedPrecisionSearch2Bit(MixedPrecisionBaseTest):
     def __init__(self, unit_test):
         super().__init__(unit_test)
@@ -309,13 +319,16 @@ def get_tpc(self):
         )
 
         tp_model = TargetPlatformModel(QuantizationConfigOptions([cfg], cfg),
+                                       tpc_minor_version=None,
+                                       tpc_patch_version=None,
+                                       tpc_platform_type=None,
                                        name="mp_weights_conf_act_test")
 
         with tp_model:
             OperatorsSet("Activations", act_mixed_cfg)
             OperatorsSet("Weights", weight_mixed_cfg)
 
-        torch_tpc = TargetPlatformCapabilities(tp_model, name="mp_weights_conf_act_test")
+        torch_tpc = TargetPlatformCapabilities(tp_model)
 
         with torch_tpc:
             OperationsSetToLayers(
@@ -353,4 +366,4 @@ def forward(self, inp):
         x = self.conv1(inp)
         x = torch.add(x, x)
         output = self.relu(x)
-        return output
\ No newline at end of file
+        return output
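
The hunks above all apply the same mechanical migration: schema entities (TargetPlatformModel, QuantizationConfigOptions, OperatorsSet, OperatorSetConcat, Fusing, OpQuantizationConfig, Signedness) are imported from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema instead of the target_platform package; TargetPlatformModel now takes its TPC version fields explicitly; the framework-side TargetPlatformCapabilities wrapper no longer accepts a name argument; and version assertions read the integer tp_model.tpc_minor_version rather than a version string. A minimal sketch of the new pattern, assuming the test helpers used throughout this patch (the opset name 'opsetA' is illustrative, not part of the patch):

import model_compression_toolkit as mct
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
from model_compression_toolkit.constants import TENSORFLOW
from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
from tests.common_tests.helpers.generate_test_tp_model import generate_test_op_qc, generate_test_attr_configs

tp = mct.target_platform

# Quantization config options are built from the schema module (was: tp.QuantizationConfigOptions).
qc = generate_test_op_qc(**generate_test_attr_configs())
qco = schema.QuantizationConfigOptions([qc])

# TargetPlatformModel moved to the schema module and takes the TPC version fields
# explicitly; the tests in this patch pass None and disable metadata.
tp_model = schema.TargetPlatformModel(qco,
                                      tpc_minor_version=None,
                                      tpc_patch_version=None,
                                      tpc_platform_type=None,
                                      add_metadata=False)
with tp_model:
    schema.OperatorsSet('opsetA')  # illustrative opset name

# The framework wrapper keeps its old entry point but no longer takes a name argument.
tpc = tp.TargetPlatformCapabilities(tp_model)

# Version checks now read an integer minor version off the model (was: tpc.version == 'v1').
latest = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL)
assert latest.tp_model.tpc_minor_version == 1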