diff --git a/model_compression_toolkit/qat/keras/quantizer/README.md b/model_compression_toolkit/qat/keras/quantizer/README.md
index afa5ffcd6..7f0252cb6 100644
--- a/model_compression_toolkit/qat/keras/quantizer/README.md
+++ b/model_compression_toolkit/qat/keras/quantizer/README.md
@@ -5,7 +5,7 @@
 Several training methods may be applied by the user to train the QAT ready model
 created by `keras_quantization_aware_training_init` method in [`keras/quantization_facade`](../quantization_facade.py).
 Each `TrainingMethod` (an enum defined in the [`qat_config`](../../common/qat_config.py))
-and [`QuantizationMethod`](../../../target_platform_capabilities/target_platform/op_quantization_config.py)
+and `QuantizationMethod`
 selects a quantizer for weights and a quantizer for activations.
 
 Currently, only the STE (straight through estimator) training method is implemented by the MCT.
diff --git a/model_compression_toolkit/qat/pytorch/quantizer/README.md b/model_compression_toolkit/qat/pytorch/quantizer/README.md
index 9a0d911d0..169e335c2 100644
--- a/model_compression_toolkit/qat/pytorch/quantizer/README.md
+++ b/model_compression_toolkit/qat/pytorch/quantizer/README.md
@@ -5,7 +5,7 @@
 Several training methods may be applied by the user to train the QAT ready model
 created by `pytorch_quantization_aware_training_init` method in [`pytorch/quantization_facade`](../quantization_facade.py).
 Each [`TrainingMethod`](../../../trainable_infrastructure/common/training_method.py)
-and [`QuantizationMethod`](../../../target_platform_capabilities/target_platform/op_quantization_config.py)
+and `QuantizationMethod`
 selects a quantizer for weights and a quantizer for activations.
 
 ## Make your own training method
diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py b/model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py
deleted file mode 100644
index fa9315adc..000000000
--- a/model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-
diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py b/model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py
deleted file mode 100644
index fa9315adc..000000000
--- a/model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-
diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/operators.py b/model_compression_toolkit/target_platform_capabilities/target_platform/operators.py
deleted file mode 100644
index fa9315adc..000000000
--- a/model_compression_toolkit/target_platform_capabilities/target_platform/operators.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-
diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py
index 06ca789aa..55c25ffd7 100644
--- a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py
+++ b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py
@@ -33,14 +33,18 @@ class TargetPlatformCapabilities(ImmutableClass):
     """
     Attach framework information to a modeled hardware.
     """
-    def __init__(self, tp_model: TargetPlatformModel):
+    def __init__(self,
+                 tp_model: TargetPlatformModel,
+                 name: str = "base"):
         """
 
         Args:
             tp_model (TargetPlatformModel): Modeled hardware to attach framework information to.
+            name (str): Name of the TargetPlatformCapabilities.
         """
 
         super().__init__()
+        self.name = name
         assert isinstance(tp_model, TargetPlatformModel), f'Target platform model that was passed to TargetPlatformCapabilities must be of type TargetPlatformModel, but has type of {type(tp_model)}'
         self.tp_model = tp_model
         self.op_sets_to_layers = OperationsToLayers()  # Init an empty OperationsToLayers
@@ -107,7 +111,9 @@ def get_info(self) -> Dict[str, Any]:
 
         """
         return {"Target Platform Capabilities": self.name,
-                "Version": self.version,
+                "Minor version": self.tp_model.tpc_minor_version,
+                "Patch version": self.tp_model.tpc_patch_version,
+                "Platform type": self.tp_model.tpc_platform_type,
                 "Target Platform Model": self.tp_model.get_info(),
                 "Operations to layers": {op2layer.name:[l.__name__ for l in op2layer.layers] for op2layer in
                                          self.op_sets_to_layers.op_sets_to_layers}}
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py
index 4e79838ec..004ce0f28 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py
@@ -15,7 +15,7 @@
 from typing import List, Tuple
 
 import model_compression_toolkit as mct
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
     IMX500_TP_MODEL
@@ -63,7 +63,8 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
         weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         weights_n_bits=8,
         weights_per_channel_threshold=False,
-        enable_weights_quantization=False, # TODO: this will changed to True once implementing multi-attributes quantization
+        enable_weights_quantization=False,
+        # TODO: this will be changed to True once multi-attribute quantization is implemented
         lut_values_bitwidth=None)
 
     # define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
@@ -88,7 +89,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
 
     # We define a default config for operation without kernel attribute.
     # This is the default config that should be used for non-linear operations.
-    eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
+    eight_bits_default = schema.OpQuantizationConfig(
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
@@ -102,7 +103,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
         signedness=Signedness.AUTO)
 
     # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
-    linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
+    linear_eight_bits = schema.OpQuantizationConfig(
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
@@ -152,12 +153,12 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
     # If the QuantizationConfigOptions contains only one configuration,
     # this configuration will be used for the operation quantization:
-    default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config])
+    default_configuration_options = schema.QuantizationConfigOptions([default_config])
 
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):
-    generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+    generated_tpc = schema.TargetPlatformModel(
         default_configuration_options,
         tpc_minor_version=1,
         tpc_patch_version=0,
@@ -178,42 +179,42 @@ def generate_tp_model(default_config: OpQuantizationConfig,
 
     # May suit for operations like: Dropout, Reshape, etc.
     default_qco = tp.get_default_quantization_config_options()
-    model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization",
-                                                                                  default_qco.clone_and_edit(enable_activation_quantization=False)
-                                                                                  .clone_and_edit_weight_attribute(enable_weights_quantization=False))
+    schema.OperatorsSet("NoQuantization",
+                        default_qco.clone_and_edit(enable_activation_quantization=False)
+                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
 
     # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
-    mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list,
-                                                                                                                                       base_config=base_config)
+    mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list,
+                                                                             base_config=base_config)
 
     # Define operator sets that use mixed_precision_configuration_options:
-    conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options)
-    fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
+    conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
+    fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
 
     # Define operations sets without quantization configuration
     # options (useful for creating fusing patterns, for example):
-    any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU")
-    add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add")
-    sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub")
-    mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul")
-    div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div")
-    prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU")
-    swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish")
-    sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid")
-    tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh")
+    any_relu = schema.OperatorsSet("AnyReLU")
+    add = schema.OperatorsSet("Add")
+    sub = schema.OperatorsSet("Sub")
+    mul = schema.OperatorsSet("Mul")
+    div = schema.OperatorsSet("Div")
+    prelu = schema.OperatorsSet("PReLU")
+    swish = schema.OperatorsSet("Swish")
+    sigmoid = schema.OperatorsSet("Sigmoid")
+    tanh = schema.OperatorsSet("Tanh")
 
     # Combine multiple operators into a single operator to avoid quantization between
     # them. To do this we define fusing patterns using the OperatorsSets that were created.
     # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
-    activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
-    activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid)
-    any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div)
+    activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
+    activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid)
+    any_binary = schema.OperatorSetConcat(add, sub, mul, div)
 
     # ------------------- #
     # Fusions
     # ------------------- #
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse])
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse])
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu])
+    schema.Fusing([conv, activations_after_conv_to_fuse])
+    schema.Fusing([fc, activations_after_fc_to_fuse])
+    schema.Fusing([any_binary, any_relu])
 
     return generated_tpc
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py
index 02f0155d6..202d9802c 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py
@@ -15,7 +15,7 @@
 import tensorflow as tf
 from packaging import version
 
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \
@@ -49,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities:
     return generate_keras_tpc(name='imx500_tpc_keras_tpc',
                               tp_model=imx500_tpc_tp_model)
 
-def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
+def generate_keras_tpc(name: str, tp_model: TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
 
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py
index ca6a3bf22..a5b5af2c9 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py
@@ -23,13 +23,12 @@
 from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU
 from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu
 
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \
     BIAS
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tp_model import get_tp_model
 import model_compression_toolkit as mct
-from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1 import __version__ as TPC_VERSION
 
 tp = mct.target_platform
 
@@ -43,7 +42,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities:
     return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc',
                                 tp_model=imx500_tpc_tp_model)
 
-def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
+def generate_pytorch_tpc(name: str, tp_model: TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
     Args:
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py
index b4d03390f..f455e4168 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py
@@ -15,7 +15,7 @@
 from typing import List, Tuple
 
 import model_compression_toolkit as mct
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
     WEIGHTS_QUANTIZATION_METHOD, IMX500_TP_MODEL
@@ -83,7 +83,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
 
     # We define a default config for operation without kernel attribute.
    # This is the default config that should be used for non-linear operations.
-    eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
+    eight_bits_default = schema.OpQuantizationConfig(
        default_weight_attr_config=default_weight_attr_config,
        attr_weights_configs_mapping={},
        activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
@@ -97,7 +97,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
        signedness=Signedness.AUTO)
 
     # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
-    linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
+    linear_eight_bits = schema.OpQuantizationConfig(
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
@@ -150,12 +150,12 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
     # If the QuantizationConfigOptions contains only one configuration,
     # this configuration will be used for the operation quantization:
-    default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config])
+    default_configuration_options = schema.QuantizationConfigOptions([default_config])
 
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):
-    generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+    generated_tpc = schema.TargetPlatformModel(
         default_configuration_options,
         tpc_minor_version=1,
         tpc_patch_version=0,
@@ -174,42 +174,42 @@ def generate_tp_model(default_config: OpQuantizationConfig,
 
     # May suit for operations like: Dropout, Reshape, etc.
     default_qco = tp.get_default_quantization_config_options()
-    model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization",
-                                                                                  default_qco.clone_and_edit(enable_activation_quantization=False)
-                                                                                  .clone_and_edit_weight_attribute(enable_weights_quantization=False))
+    schema.OperatorsSet("NoQuantization",
+                        default_qco.clone_and_edit(enable_activation_quantization=False)
+                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
 
     # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
-    mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list,
-                                                                                                                                       base_config=base_config)
+    mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list,
+                                                                             base_config=base_config)
 
     # Define operator sets that use mixed_precision_configuration_options:
-    conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options)
-    fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
+    conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
+    fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
 
     # Define operations sets without quantization configuration
     # options (useful for creating fusing patterns, for example):
-    any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU")
-    add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add")
-    sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub")
-    mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul")
-    div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div")
-    prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU")
-    swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish")
-    sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid")
-    tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh")
+    any_relu = schema.OperatorsSet("AnyReLU")
+    add = schema.OperatorsSet("Add")
+    sub = schema.OperatorsSet("Sub")
+    mul = schema.OperatorsSet("Mul")
+    div = schema.OperatorsSet("Div")
+    prelu = schema.OperatorsSet("PReLU")
+    swish = schema.OperatorsSet("Swish")
+    sigmoid = schema.OperatorsSet("Sigmoid")
+    tanh = schema.OperatorsSet("Tanh")
 
     # Combine multiple operators into a single operator to avoid quantization between
     # them. To do this we define fusing patterns using the OperatorsSets that were created.
     # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
-    activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
-    activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid)
-    any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div)
+    activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
+    activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid)
+    any_binary = schema.OperatorSetConcat(add, sub, mul, div)
 
     # ------------------- #
     # Fusions
     # ------------------- #
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse])
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse])
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu])
+    schema.Fusing([conv, activations_after_conv_to_fuse])
+    schema.Fusing([fc, activations_after_fc_to_fuse])
+    schema.Fusing([any_binary, any_relu])
 
     return generated_tpc
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py
index b0c278670..81a902047 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py
@@ -15,7 +15,7 @@
 import tensorflow as tf
 from packaging import version
 
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \
@@ -49,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities:
     return generate_keras_tpc(name='imx500_tpc_keras_tpc',
                               tp_model=imx500_tpc_tp_model)
 
-def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
+def generate_keras_tpc(name: str, tp_model: TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
 
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py
index 7545331fe..e69e76082 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py
@@ -23,7 +23,7 @@
 from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU
 from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu
 
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \
     BIAS
@@ -43,7 +43,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities:
     return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc',
                                 tp_model=imx500_tpc_tp_model)
 
-def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
+def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
     Args:
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py
index 083d10fc7..58aed4ded 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py
@@ -15,7 +15,7 @@
 from typing import List, Tuple
 
 import model_compression_toolkit as mct
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
     IMX500_TP_MODEL
@@ -83,7 +83,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
 
     # We define a default config for operation without kernel attribute.
     # This is the default config that should be used for non-linear operations.
-    eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
+    eight_bits_default = schema.OpQuantizationConfig(
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
@@ -97,7 +97,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
         signedness=Signedness.AUTO)
 
     # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
-    linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
+    linear_eight_bits = schema.OpQuantizationConfig(
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
@@ -146,12 +146,12 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
     # If the QuantizationConfigOptions contains only one configuration,
     # this configuration will be used for the operation quantization:
-    default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config])
+    default_configuration_options = schema.QuantizationConfigOptions([default_config])
 
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):
-    generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+    generated_tpc = schema.TargetPlatformModel(
         default_configuration_options,
         tpc_minor_version=1,
         tpc_patch_version=0,
@@ -170,42 +170,42 @@ def generate_tp_model(default_config: OpQuantizationConfig,
 
     # May suit for operations like: Dropout, Reshape, etc.
     default_qco = tp.get_default_quantization_config_options()
-    model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization",
-                                                                                  default_qco.clone_and_edit(enable_activation_quantization=False)
-                                                                                  .clone_and_edit_weight_attribute(enable_weights_quantization=False))
+    schema.OperatorsSet("NoQuantization",
+                        default_qco.clone_and_edit(enable_activation_quantization=False)
+                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
 
     # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
-    mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list,
-                                                                                                                                       base_config=base_config)
+    mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list,
+                                                                             base_config=base_config)
 
     # Define operator sets that use mixed_precision_configuration_options:
-    conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options)
-    fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
+    conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
+    fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
 
     # Define operations sets without quantization configuration
     # options (useful for creating fusing patterns, for example):
-    any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU")
-    add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add")
-    sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub")
-    mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul")
-    div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div")
-    prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU")
-    swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish")
-    sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid")
-    tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh")
+    any_relu = schema.OperatorsSet("AnyReLU")
+    add = schema.OperatorsSet("Add")
+    sub = schema.OperatorsSet("Sub")
+    mul = schema.OperatorsSet("Mul")
+    div = schema.OperatorsSet("Div")
+    prelu = schema.OperatorsSet("PReLU")
+    swish = schema.OperatorsSet("Swish")
+    sigmoid = schema.OperatorsSet("Sigmoid")
+    tanh = schema.OperatorsSet("Tanh")
 
     # Combine multiple operators into a single operator to avoid quantization between
     # them. To do this we define fusing patterns using the OperatorsSets that were created.
     # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
-    activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
-    activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid)
-    any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div)
+    activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
+    activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid)
+    any_binary = schema.OperatorSetConcat(add, sub, mul, div)
 
     # ------------------- #
     # Fusions
     # ------------------- #
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse])
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse])
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu])
+    schema.Fusing([conv, activations_after_conv_to_fuse])
+    schema.Fusing([fc, activations_after_fc_to_fuse])
+    schema.Fusing([any_binary, any_relu])
 
     return generated_tpc
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py
index 3173878f0..4e291a0af 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py
@@ -15,7 +15,7 @@
 import tensorflow as tf
 from packaging import version
 
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \
@@ -49,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities:
     return generate_keras_tpc(name='imx500_pot_tpc_keras_tpc',
                               tp_model=imx500_pot_tpc_tp_model)
 
-def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
+def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
 
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py
index dde0ff2c3..ac98a618a 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py
@@ -23,7 +23,7 @@
 from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU
 from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu
 
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \
     BIAS
@@ -44,7 +44,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities:
     return generate_pytorch_tpc(name='imx500_pot_tpc_pytorch_tpc',
                                 tp_model=imx500_pot_tpc_tp_model)
 
-def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
+def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
     Args:
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py
index 9922dcd3e..0003fc9f0 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py
@@ -15,7 +15,7 @@
 from typing import List, Tuple
 
 import model_compression_toolkit as mct
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
     IMX500_TP_MODEL
@@ -65,7 +65,8 @@ def get_op_quantization_configs() -> \
         weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         weights_n_bits=8,
         weights_per_channel_threshold=False,
-        enable_weights_quantization=False, # TODO: this will changed to True once implementing multi-attributes quantization
+        enable_weights_quantization=False,
+        # TODO: this will be changed to True once multi-attribute quantization is implemented
         lut_values_bitwidth=None)
 
     # define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
@@ -90,7 +91,7 @@ def get_op_quantization_configs() -> \
 
     # We define a default config for operation without kernel attribute.
     # This is the default config that should be used for non-linear operations.
-    eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
+    eight_bits_default = schema.OpQuantizationConfig(
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
@@ -104,7 +105,7 @@ def get_op_quantization_configs() -> \
         signedness=Signedness.AUTO)
 
     # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
-    linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
+    linear_eight_bits = schema.OpQuantizationConfig(
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
@@ -154,12 +155,12 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
     # If the QuantizationConfigOptions contains only one configuration,
     # this configuration will be used for the operation quantization:
-    default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config])
+    default_configuration_options = schema.QuantizationConfigOptions([default_config])
 
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):
-    generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+    generated_tpm = schema.TargetPlatformModel(
         default_configuration_options,
         tpc_minor_version=2,
         tpc_patch_version=0,
@@ -180,42 +181,42 @@ def generate_tp_model(default_config: OpQuantizationConfig,
 
     # May suit for operations like: Dropout, Reshape, etc.
     default_qco = tp.get_default_quantization_config_options()
-    model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization",
-                                                                                  default_qco.clone_and_edit(enable_activation_quantization=False)
-                                                                                  .clone_and_edit_weight_attribute(enable_weights_quantization=False))
+    schema.OperatorsSet("NoQuantization",
+                        default_qco.clone_and_edit(enable_activation_quantization=False)
+                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
 
     # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
-    mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list,
-                                                                                                                                       base_config=base_config)
+    mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list,
+                                                                             base_config=base_config)
 
     # Define operator sets that use mixed_precision_configuration_options:
-    conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options)
-    fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
+    conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
+    fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
 
     # Define operations sets without quantization configuration
     # options (useful for creating fusing patterns, for example):
-    any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU")
-    add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add")
-    sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub")
-    mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul")
-    div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div")
-    prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU")
-    swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish")
-    sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid")
-    tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh")
+    any_relu = schema.OperatorsSet("AnyReLU")
+    add = schema.OperatorsSet("Add")
+    sub = schema.OperatorsSet("Sub")
+    mul = schema.OperatorsSet("Mul")
+    div = schema.OperatorsSet("Div")
+    prelu = schema.OperatorsSet("PReLU")
+    swish = schema.OperatorsSet("Swish")
+    sigmoid = schema.OperatorsSet("Sigmoid")
+    tanh = schema.OperatorsSet("Tanh")
 
     # Combine multiple operators into a single operator to avoid quantization between
     # them. To do this we define fusing patterns using the OperatorsSets that were created.
     # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
-    activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
-    activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid)
-    any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div)
+    activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
+    activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid)
+    any_binary = schema.OperatorSetConcat(add, sub, mul, div)
 
     # ------------------- #
     # Fusions
     # ------------------- #
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse])
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse])
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu])
+    schema.Fusing([conv, activations_after_conv_to_fuse])
+    schema.Fusing([fc, activations_after_fc_to_fuse])
+    schema.Fusing([any_binary, any_relu])
 
     return generated_tpm
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py
index dd79a0a95..cfb4bef23 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py
@@ -15,7 +15,7 @@
 import tensorflow as tf
 from packaging import version
 
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \
@@ -50,7 +50,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities:
     return generate_keras_tpc(name='imx500_tpc_keras_tpc',
                               tp_model=imx500_tpc_tp_model)
 
-def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
+def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
 
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py
index 3877011db..af3b35f23 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py
@@ -23,7 +23,7 @@
 from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU
 from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu
 
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \
     BIAS
@@ -44,7 +44,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities:
     return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc',
                                 tp_model=imx500_tpc_tp_model)
 
-def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
+def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
     Args:
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py
index a0f5886a0..167a6b9c2 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py
@@ -15,7 +15,7 @@
 from typing import List, Tuple
 
 import model_compression_toolkit as mct
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
     WEIGHTS_QUANTIZATION_METHOD, IMX500_TP_MODEL
@@ -85,7 +85,7 @@ def get_op_quantization_configs() -> \
 
     # We define a default config for operation without kernel attribute.
     # This is the default config that should be used for non-linear operations.
-    eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
+    eight_bits_default = schema.OpQuantizationConfig(
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
@@ -99,7 +99,7 @@ def get_op_quantization_configs() -> \
         signedness=Signedness.AUTO)
 
     # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
-    linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
+    linear_eight_bits = schema.OpQuantizationConfig(
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
@@ -152,12 +152,12 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
     # If the QuantizationConfigOptions contains only one configuration,
     # this configuration will be used for the operation quantization:
-    default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config])
+    default_configuration_options = schema.QuantizationConfigOptions([default_config])
 
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):
-    generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+    generated_tpm = schema.TargetPlatformModel(
         default_configuration_options,
         tpc_minor_version=2,
         tpc_patch_version=0,
@@ -176,42 +176,42 @@ def generate_tp_model(default_config: OpQuantizationConfig,
 
     # May suit for operations like: Dropout, Reshape, etc.
     default_qco = tp.get_default_quantization_config_options()
-    model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization",
-                                                                                  default_qco.clone_and_edit(enable_activation_quantization=False)
-                                                                                  .clone_and_edit_weight_attribute(enable_weights_quantization=False))
+    schema.OperatorsSet("NoQuantization",
+                        default_qco.clone_and_edit(enable_activation_quantization=False)
+                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
 
     # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
-    mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list,
-                                                                                                                                       base_config=base_config)
+    mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list,
+                                                                             base_config=base_config)
 
     # Define operator sets that use mixed_precision_configuration_options:
-    conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options)
-    fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
+    conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
+    fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
 
     # Define operations sets without quantization configuration
     # options (useful for creating fusing patterns, for example):
-    any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU")
-    add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add")
-    sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub")
-    mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul")
-    div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div")
-    prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU")
-    swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish")
-    sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid")
-    tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh")
+    any_relu = schema.OperatorsSet("AnyReLU")
+    add = schema.OperatorsSet("Add")
+    sub = schema.OperatorsSet("Sub")
+    mul = schema.OperatorsSet("Mul")
+    div = schema.OperatorsSet("Div")
+    prelu = schema.OperatorsSet("PReLU")
+    swish = schema.OperatorsSet("Swish")
+    sigmoid = schema.OperatorsSet("Sigmoid")
+    tanh = schema.OperatorsSet("Tanh")
 
     # Combine multiple operators into a single operator to avoid quantization between
     # them. To do this we define fusing patterns using the OperatorsSets that were created.
     # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
-    activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
-    activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid)
-    any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div)
+    activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
+    activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid)
+    any_binary = schema.OperatorSetConcat(add, sub, mul, div)
 
     # ------------------- #
     # Fusions
     # ------------------- #
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse])
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse])
-    model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu])
+    schema.Fusing([conv, activations_after_conv_to_fuse])
+    schema.Fusing([fc, activations_after_fc_to_fuse])
+    schema.Fusing([any_binary, any_relu])
 
     return generated_tpm
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py
index ce5abf498..375dde0b8 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py
@@ -15,7 +15,7 @@
 import tensorflow as tf
 from packaging import version
 
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \
@@ -49,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities:
     return generate_keras_tpc(name='imx500_tpc_keras_tpc',
                               tp_model=imx500_tpc_tp_model)
 
-def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
+def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
 
operation sets to layers mapping. diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py index 43a52600b..faabbdfe3 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py @@ -23,7 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -43,7 +43,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py index a5bb5c090..2c997e37f 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py @@ -15,7 +15,7 @@ from typing import List, Tuple import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ IMX500_TP_MODEL @@ -65,7 +65,8 @@ def get_op_quantization_configs() -> \ weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, weights_n_bits=8, weights_per_channel_threshold=False, - enable_weights_quantization=False, # TODO: this will changed to True once implementing multi-attributes quantization + enable_weights_quantization=False, + # TODO: this will changed to True once implementing multi-attributes quantization lut_values_bitwidth=None) # define a quantization config to quantize the kernel (for layers where there is a kernel attribute). @@ -90,7 +91,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. 
- eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -104,7 +105,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -154,7 +155,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a QuantizationConfigOptions for quantizing constants in functional ops. # Constant configuration is similar to the default eight bit configuration except for PoT @@ -165,7 +166,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=True, weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)) - const_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config]) + const_configuration_options = schema.QuantizationConfigOptions([const_config]) # 16 bits inputs and outputs. Currently, only defined for consts since they are used in operators that # support 16 bit as input and output. @@ -173,14 +174,14 @@ def generate_tp_model(default_config: OpQuantizationConfig, supported_input_activation_n_bits=(8, 16)) const_config_input16_output16 = const_config_input16.clone_and_edit( activation_n_bits=16, signedness=Signedness.SIGNED) - const_configuration_options_inout16 = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config_input16_output16, - const_config_input16], - base_config=const_config_input16) + const_configuration_options_inout16 = schema.QuantizationConfigOptions([const_config_input16_output16, + const_config_input16], + base_config=const_config_input16) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + generated_tpm = schema.TargetPlatformModel( default_configuration_options, tpc_minor_version=3, tpc_patch_version=0, @@ -201,44 +202,44 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
default_qco = tp.get_default_quantization_config_options() - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False, - supported_input_activation_n_bits=(8, 16)) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Default16BitInout", const_configuration_options_inout16) + schema.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False, + supported_input_activation_n_bits=(8, 16)) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet("Default16BitInout", const_configuration_options_inout16) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") - add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add", const_configuration_options_inout16) - sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub", const_configuration_options_inout16) - mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul", const_configuration_options_inout16) - div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div", const_configuration_options) - prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU") - swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") - sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") - tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add", const_configuration_options_inout16) + sub = schema.OperatorsSet("Sub", const_configuration_options_inout16) + mul = schema.OperatorsSet("Mul", const_configuration_options_inout16) + div = schema.OperatorsSet("Div", const_configuration_options) + prelu = schema.OperatorsSet("PReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu]) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py index 5ac0bae89..52248b18a 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py @@ -15,7 +15,7 @@ import tensorflow as tf from packaging import version -import model_compression_toolkit.target_platform_capabilities.schema.v1 +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \ @@ -50,7 +50,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. 
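Note: the change repeated across all of these hunks is purely mechanical. Each file binds the schema module once under a short alias and rewrites its call sites to `schema.<Name>`. A minimal sketch of the equivalence, runnable wherever the package is installed:

```python
import model_compression_toolkit.target_platform_capabilities.schema.v1
import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema

# Both statements bind the same module object, so every rewritten call site
# (schema.OpQuantizationConfig, schema.OperatorsSet, schema.Fusing, ...)
# resolves to exactly the class it did before the rename.
assert schema is model_compression_toolkit.target_platform_capabilities.schema.v1
assert (schema.OpQuantizationConfig is
        model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig)
```

The generated target platform models are therefore behaviorally unchanged; only the spelling of the references differs.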
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py index 65ae29a48..39cc71373 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py @@ -23,7 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu -import model_compression_toolkit.target_platform_capabilities.schema.v1 +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \ BIAS @@ -44,7 +44,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py index a0c89ccee..8891ccc1d 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py @@ -15,7 +15,7 @@ from typing import List, Tuple import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ WEIGHTS_QUANTIZATION_METHOD, IMX500_TP_MODEL @@ -85,7 +85,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -99,7 +99,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. 
- linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -152,7 +152,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a QuantizationConfigOptions for quantizing constants in functional ops. # Constant configuration is similar to the default eight bit configuration except for PoT @@ -163,12 +163,12 @@ def generate_tp_model(default_config: OpQuantizationConfig, default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=True, weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)) - const_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config]) + const_configuration_options = schema.QuantizationConfigOptions([const_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + generated_tpm = schema.TargetPlatformModel( default_configuration_options, tpc_minor_version=3, tpc_patch_version=0, @@ -187,42 +187,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
default_qco = tp.get_default_quantization_config_options() - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") - add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add", const_configuration_options) - sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub", const_configuration_options) - mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul", const_configuration_options) - div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div", const_configuration_options) - prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU") - swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") - sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") - tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add", const_configuration_options) + sub = schema.OperatorsSet("Sub", const_configuration_options) + mul = schema.OperatorsSet("Mul", const_configuration_options) + div = schema.OperatorsSet("Div", const_configuration_options) + prelu = schema.OperatorsSet("PReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu]) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py index dc586d1d3..4b24aa73e 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py @@ -15,7 +15,7 @@ import tensorflow as tf from packaging import version -import model_compression_toolkit.target_platform_capabilities.schema.v1 +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \ @@ -49,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. 
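Note: the v3 and v3_lut `tpc_keras.py`/`tpc_pytorch.py` files take the second form of this cleanup. Because they touch the schema only in a type annotation, they import the one class they need instead of aliasing the whole module. A sketch of the resulting shape (function body elided; `generate_keras_tpc` as in the hunk above):

```python
from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel

def generate_keras_tpc(name: str, tp_model: TargetPlatformModel):
    """Generates a TargetPlatformCapabilities object with default
    operation sets to layers mapping (body elided in this sketch)."""
```

Either form works; the module alias scales better when many schema names appear in one file (as in the `tp_model.py` generators), while the direct class import keeps a single-use annotation short.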
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py index 0dde170c7..3069b07c5 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py @@ -23,7 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu -import model_compression_toolkit.target_platform_capabilities.schema.v1 +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -43,7 +43,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py index d3a45678d..6a3da25b0 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py @@ -15,7 +15,7 @@ from typing import List, Tuple import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ IMX500_TP_MODEL @@ -112,7 +112,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( + eight_bits_default = OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -126,7 +126,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. 
- linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( + linear_eight_bits = OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -176,12 +176,13 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) default_config_input16 = default_config.clone_and_edit(supported_input_activation_n_bits=(8, 16)) - default_config_options_16bit = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config_input16, - default_config_input16.clone_and_edit(activation_n_bits=16, - signedness=Signedness.SIGNED)], - base_config=default_config_input16) + default_config_options_16bit = schema.QuantizationConfigOptions([default_config_input16, + default_config_input16.clone_and_edit( + activation_n_bits=16, + signedness=Signedness.SIGNED)], + base_config=default_config_input16) # Create a QuantizationConfigOptions for quantizing constants in functional ops. # Constant configuration is similar to the default eight bit configuration except for PoT @@ -192,7 +193,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=True, weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)) - const_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config]) + const_configuration_options = schema.QuantizationConfigOptions([const_config]) # 16 bits inputs and outputs. Currently, only defined for consts since they are used in operators that # support 16 bit as input and output. 
@@ -200,8 +201,8 @@ def generate_tp_model(default_config: OpQuantizationConfig, supported_input_activation_n_bits=(8, 16)) const_config_input16_output16 = const_config_input16.clone_and_edit( activation_n_bits=16, signedness=Signedness.SIGNED) - const_configuration_options_inout16 = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config_input16_output16, - const_config_input16], + const_configuration_options_inout16 = schema.QuantizationConfigOptions([const_config_input16_output16, + const_config_input16], base_config=const_config_input16) const_config_input16_per_tensor = const_config.clone_and_edit( @@ -212,15 +213,16 @@ def generate_tp_model(default_config: OpQuantizationConfig, ) const_config_input16_output16_per_tensor = const_config_input16_per_tensor.clone_and_edit( activation_n_bits=16, signedness=Signedness.SIGNED) - const_configuration_options_inout16_per_tensor = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config_input16_output16_per_tensor, - const_config_input16_per_tensor], - base_config=const_config_input16_per_tensor) + const_configuration_options_inout16_per_tensor = schema.QuantizationConfigOptions( + [const_config_input16_output16_per_tensor, + const_config_input16_per_tensor], + base_config=const_config_input16_per_tensor) qpreserving_const_config = const_config.clone_and_edit(enable_activation_quantization=False, quantization_preserving=True, default_weight_attr_config=const_config.default_weight_attr_config.clone_and_edit( weights_per_channel_threshold=False)) - qpreserving_const_config_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([qpreserving_const_config]) + qpreserving_const_config_options = schema.QuantizationConfigOptions([qpreserving_const_config]) mp_cfg_list_16bit = [mp_cfg.clone_and_edit(activation_n_bits=16, signedness=Signedness.SIGNED) for mp_cfg in mixed_precision_cfg_list] @@ -228,7 +230,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + generated_tpm = schema.TargetPlatformModel( default_configuration_options, tpc_minor_version=4, tpc_patch_version=0, @@ -248,60 +250,61 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
default_qco = tp.get_default_quantization_config_options() - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_NO_QUANTIZATION, - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_QUANTIZATION_PRESERVING, - default_qco.clone_and_edit(enable_activation_quantization=False, - quantization_preserving=True) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, qpreserving_const_config_options) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS, - default_qco.clone_and_edit(enable_activation_quantization=False, - quantization_preserving=True, - supported_input_activation_n_bits=(8, 16)) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor) + schema.OperatorsSet(OPSET_NO_QUANTIZATION, + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet(OPSET_QUANTIZATION_PRESERVING, + default_qco.clone_and_edit(enable_activation_quantization=False, + quantization_preserving=True) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, qpreserving_const_config_options) + schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS, + default_qco.clone_and_edit(enable_activation_quantization=False, + quantization_preserving=True, + supported_input_activation_n_bits=(8, 16)) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list + mp_cfg_list_16bit, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions( + mixed_precision_cfg_list + mp_cfg_list_16bit, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options) - fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options) + conv = schema.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options) + fc = schema.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit) + schema.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit) # Note: Operations sets without quantization configuration are useful for creating fusing patterns - any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit) - add = 
model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_ADD, const_configuration_options_inout16) - sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_SUB, const_configuration_options_inout16) - mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_MUL, const_configuration_options_inout16) - div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_DIV, const_configuration_options) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16) - prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_PRELU, default_config_options_16bit) - swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_SWISH, default_config_options_16bit) - sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit) - tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_TANH, default_config_options_16bit) - gelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_GELU, default_config_options_16bit) - hardsigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit) - hardswish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit) + any_relu = schema.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit) + add = schema.OperatorsSet(OPSET_ADD, const_configuration_options_inout16) + sub = schema.OperatorsSet(OPSET_SUB, const_configuration_options_inout16) + mul = schema.OperatorsSet(OPSET_MUL, const_configuration_options_inout16) + div = schema.OperatorsSet(OPSET_DIV, const_configuration_options) + schema.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16) + prelu = schema.OperatorsSet(OPSET_PRELU, default_config_options_16bit) + swish = schema.OperatorsSet(OPSET_SWISH, default_config_options_16bit) + sigmoid = schema.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit) + tanh = schema.OperatorsSet(OPSET_TANH, default_config_options_16bit) + gelu = schema.OperatorsSet(OPSET_GELU, default_config_options_16bit) + hardsigmoid = schema.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit) + hardswish = schema.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit) # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, - tanh, gelu, hardswish, hardsigmoid) - activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid, tanh, gelu, - hardswish, hardsigmoid) - any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, + tanh, gelu, hardswish, hardsigmoid) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid, tanh, gelu, + hardswish, hardsigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu]) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py index 73a2bdda1..b6a190ac5 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py @@ -15,7 +15,7 @@ import tensorflow as tf from packaging import version -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \ @@ -54,7 +54,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. 
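Note: in the full `generate_tp_model` functions, the `OperatorsSet`, `OperatorSetConcat`, and `Fusing` calls shown in these hunks execute inside the model's context manager (the enclosing `with generated_tpm:` block sits just outside the diff context), which is how they register themselves on the model being built. A condensed sketch of that flow, assuming that structure and omitting the other keyword arguments the real generators pass:

```python
import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema

def build_minimal_tpm(default_configuration_options):
    # Assumed skeleton, mirroring the generate_tp_model functions in this diff.
    tpm = schema.TargetPlatformModel(default_configuration_options,
                                     tpc_minor_version=4,
                                     tpc_patch_version=0)
    with tpm:
        conv = schema.OperatorsSet("Conv")    # uses the model's default config
        relu = schema.OperatorsSet("AnyReLU")
        swish = schema.OperatorsSet("Swish")
        # Group several activation sets so one fusing rule covers all of them.
        post_conv = schema.OperatorSetConcat(relu, swish)
        # Conv followed by any grouped activation is treated as one fused op,
        # so no activation quantization is inserted between them.
        schema.Fusing([conv, post_conv])
    return tpm
```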
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py index 3409e6ce8..e1c552c11 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py @@ -25,7 +25,7 @@ import torch.nn.functional as F from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, hardsigmoid, leaky_relu, gelu -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \ BIAS @@ -51,7 +51,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py index 0cc2a75b0..fb6d864cc 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py @@ -15,7 +15,7 @@ from typing import List, Tuple import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, QNNPACK_TP_MODEL from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \ @@ -84,7 +84,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -98,7 +98,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. 
- linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( activation_quantization_method=tp.QuantizationMethod.UNIFORM, default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, @@ -138,12 +138,12 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + generated_tpc = schema.TargetPlatformModel( default_configuration_options, tpc_minor_version=1, tpc_patch_version=0, @@ -158,17 +158,17 @@ def generate_tp_model(default_config: OpQuantizationConfig, # Pytorch supports the next fusing patterns: # [Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu] # Source: # https://pytorch.org/docs/stable/quantization.html#model-preparation-for-quantization-eager-mode - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv") - batchnorm = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BatchNorm") - relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Relu") - linear = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Linear") + conv = schema.OperatorsSet("Conv") + batchnorm = schema.OperatorsSet("BatchNorm") + relu = schema.OperatorsSet("Relu") + linear = schema.OperatorsSet("Linear") # ------------------- # # Fusions # ------------------- # - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, batchnorm, relu]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, batchnorm]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, relu]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([linear, relu]) + schema.Fusing([conv, batchnorm, relu]) + schema.Fusing([conv, batchnorm]) + schema.Fusing([conv, relu]) + schema.Fusing([linear, relu]) return generated_tpc diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py index 4a366e3ca..fa8fb07e9 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py @@ -16,7 +16,7 @@ from packaging import version -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.defaultdict import DefaultDict from 
model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \ KERAS_DEPTHWISE_KERNEL, BIAS @@ -42,7 +42,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='qnnpack_keras', tp_model=qnnpack_tp_model) -def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py index 2c2160c85..b13e6d8c7 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py @@ -16,7 +16,7 @@ from torch.nn import Conv2d, Linear, BatchNorm2d, ConvTranspose2d, Hardtanh, ReLU, ReLU6 from torch.nn.functional import relu, relu6, hardtanh -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -36,7 +36,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='qnnpack_pytorch', tp_model=qnnpack_pytorch) -def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py index 72f4c77e3..3c7e7e01c 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py @@ -15,7 +15,7 @@ from typing import List, Tuple import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import BIAS_ATTR, KERNEL_ATTR, TFLITE_TP_MODEL from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \ @@ -82,7 +82,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. 
- eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( + eight_bits_default = schema.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -96,7 +96,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( + linear_eight_bits = schema.OpQuantizationConfig( activation_quantization_method=tp.QuantizationMethod.UNIFORM, default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, @@ -136,12 +136,12 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + generated_tpc = schema.TargetPlatformModel( default_configuration_options, tpc_minor_version=1, tpc_patch_version=0, @@ -155,52 +155,52 @@ def generate_tp_model(default_config: OpQuantizationConfig, # In TFLite, the quantized operator specifications constraint operators quantization # differently. 
For more details: # https://www.tensorflow.org/lite/performance/quantization_spec#int8_quantized_operator_specifications - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", - tp.get_default_quantization_config_options().clone_and_edit( - quantization_preserving=True)) + schema.OperatorsSet("NoQuantization", + tp.get_default_quantization_config_options().clone_and_edit( + quantization_preserving=True)) fc_qco = tp.get_default_quantization_config_options() - fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", - fc_qco.clone_and_edit_weight_attribute(weights_per_channel_threshold=False)) - - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("L2Normalization", - tp.get_default_quantization_config_options().clone_and_edit( - fixed_zero_point=0, fixed_scale=1 / 128)) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("LogSoftmax", - tp.get_default_quantization_config_options().clone_and_edit( - fixed_zero_point=127, fixed_scale=16 / 256)) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh", - tp.get_default_quantization_config_options().clone_and_edit( - fixed_zero_point=0, fixed_scale=1 / 128)) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Softmax", - tp.get_default_quantization_config_options().clone_and_edit( - fixed_zero_point=-128, fixed_scale=1 / 256)) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Logistic", - tp.get_default_quantization_config_options().clone_and_edit( - fixed_zero_point=-128, fixed_scale=1 / 256)) - - conv2d = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv2d") - kernel = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(conv2d, fc) - - relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Relu") - elu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Elu") - activations_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(relu, elu) - - batch_norm = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BatchNorm") - bias_add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BiasAdd") - add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add") - squeeze = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Squeeze", - qc_options=tp.get_default_quantization_config_options().clone_and_edit( - quantization_preserving=True)) + fc = schema.OperatorsSet("FullyConnected", + fc_qco.clone_and_edit_weight_attribute(weights_per_channel_threshold=False)) + + schema.OperatorsSet("L2Normalization", + tp.get_default_quantization_config_options().clone_and_edit( + fixed_zero_point=0, fixed_scale=1 / 128)) + schema.OperatorsSet("LogSoftmax", + tp.get_default_quantization_config_options().clone_and_edit( + fixed_zero_point=127, fixed_scale=16 / 256)) + schema.OperatorsSet("Tanh", + tp.get_default_quantization_config_options().clone_and_edit( + fixed_zero_point=0, fixed_scale=1 / 128)) + schema.OperatorsSet("Softmax", + tp.get_default_quantization_config_options().clone_and_edit( + fixed_zero_point=-128, fixed_scale=1 / 256)) + schema.OperatorsSet("Logistic", + tp.get_default_quantization_config_options().clone_and_edit( + fixed_zero_point=-128, 
fixed_scale=1 / 256)) + + conv2d = schema.OperatorsSet("Conv2d") + kernel = schema.OperatorSetConcat(conv2d, fc) + + relu = schema.OperatorsSet("Relu") + elu = schema.OperatorsSet("Elu") + activations_to_fuse = schema.OperatorSetConcat(relu, elu) + + batch_norm = schema.OperatorsSet("BatchNorm") + bias_add = schema.OperatorsSet("BiasAdd") + add = schema.OperatorsSet("Add") + squeeze = schema.OperatorsSet("Squeeze", + qc_options=tp.get_default_quantization_config_options().clone_and_edit( + quantization_preserving=True)) # ------------------- # # Fusions # ------------------- # # Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/grappler/optimizers/remapper - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([kernel, bias_add]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([kernel, bias_add, activations_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv2d, batch_norm, activations_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv2d, squeeze, activations_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([batch_norm, activations_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([batch_norm, add, activations_to_fuse]) + schema.Fusing([kernel, bias_add]) + schema.Fusing([kernel, bias_add, activations_to_fuse]) + schema.Fusing([conv2d, batch_norm, activations_to_fuse]) + schema.Fusing([conv2d, squeeze, activations_to_fuse]) + schema.Fusing([batch_norm, activations_to_fuse]) + schema.Fusing([batch_norm, add, activations_to_fuse]) return generated_tpc diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py index 1a0c9102d..ad9626837 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py @@ -15,7 +15,7 @@ import tensorflow as tf from packaging import version -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, BIAS @@ -47,7 +47,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='tflite_keras', tp_model=tflite_tp_model) -def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. 
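Note: unlike the IMX500 models above, the TFLite model pins quantization parameters for operators whose output range is known a priori instead of calibrating them; the hunks above fix Softmax and Logistic at `fixed_zero_point=-128, fixed_scale=1/256`, and Tanh/L2Normalization at `fixed_zero_point=0, fixed_scale=1/128`. Under the affine dequantization rule used by the TFLite int8 spec, real = scale * (q - zero_point), these choices cover exactly the known output ranges. A quick check of the arithmetic:

```python
# Softmax / Logistic: outputs in [0, 1).
scale, zero_point = 1 / 256, -128
assert scale * (-128 - zero_point) == 0.0        # lowest int8 value -> 0.0
assert scale * (127 - zero_point) == 255 / 256   # highest int8 value -> ~0.996

# Tanh / L2Normalization: outputs in [-1, 1].
scale, zero_point = 1 / 128, 0
assert scale * (-128 - zero_point) == -1.0
assert scale * (127 - zero_point) == 127 / 128
```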
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py
index 4a125a846..e12dbcbee 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py
@@ -16,7 +16,7 @@
 from torch.nn import AvgPool2d, MaxPool2d
 from torch.nn.functional import avg_pool2d, max_pool2d, interpolate
 
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \
     BIAS
@@ -38,7 +38,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities:
     return generate_pytorch_tpc(name='tflite_torch', tp_model=tflite_tp_model)
 
 
-def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
+def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
     Args:
diff --git a/tests/common_tests/helpers/generate_test_tp_model.py b/tests/common_tests/helpers/generate_test_tp_model.py
index 4ea3a84f8..19b8390ed 100644
--- a/tests/common_tests/helpers/generate_test_tp_model.py
+++ b/tests/common_tests/helpers/generate_test_tp_model.py
@@ -15,7 +15,7 @@
 import copy
 from typing import Dict, List, Any
 
-import model_compression_toolkit.target_platform_capabilities.schema.v1
+import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.constants import FLOAT_BITWIDTH, ACTIVATION_N_BITS_ATTRIBUTE, \
     SUPPORTED_INPUT_ACTIVATION_NBITS_ATTRIBUTE
 from model_compression_toolkit.target_platform_capabilities.constants import OPS_SET_LIST, KERNEL_ATTR, BIAS_ATTR, \
@@ -39,7 +39,8 @@ def generate_test_tp_model(edit_params_dict, name=""):
     base_config, op_cfg_list, default_config = get_op_quantization_configs()
 
     # separate weights attribute parameters from the requested param to edit
-    weights_params_names = [name for name in model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig.__init__.__code__.co_varnames if name != 'self']
+    weights_params_names = [name for name in schema.AttributeQuantizationConfig.__init__.__code__.co_varnames if
+                            name != 'self']
     weights_params = {k: v for k, v in edit_params_dict.items() if k in weights_params_names}
     rest_params = {k: v for k, v in edit_params_dict.items() if k not in list(weights_params.keys())}
 
@@ -106,8 +107,8 @@ def generate_tp_model_with_activation_mp(base_cfg, default_config, mp_bitwidth_c
                                          mixed_precision_cfg_list=mp_op_cfg_list,
                                          name=name)
 
-    mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mp_op_cfg_list,
-                                                                                                   base_config=base_cfg)
+    mixed_precision_configuration_options = schema.QuantizationConfigOptions(mp_op_cfg_list,
+                                                                             base_config=base_cfg)
 
     operator_sets_dict = {op_set.name: mixed_precision_configuration_options for op_set in base_tp_model.operator_set
                          if op_set.name != "NoQuantization"}
@@ -123,12 +124,11 @@ def 
generate_custom_test_tp_model(name: str, base_cfg: OpQuantizationConfig, - base_tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel, + base_tp_model: schema.TargetPlatformModel, operator_sets_dict: Dict[str, QuantizationConfigOptions] = None): + default_configuration_options = schema.QuantizationConfigOptions([base_cfg]) - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([base_cfg]) - - custom_tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + custom_tp_model = schema.TargetPlatformModel( default_configuration_options, tpc_minor_version=None, tpc_patch_version=None, @@ -144,27 +144,26 @@ def generate_custom_test_tp_model(name: str, else: qc_options = op_set.qc_options - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(op_set.name, qc_options) + schema.OperatorsSet(op_set.name, qc_options) existing_op_sets_names = [op_set.name for op_set in base_tp_model.operator_set] for op_set_name, op_set_qc_options in operator_sets_dict.items(): # Add new OperatorSets from the given operator_sets_dict if op_set_name not in existing_op_sets_names: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(op_set_name, op_set_qc_options) + schema.OperatorsSet(op_set_name, op_set_qc_options) for fusion in base_tp_model.fusing_patterns: - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing(fusion.operator_groups_list) + schema.Fusing(fusion.operator_groups_list) return custom_tp_model def generate_test_tpc(name: str, - tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel, + tp_model: schema.TargetPlatformModel, base_tpc: tp.TargetPlatformCapabilities, op_sets_to_layer_add: Dict[str, List[Any]] = None, op_sets_to_layer_drop: Dict[str, List[Any]] = None, attr_mapping: Dict[str, Dict] = {}): - op_set_to_layers_list = base_tpc.op_sets_to_layers.op_sets_to_layers op_set_to_layers_dict = {op_set.name: op_set.layers for op_set in op_set_to_layers_list} @@ -198,22 +197,21 @@ def generate_test_attr_configs(default_cfg_nbits: int = 8, kernel_cfg_quantizatiom_method: tp.QuantizationMethod = tp.QuantizationMethod.POWER_OF_TWO, enable_kernel_weights_quantization: bool = True, kernel_lut_values_bitwidth: int = None): - - default_weight_attr_config = model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig( + default_weight_attr_config = schema.AttributeQuantizationConfig( weights_quantization_method=default_cfg_quantizatiom_method, weights_n_bits=default_cfg_nbits, weights_per_channel_threshold=False, enable_weights_quantization=False, lut_values_bitwidth=None) - kernel_base_config = model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig( + kernel_base_config = schema.AttributeQuantizationConfig( weights_quantization_method=kernel_cfg_quantizatiom_method, weights_n_bits=kernel_cfg_nbits, weights_per_channel_threshold=True, enable_weights_quantization=enable_kernel_weights_quantization, lut_values_bitwidth=kernel_lut_values_bitwidth) - bias_config = model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig( + bias_config = schema.AttributeQuantizationConfig( weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, weights_n_bits=FLOAT_BITWIDTH, weights_per_channel_threshold=False, @@ -225,22 +223,21 @@ def 
generate_test_attr_configs(default_cfg_nbits: int = 8, BIAS_CONFIG: bias_config} -def generate_test_op_qc(default_weight_attr_config: model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig, - kernel_base_config: model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig, - bias_config: model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig, +def generate_test_op_qc(default_weight_attr_config: schema.AttributeQuantizationConfig, + kernel_base_config: schema.AttributeQuantizationConfig, + bias_config: schema.AttributeQuantizationConfig, enable_activation_quantization: bool = True, activation_n_bits: int = 8, activation_quantization_method: tp.QuantizationMethod = tp.QuantizationMethod.POWER_OF_TWO): - - return model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(enable_activation_quantization=enable_activation_quantization, - default_weight_attr_config=default_weight_attr_config, - attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, - BIAS_ATTR: bias_config}, - activation_n_bits=activation_n_bits, - supported_input_activation_n_bits=activation_n_bits, - activation_quantization_method=activation_quantization_method, - quantization_preserving=False, - fixed_scale=None, - fixed_zero_point=None, - simd_size=32, - signedness=Signedness.AUTO) + return schema.OpQuantizationConfig(enable_activation_quantization=enable_activation_quantization, + default_weight_attr_config=default_weight_attr_config, + attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, + BIAS_ATTR: bias_config}, + activation_n_bits=activation_n_bits, + supported_input_activation_n_bits=activation_n_bits, + activation_quantization_method=activation_quantization_method, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO) diff --git a/tests/common_tests/test_tp_model.py b/tests/common_tests/test_tp_model.py index 6515ada05..298d0dc4a 100644 --- a/tests/common_tests/test_tp_model.py +++ b/tests/common_tests/test_tp_model.py @@ -16,17 +16,18 @@ import unittest import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.core.common import BaseNode from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR -from model_compression_toolkit.target_platform_capabilities.target_platform import get_default_quantization_config_options +from model_compression_toolkit.target_platform_capabilities.target_platform import \ + get_default_quantization_config_options from tests.common_tests.helpers.generate_test_tp_model import generate_test_attr_configs, generate_test_op_qc tp = mct.target_platform TEST_QC = generate_test_op_qc(**generate_test_attr_configs()) -TEST_QCO = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) +TEST_QCO = schema.QuantizationConfigOptions([TEST_QC]) class TargetPlatformModelingTest(unittest.TestCase): @@ -37,43 +38,43 @@ def test_not_initialized_tp(self): self.assertEqual('Target platform model is not initialized.', str(e.exception)) def test_get_default_options(self): - with model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(TEST_QCO, - 
tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False): + with schema.TargetPlatformModel(TEST_QCO, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False): self.assertEqual(tp.get_default_quantization_config_options(), TEST_QCO) def test_immutable_tp(self): - model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(TEST_QCO, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False) + model = schema.TargetPlatformModel(TEST_QCO, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with self.assertRaises(Exception) as e: with model: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opset") + schema.OperatorsSet("opset") model.operator_set = [] self.assertEqual('Immutable class. Can\'t edit attributes.', str(e.exception)) def test_default_options_more_than_single_qc(self): - test_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC, TEST_QC], base_config=TEST_QC) + test_qco = schema.QuantizationConfigOptions([TEST_QC, TEST_QC], base_config=TEST_QC) with self.assertRaises(Exception) as e: - model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(test_qco, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False) - self.assertEqual('Default QuantizationConfigOptions must contain only one option', str(e.exception)) + schema.TargetPlatformModel(test_qco, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) + self.assertEqual('Default QuantizationConfigOptions must contain exactly one option.', str(e.exception)) def test_tp_model_show(self): - tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(TEST_QCO, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False) + tpm = schema.TargetPlatformModel(TEST_QCO, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with tpm: - a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opA") + a = schema.OperatorsSet("opA") tpm.show() @@ -81,16 +82,16 @@ def test_tp_model_show(self): class OpsetTest(unittest.TestCase): def test_opset_qco(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(TEST_QCO, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False, - name='test') + hm = schema.TargetPlatformModel(TEST_QCO, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, + name='test') opset_name = "ops_3bit" with hm: qco_3bit = get_default_quantization_config_options().clone_and_edit(activation_n_bits=3) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(opset_name, qco_3bit) + schema.OperatorsSet(opset_name, qco_3bit) for op_qc in hm.get_config_options_by_operators_set(opset_name).quantization_config_list: self.assertEqual(op_qc.activation_n_bits, 3) @@ -102,33 +103,33 @@ def test_opset_qco(self): hm.default_qco) def test_opset_concat(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(TEST_QCO, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False, - 
name='test') + hm = schema.TargetPlatformModel(TEST_QCO, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, + name='test') with hm: - a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opset_A') - b = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opset_B', - get_default_quantization_config_options().clone_and_edit(activation_n_bits=2)) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opset_C') # Just add it without using it in concat - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(a, b) + a = schema.OperatorsSet('opset_A') + b = schema.OperatorsSet('opset_B', + get_default_quantization_config_options().clone_and_edit(activation_n_bits=2)) + schema.OperatorsSet('opset_C') # Just add it without using it in concat + schema.OperatorSetConcat(a, b) self.assertEqual(len(hm.operator_set), 4) self.assertTrue(hm.is_opset_in_model("opset_A_opset_B")) self.assertTrue(hm.get_config_options_by_operators_set('opset_A_opset_B') is None) def test_non_unique_opset(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False) with self.assertRaises(Exception) as e: with hm: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv") - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv") + schema.OperatorsSet("conv") + schema.OperatorsSet("conv") self.assertEqual('Operator Sets must have unique names.', str(e.exception)) @@ -136,28 +137,31 @@ class QCOptionsTest(unittest.TestCase): def test_empty_qc_options(self): with self.assertRaises(AssertionError) as e: - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([]) - self.assertEqual("'QuantizationConfigOptions' requires at least one 'OpQuantizationConfig'. The provided list is empty.", - str(e.exception)) + schema.QuantizationConfigOptions([]) + self.assertEqual( + "'QuantizationConfigOptions' requires at least one 'OpQuantizationConfig'. 
The provided list is empty.",
+            str(e.exception))
 
     def test_list_of_no_qc(self):
         with self.assertRaises(AssertionError) as e:
-            model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC, 3])
+            schema.QuantizationConfigOptions([TEST_QC, 3])
         self.assertEqual(
             'Each option must be an instance of \'OpQuantizationConfig\', but found an object of type: <class \'int\'>.',
             str(e.exception))
 
     def test_clone_and_edit_options(self):
-        modified_options = TEST_QCO.clone_and_edit(activation_n_bits=3).clone_and_edit_weight_attribute(attrs=[KERNEL_ATTR],
-                                                                                                        weights_n_bits=5)
+        modified_options = TEST_QCO.clone_and_edit(activation_n_bits=3).clone_and_edit_weight_attribute(
+            attrs=[KERNEL_ATTR],
+            weights_n_bits=5)
 
         self.assertEqual(modified_options.quantization_config_list[0].activation_n_bits, 3)
-        self.assertEqual(modified_options.quantization_config_list[0].attr_weights_configs_mapping[KERNEL_ATTR].weights_n_bits, 5)
+        self.assertEqual(
+            modified_options.quantization_config_list[0].attr_weights_configs_mapping[KERNEL_ATTR].weights_n_bits, 5)
 
     def test_qco_without_base_config(self):
-        model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC])  # Should work fine as it has only one qc.
+        schema.QuantizationConfigOptions([TEST_QC])  # Should work fine as it has only one qc.
         with self.assertRaises(Exception) as e:
-            model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC, TEST_QC])  # Should raise exception as base_config was not passed
+            schema.QuantizationConfigOptions([TEST_QC, TEST_QC])  # Should raise exception as base_config was not passed
         self.assertEqual(
             'For multiple configurations, a \'base_config\' is required for non-mixed-precision optimization.',
             str(e.exception))
@@ -172,30 +176,31 @@ def test_get_qco_for_none_tpc(self):
 
 class FusingTest(unittest.TestCase):
 
     def test_fusing_single_opset(self):
-        hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
-            model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]),
-            tpc_minor_version = None,
-            tpc_patch_version = None,
-            add_metadata = False)
+        hm = schema.TargetPlatformModel(
+            schema.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            tpc_platform_type=None,
+            add_metadata=False)
         with hm:
-            add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("add")
+            add = schema.OperatorsSet("add")
             with self.assertRaises(Exception) as e:
-                model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([add])
+                schema.Fusing([add])
             self.assertEqual('Fusing can not be created for a single operators group', str(e.exception))
 
     def test_fusing_contains(self):
-        hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
-            model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]),
+        hm = schema.TargetPlatformModel(
+            schema.QuantizationConfigOptions([TEST_QC]),
             tpc_minor_version=None,
             tpc_patch_version=None,
             tpc_platform_type=None,
             add_metadata=False)
         with hm:
-            conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv")
-            add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("add")
-            tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("tanh")
-            model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add])
-            
model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add, tanh]) + conv = schema.OperatorsSet("conv") + add = schema.OperatorsSet("add") + tanh = schema.OperatorsSet("tanh") + schema.Fusing([conv, add]) + schema.Fusing([conv, add, tanh]) self.assertEqual(len(hm.fusing_patterns), 2) f0, f1 = hm.fusing_patterns[0], hm.fusing_patterns[1] @@ -205,20 +210,20 @@ def test_fusing_contains(self): self.assertTrue(f1.contains(f1)) def test_fusing_contains_with_opset_concat(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False) with hm: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv") - add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("add") - tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("tanh") - add_tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, tanh) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add_tanh]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add, tanh]) + conv = schema.OperatorsSet("conv") + add = schema.OperatorsSet("add") + tanh = schema.OperatorsSet("tanh") + add_tanh = schema.OperatorSetConcat(add, tanh) + schema.Fusing([conv, add]) + schema.Fusing([conv, add_tanh]) + schema.Fusing([conv, add, tanh]) self.assertEqual(len(hm.fusing_patterns), 3) f0, f1, f2 = hm.fusing_patterns[0], hm.fusing_patterns[1], hm.fusing_patterns[2] diff --git a/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py b/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py index 8bcd21c07..0470bc613 100644 --- a/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py +++ b/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py @@ -17,7 +17,7 @@ import tensorflow as tf from packaging import version -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, BIAS, \ KERAS_DEPTHWISE_KERNEL, WEIGHTS_N_BITS @@ -66,41 +66,41 @@ def generate_tp_model(default_config: OpQuantizationConfig, base_config: OpQuantizationConfig, mixed_precision_cfg_list: List[OpQuantizationConfig], name: str) -> TargetPlatformModel: - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + default_configuration_options = schema.QuantizationConfigOptions( [default_config]) - generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + generated_tpc = schema.TargetPlatformModel( default_configuration_options, tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False, name=name) with generated_tpc: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", - 
tp.get_default_quantization_config_options() - .clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) - - mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) - - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) - - any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") - add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add") - sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub") - mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul") - div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div") - prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU") - swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") - sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") - tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") - activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu]) + schema.OperatorsSet("NoQuantization", + tp.get_default_quantization_config_options() + .clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) + + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add") + sub = schema.OperatorsSet("Sub") + mul = schema.OperatorsSet("Mul") + div = schema.OperatorsSet("Div") + prelu = schema.OperatorsSet("PReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = schema.OperatorSetConcat(add, sub, mul, div) + schema.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([fc, activations_after_fc_to_fuse]) + schema.Fusing([any_binary, any_relu]) return generated_tpc @@ -110,7 +110,7 @@ def 
get_int8_tpc(edit_weights_params_dict={}, edit_act_params_dict={}) -> tp.Tar return generate_keras_tpc(name='int8_tpc', tp_model=default_tp_model) -def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel): keras_tpc = tp.TargetPlatformCapabilities(tp_model) with keras_tpc: diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py index 86f3b21bc..7942824c5 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py @@ -16,7 +16,7 @@ import numpy as np import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from mct_quantizers import QuantizationMethod, KerasQuantizationWrapper from model_compression_toolkit import DefaultDict from model_compression_toolkit.core.keras.constants import GAMMA, BETA @@ -53,35 +53,35 @@ def _generate_bn_quantized_tpm(quantize_linear): bias_config=attr_cfgs_dict[BIAS_CONFIG], enable_activation_quantization=False) - bn_op_qc = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(enable_activation_quantization=False, - default_weight_attr_config=default_attr_cfg, - attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg}, - activation_n_bits=8, - supported_input_activation_n_bits=8, - activation_quantization_method=QuantizationMethod.POWER_OF_TWO, - quantization_preserving=False, - fixed_scale=None, - fixed_zero_point=None, - simd_size=32, - signedness=Signedness.AUTO) - - default_op_qc = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(enable_activation_quantization=False, - default_weight_attr_config=default_attr_cfg, - attr_weights_configs_mapping={}, - activation_n_bits=8, - supported_input_activation_n_bits=8, - activation_quantization_method=QuantizationMethod.POWER_OF_TWO, - quantization_preserving=False, - fixed_scale=None, - fixed_zero_point=None, - simd_size=32, - signedness=Signedness.AUTO) - - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_op_qc]) - linear_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([linear_op_qc]) - bn_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([bn_op_qc]) - - generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + bn_op_qc = schema.OpQuantizationConfig(enable_activation_quantization=False, + default_weight_attr_config=default_attr_cfg, + attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg}, + activation_n_bits=8, + supported_input_activation_n_bits=8, + activation_quantization_method=QuantizationMethod.POWER_OF_TWO, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO) + + default_op_qc = schema.OpQuantizationConfig(enable_activation_quantization=False, + default_weight_attr_config=default_attr_cfg, + attr_weights_configs_mapping={}, 
+ activation_n_bits=8, + supported_input_activation_n_bits=8, + activation_quantization_method=QuantizationMethod.POWER_OF_TWO, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO) + + default_configuration_options = schema.QuantizationConfigOptions([default_op_qc]) + linear_configuration_options = schema.QuantizationConfigOptions([linear_op_qc]) + bn_configuration_options = schema.QuantizationConfigOptions([bn_op_qc]) + + generated_tpm = schema.TargetPlatformModel( default_configuration_options, tpc_minor_version=None, tpc_patch_version=None, @@ -89,9 +89,8 @@ def _generate_bn_quantized_tpm(quantize_linear): add_metadata=False, name='bn_quantized_tpm') with generated_tpm: - - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", linear_configuration_options) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BN", bn_configuration_options) + schema.OperatorsSet("Conv", linear_configuration_options) + schema.OperatorsSet("BN", bn_configuration_options) return generated_tpm @@ -140,11 +139,13 @@ def create_networks(self): return tf.keras.models.Model(inputs=inputs, outputs=x) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): - float_bn_layer = get_layers_from_model_by_type(float_model, layers.BatchNormalization, include_wrapped_layers=False) + float_bn_layer = get_layers_from_model_by_type(float_model, layers.BatchNormalization, + include_wrapped_layers=False) self.unit_test.assertTrue(len(float_bn_layer) == 1, "Expecting the float model to have exactly 1 BN layer") float_bn_layer = float_bn_layer[0] - quant_bn_layer = get_layers_from_model_by_type(quantized_model, layers.BatchNormalization, include_wrapped_layers=True) + quant_bn_layer = get_layers_from_model_by_type(quantized_model, layers.BatchNormalization, + include_wrapped_layers=True) self.unit_test.assertTrue(len(quant_bn_layer) == 1, "Expecting the quantized model to have exactly 1 BN layer") quant_bn_layer = quant_bn_layer[0] @@ -161,7 +162,8 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= f_beta = f_beta[0] q_beta = q_bn_weights.get(BETA) self.unit_test.assertTrue(q_beta is not None, "Expecting quantized model BN layer to have a BETA attribute") - self.unit_test.assertTrue(np.any(f_beta != q_beta), "Float and quantized BETA attributes are expected to have different values") + self.unit_test.assertTrue(np.any(f_beta != q_beta), + "Float and quantized BETA attributes are expected to have different values") f_gamma = [w for w in f_bn_weights if GAMMA in w.name] self.unit_test.assertTrue(len(f_gamma) == 1, "Expecting float model BN layer to have a GAMMA attribute") diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/const_quantization_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/const_quantization_test.py index 5017f2d88..dbba20909 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/const_quantization_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/const_quantization_test.py @@ -17,7 +17,7 @@ import numpy as np import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.core import MixedPrecisionQuantizationConfig from 
model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v4.tp_model import generate_tp_model, \ get_op_quantization_configs @@ -49,11 +49,11 @@ def create_const_quant_tpc(qmethod): default_weight_attr_config=default_cfg.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=True, weights_n_bits=16, weights_quantization_method=qmethod)) - const_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config]) + const_configuration_options = schema.QuantizationConfigOptions([const_config]) const_merge_config = default_cfg.clone_and_edit( default_weight_attr_config=default_cfg.default_weight_attr_config.clone_and_edit( weights_per_channel_threshold=False)) - const_merge_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_merge_config]) + const_merge_configuration_options = schema.QuantizationConfigOptions([const_merge_config]) operator_sets_dict = {} operator_sets_dict["Add"] = const_configuration_options @@ -188,9 +188,10 @@ def create_networks(self): x1 = layers.Add()([np.random.random((1, x.shape[-1])), x, np.random.random((1, x.shape[-1]))]) x2 = layers.Multiply()([x, np.random.random((1, x.shape[-1])), x, np.random.random((1, x.shape[-1]))]) x3 = tf.add_n([x1, as_const(x), x2]) - x1 = tf.reshape(tf.stack([as_const(x1), x1, as_const(x1)], axis=1), (-1, 3*x1.shape[1], x1.shape[2], x1.shape[3])) + x1 = tf.reshape(tf.stack([as_const(x1), x1, as_const(x1)], axis=1), + (-1, 3 * x1.shape[1], x1.shape[2], x1.shape[3])) x = tf.concat([x1, x2, as_const(x3), x3], 1) - ind_select_const = np.zeros((192*32, 38)) + ind_select_const = np.zeros((192 * 32, 38)) ind_select_const[4, :] = 100 x1 = tf.add(x, ind_select_const.reshape((192, 32, 38))) inds = tf.argmax(tf.reshape(x1, (-1, 192 * 32, 38)), axis=1) @@ -209,7 +210,8 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= self.unit_test.assertTrue(np.isclose(cs, 1, atol=0.01), msg=f'fail cosine similarity check:{cs}') # check quantization layers: - for op in [tf.concat, tf.stack, layers.Add, layers.Multiply, layers.Concatenate, tf.gather, tf.compat.v1.gather]: + for op in [tf.concat, tf.stack, layers.Add, layers.Multiply, layers.Concatenate, tf.gather, + tf.compat.v1.gather]: for qlayer in get_layers_from_model_by_type(quantized_model, op): self.unit_test.assertTrue(isinstance(qlayer, KerasQuantizationWrapper), msg=f"{op} should be quantized.") diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py b/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py index 11b4b1b2e..591818683 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py @@ -18,7 +18,7 @@ import tensorflow as tf from keras.activations import sigmoid, softmax -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from mct_quantizers import KerasActivationQuantizationHolder from model_compression_toolkit import DefaultDict from model_compression_toolkit.core.keras.constants import SIGMOID, SOFTMAX, BIAS @@ -29,7 +29,8 @@ from keras import backend as K import model_compression_toolkit as mct -from 
model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \ + ResourceUtilization from model_compression_toolkit.core.common.user_info import UserInformation from tests.keras_tests.tpc_keras import get_tpc_with_activation_mp_keras from tests.keras_tests.utils import get_layers_from_model_by_type @@ -95,14 +96,15 @@ def verify_quantization(self, quantized_model, input_x, weights_layers_idx, weig activation_layers_idx, unique_tensor_values): # verify weights quantization conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - for conv_layer, num_channels in zip(conv_layers,weights_layers_channels_size): + for conv_layer, num_channels in zip(conv_layers, weights_layers_channels_size): for j in range(num_channels): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layer.get_quantized_weights()['kernel'][:, :, :, j]).flatten().shape[ 0] <= unique_tensor_values) # verify activation quantization - holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder)[1:] # skip the input layer + holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder)[ + 1:] # skip the input layer inp = quantized_model.input # input placeholder out = [layer.output for layer in holder_layers] # all layer outputs get_outputs = K.function([inp], out) @@ -136,7 +138,7 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= class MixedPrecisionActivationSearch4BitsAvgTest(MixedPrecisionActivationBaseTest): def __init__(self, unit_test): - super().__init__(unit_test, activation_layers_idx=[2,4]) + super().__init__(unit_test, activation_layers_idx=[2, 4]) def get_resource_utilization(self): # resource utilization is for 4 bits on average @@ -261,7 +263,7 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= # activation bitwidth for each layer would be 4-bit, this assertion tests the expected result for this specific # test with its current setup (therefore, we don't check the relu layer's bitwidth) holder_layer = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder)[0] - self.unit_test.assertTrue(holder_layer.activation_holder_quantizer.get_config()['num_bits']==4) + self.unit_test.assertTrue(holder_layer.activation_holder_quantizer.get_config()['num_bits'] == 4) class MixedPrecisionActivationSplitLayerTest(MixedPrecisionActivationBaseTest): @@ -641,18 +643,18 @@ def get_tpc(self): [c.clone_and_edit(enable_activation_quantization=False) for c in mixed_precision_cfg_list] cfg = mixed_precision_cfg_list[0] - act_mixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + act_mixed_cfg = schema.QuantizationConfigOptions( [act_eight_bit_cfg, act_four_bit_cfg, act_two_bit_cfg], base_config=act_eight_bit_cfg, ) - weight_mixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + weight_mixed_cfg = schema.QuantizationConfigOptions( mixed_precision_cfg_list, base_config=cfg, ) - tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([cfg], cfg), + tp_model = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([cfg], cfg), 
tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, @@ -660,8 +662,8 @@ def get_tpc(self): name="mp_activation_conf_weights_test") with tp_model: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Activations", act_mixed_cfg) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights", weight_mixed_cfg) + schema.OperatorsSet("Activations", act_mixed_cfg) + schema.OperatorsSet("Weights", weight_mixed_cfg) keras_tpc = tp.TargetPlatformCapabilities(tp_model) diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/slicing_op_lambda_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/slicing_op_lambda_test.py index 315884be0..865151f8d 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/slicing_op_lambda_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/slicing_op_lambda_test.py @@ -13,8 +13,8 @@ # limitations under the License. # ============================================================================== import tensorflow as tf +from mct_quantizers.common.quant_info import QuantizationMethod -import model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config from tests.keras_tests.feature_networks_tests.base_keras_feature_test import BaseKerasFeatureNetworkTest from packaging import version @@ -39,8 +39,8 @@ def __init__(self, unit_test): def get_quantization_config(self): return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE, - model_compression_toolkit.target_platform_capabilities.target_platform.QuantizationMethod.POWER_OF_TWO, - model_compression_toolkit.target_platform_capabilities.target_platform.QuantizationMethod.POWER_OF_TWO, + QuantizationMethod.POWER_OF_TWO, + QuantizationMethod.POWER_OF_TWO, 16, 16, False, False, True) def create_networks(self): diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/tanh_activation_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/tanh_activation_test.py index d15847890..bdd70fffd 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/tanh_activation_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/tanh_activation_test.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -import model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config +from mct_quantizers.common.quant_info import QuantizationMethod + from tests.common_tests.base_feature_test import BaseFeatureNetworkTest import model_compression_toolkit as mct import tensorflow as tf @@ -30,8 +31,8 @@ def __init__(self, unit_test): def get_quantization_config(self): return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE, - model_compression_toolkit.target_platform_capabilities.target_platform.QuantizationMethod.POWER_OF_TWO, - model_compression_toolkit.target_platform_capabilities.target_platform.QuantizationMethod.POWER_OF_TWO, + QuantizationMethod.POWER_OF_TWO, + QuantizationMethod.POWER_OF_TWO, 16, 16, True, True, True) def create_networks(self): diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py index ab1cc8a4c..8522531b7 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py @@ -17,7 +17,7 @@ import numpy as np import tensorflow as tf -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from mct_quantizers import KerasQuantizationWrapper from model_compression_toolkit.core.keras.constants import KERNEL from model_compression_toolkit.defaultdict import DefaultDict @@ -178,17 +178,17 @@ def get_tpc(self): two_bit_cfg = mixed_precision_cfg_list[2] - weight_mixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + weight_mixed_cfg = schema.QuantizationConfigOptions( mixed_precision_cfg_list, base_config=cfg, ) - weight_fixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + weight_fixed_cfg = schema.QuantizationConfigOptions( [two_bit_cfg], base_config=two_bit_cfg, ) - tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + tp_model = schema.TargetPlatformModel( weight_fixed_cfg, tpc_minor_version=None, tpc_patch_version=None, @@ -196,8 +196,8 @@ def get_tpc(self): add_metadata=False, name="mp_part_weights_layers_test") with tp_model: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights_mp", weight_mixed_cfg) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights_fixed", weight_fixed_cfg) + schema.OperatorsSet("Weights_mp", weight_mixed_cfg) + schema.OperatorsSet("Weights_fixed", weight_fixed_cfg) keras_tpc = tp.TargetPlatformCapabilities(tp_model) @@ -512,18 +512,18 @@ def get_tpc(self): [c.clone_and_edit(enable_activation_quantization=False) for c in mixed_precision_cfg_list] cfg = mixed_precision_cfg_list[0] - act_mixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + act_mixed_cfg = schema.QuantizationConfigOptions( [act_eight_bit_cfg, act_four_bit_cfg, act_two_bit_cfg], base_config=act_eight_bit_cfg, ) - weight_mixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + weight_mixed_cfg = schema.QuantizationConfigOptions( 
mixed_precision_cfg_list, base_config=cfg, ) - tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([cfg], cfg), + tp_model = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([cfg], cfg), tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, @@ -531,8 +531,8 @@ def get_tpc(self): name="mp_weights_conf_act_test") with tp_model: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Activations", act_mixed_cfg) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights", weight_mixed_cfg) + schema.OperatorsSet("Activations", act_mixed_cfg) + schema.OperatorsSet("Weights", weight_mixed_cfg) keras_tpc = tp.TargetPlatformCapabilities(tp_model) diff --git a/tests/keras_tests/function_tests/test_custom_layer.py b/tests/keras_tests/function_tests/test_custom_layer.py index f14880006..e8b786c38 100644 --- a/tests/keras_tests/function_tests/test_custom_layer.py +++ b/tests/keras_tests/function_tests/test_custom_layer.py @@ -18,7 +18,7 @@ import tensorflow as tf import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.target_platform_capabilities.schema.v1 import Signedness from model_compression_toolkit.target_platform_capabilities.constants import BIAS_ATTR, KERNEL_ATTR from tests.common_tests.helpers.generate_test_tp_model import generate_test_attr_configs, DEFAULT_WEIGHT_ATTR_CONFIG, \ @@ -64,35 +64,35 @@ def get_tpc(): """ tp = mct.target_platform attr_cfg = generate_test_attr_configs(kernel_lut_values_bitwidth=0) - base_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, - enable_activation_quantization=True, - activation_n_bits=32, - supported_input_activation_n_bits=32, - default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG], - attr_weights_configs_mapping={}, - quantization_preserving=False, - fixed_scale=1.0, - fixed_zero_point=0, - simd_size=32, - signedness=Signedness.AUTO) - - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([base_cfg]) - tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_configuration_options, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False) + base_cfg = schema.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, + enable_activation_quantization=True, + activation_n_bits=32, + supported_input_activation_n_bits=32, + default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG], + attr_weights_configs_mapping={}, + quantization_preserving=False, + fixed_scale=1.0, + fixed_zero_point=0, + simd_size=32, + signedness=Signedness.AUTO) + + default_configuration_options = schema.QuantizationConfigOptions([base_cfg]) + tp_model = schema.TargetPlatformModel(default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with tp_model: default_qco = tp.get_default_quantization_config_options() - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", - 
default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + schema.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) tpc = tp.TargetPlatformCapabilities(tp_model) with tpc: # No need to quantize Flatten and Dropout layers tp.OperationsSetToLayers("NoQuantization", [CustomIdentity, - tp.LayerFilterParams(CustomIdentityWithArg, dummy_arg=0),]) + tp.LayerFilterParams(CustomIdentityWithArg, dummy_arg=0), ]) return tpc @@ -111,7 +111,8 @@ def test_custom_layer_in_tpc(self): # verify the custom layer is in the quantized model self.assertTrue(isinstance(q_model.layers[2], CustomIdentity), 'Custom layer should be in the quantized model') - self.assertTrue(isinstance(q_model.layers[3], CustomIdentityWithArg), 'Custom layer should be in the quantized model') + self.assertTrue(isinstance(q_model.layers[3], CustomIdentityWithArg), + 'Custom layer should be in the quantized model') # verify the custom layer isn't quantized self.assertTrue(len(q_model.layers) == 4, 'Quantized model should have only 3 layers: Input, KerasActivationQuantizationHolder, CustomIdentity & CustomIdentityWithArg') diff --git a/tests/keras_tests/function_tests/test_hmse_error_method.py b/tests/keras_tests/function_tests/test_hmse_error_method.py index b0243ce1a..9b9e33967 100644 --- a/tests/keras_tests/function_tests/test_hmse_error_method.py +++ b/tests/keras_tests/function_tests/test_hmse_error_method.py @@ -19,7 +19,7 @@ from tensorflow.keras import layers import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit import DefaultDict from model_compression_toolkit.core import QuantizationConfig from model_compression_toolkit.constants import THRESHOLD, RANGE_MAX, NUM_QPARAM_HESSIAN_SAMPLES @@ -87,7 +87,8 @@ def _setup_with_args(self, quant_method, per_channel, running_gptq=True, tpc_fn= representative_dataset, lambda name, _tp: tpc_fn(quant_method, per_channel), qc=self.qc, - running_gptq=running_gptq # to enable HMSE in params calculation if needed + running_gptq=running_gptq + # to enable HMSE in params calculation if needed ) self.his = HessianInfoService(graph=self.graph, fw_impl=self.keras_impl) @@ -124,42 +125,36 @@ def _run_node_verification(node_type): _run_node_verification(layers.Dense) def test_pot_threshold_selection_hmse_per_channel(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.POWER_OF_TWO, per_channel=True) calculate_quantization_params(self.graph, fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) self._verify_params_calculation_execution(THRESHOLD) def test_pot_threshold_selection_hmse_per_tensor(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.POWER_OF_TWO, per_channel=False) calculate_quantization_params(self.graph, fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) self._verify_params_calculation_execution(THRESHOLD) def test_symmetric_threshold_selection_hmse_per_channel(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.SYMMETRIC, per_channel=True) calculate_quantization_params(self.graph, 
fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) self._verify_params_calculation_execution(THRESHOLD) def test_symmetric_threshold_selection_hmse_per_tensor(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.SYMMETRIC, per_channel=False) calculate_quantization_params(self.graph, fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) self._verify_params_calculation_execution(THRESHOLD) def test_usniform_threshold_selection_hmse_per_channel(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.UNIFORM, per_channel=True) calculate_quantization_params(self.graph, fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) self._verify_params_calculation_execution(RANGE_MAX) def test_uniform_threshold_selection_hmse_per_tensor(self): - self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.UNIFORM, per_channel=False) calculate_quantization_params(self.graph, fw_impl=self.keras_impl, repr_data_gen_fn=representative_dataset, hessian_info_service=self.his, num_hessian_samples=1) @@ -170,27 +165,28 @@ def test_threshold_selection_hmse_no_gptq(self): self._setup_with_args(quant_method=mct.target_platform.QuantizationMethod.SYMMETRIC, per_channel=True, running_gptq=False) self.assertTrue('The HMSE error method for parameters selection is only supported when running GPTQ ' - 'optimization due to long execution time that is not suitable for basic PTQ.' in e.exception.args[0]) + 'optimization due to long execution time that is not suitable for basic PTQ.' in + e.exception.args[0]) def test_threshold_selection_hmse_no_kernel_attr(self): def _generate_bn_quantization_tpc(quant_method, per_channel): cfg, _, _ = get_op_quantization_configs() - conv_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([cfg], base_config=cfg) + conv_qco = schema.QuantizationConfigOptions([cfg], base_config=cfg) # enable BN attributes quantization using the bn_qco = conv_qco.clone_and_edit(attr_weights_configs_mapping= {GAMMA: AttributeQuantizationConfig(weights_n_bits=8, enable_weights_quantization=True)}) - tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(conv_qco, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False) + tp_model = schema.TargetPlatformModel(conv_qco, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with tp_model: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Linear", conv_qco) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BN", bn_qco) + schema.OperatorsSet("Linear", conv_qco) + schema.OperatorsSet("BN", bn_qco) tpc = tp.TargetPlatformCapabilities(tp_model) diff --git a/tests/keras_tests/function_tests/test_layer_fusing.py b/tests/keras_tests/function_tests/test_layer_fusing.py index 2422bae3c..5100dcef0 100644 --- a/tests/keras_tests/function_tests/test_layer_fusing.py +++ b/tests/keras_tests/function_tests/test_layer_fusing.py @@ -2,7 +2,7 @@ import numpy as np import tensorflow as tf -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.core import 
DEFAULTCONFIG from model_compression_toolkit.core.common.fusion.layer_fusing import fusion from model_compression_toolkit.core.common.quantization.set_node_quantization_config import \ @@ -10,7 +10,8 @@ from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation -from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs +from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import \ + get_op_quantization_configs import model_compression_toolkit as mct from tests.common_tests.helpers.prep_graph_for_func_test import prepare_graph_with_configs @@ -80,16 +81,16 @@ def create_network_4(input_shape): def generate_base_tpc(): base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs() - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + default_configuration_options = schema.QuantizationConfigOptions( [default_config]) - generated_tp = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + generated_tp = schema.TargetPlatformModel( default_configuration_options, tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False, name='layer_fusing_test') - mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) return generated_tp, mixed_precision_configuration_options @@ -97,10 +98,10 @@ def generate_base_tpc(): def get_tpc_1(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) - any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + any_relu = schema.OperatorsSet("AnyReLU") # Define fusions - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, any_relu]) + schema.Fusing([conv, any_relu]) keras_tpc = tp.TargetPlatformCapabilities(generated_tp) with keras_tpc: @@ -114,14 +115,14 @@ def get_tpc_1(): def get_tpc_2(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) - any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") - swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") - sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") - tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") - activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid, tanh) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + any_relu = 
schema.OperatorsSet("AnyReLU") + swish = schema.OperatorsSet("Swish") + sigmoid = schema.OperatorsSet("Sigmoid") + tanh = schema.OperatorsSet("Tanh") + activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid, tanh) # Define fusions - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) + schema.Fusing([conv, activations_after_conv_to_fuse]) keras_tpc = tp.TargetPlatformCapabilities(generated_tp) with keras_tpc: @@ -138,10 +139,10 @@ def get_tpc_2(): def get_tpc_3(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) - any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + any_relu = schema.OperatorsSet("AnyReLU") # Define fusions - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, any_relu]) + schema.Fusing([conv, any_relu]) keras_tpc = tp.TargetPlatformCapabilities(generated_tp) with keras_tpc: @@ -155,17 +156,17 @@ def get_tpc_3(): def get_tpc_4(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) - any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") - add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add") - swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") - activations_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add") + swish = schema.OperatorsSet("Swish") + activations_to_fuse = schema.OperatorSetConcat(any_relu, swish) # Define fusions - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add, activations_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_to_fuse, add]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_to_fuse]) + schema.Fusing([conv, activations_to_fuse]) + schema.Fusing([conv, add, activations_to_fuse]) + schema.Fusing([conv, activations_to_fuse, add]) + schema.Fusing([fc, activations_to_fuse]) keras_tpc = tp.TargetPlatformCapabilities(generated_tp) with keras_tpc: @@ -223,7 +224,8 @@ def test_layer_fusing_3(self): self._compare(fusion_graph.fused_nodes, expected_fusions) def test_layer_fusing_4(self): - expected_fusions = [[Conv2D, Activation, Add], [Conv2D, Activation, Add], [Conv2D, Activation], [Conv2D, ReLU, Add], [Dense, tf.nn.silu], [Dense, Activation]] + expected_fusions = [[Conv2D, Activation, Add], [Conv2D, Activation, Add], [Conv2D, Activation], + [Conv2D, ReLU, Add], [Dense, tf.nn.silu], [Dense, 
Activation]] model = create_network_4(INPUT_SHAPE) fusion_graph = prepare_graph_with_configs(model, KerasImplementation(), DEFAULT_KERAS_INFO, diff --git a/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py b/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py index 5c805fdb4..da48b9187 100644 --- a/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py +++ b/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py @@ -22,7 +22,7 @@ from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.core.common import BaseNode from tests.common_tests.helpers.generate_test_tp_model import generate_test_op_qc, generate_test_attr_configs @@ -48,9 +48,8 @@ tp = mct.target_platform - TEST_QC = generate_test_op_qc(**generate_test_attr_configs()) -TEST_QCO = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) +TEST_QCO = schema.QuantizationConfigOptions([TEST_QC]) def get_node(layer) -> BaseNode: @@ -105,14 +104,14 @@ def test_keras_layers_with_params(self): self.assertFalse(get_node(conv).is_match_filter_params(conv_filter_contains)) def test_get_layers_by_op(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False) with hm: - op_obj = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') + op_obj = schema.OperatorsSet('opsetA') fw_tp = TargetPlatformCapabilities(hm) with fw_tp: opset_layers = [Conv2D, LayerFilterParams(ReLU, max_value=2)] @@ -122,16 +121,16 @@ def test_get_layers_by_op(self): self.assertEqual(fw_tp.get_layers_by_opset_name('nonExistingOpsetName'), None) def test_get_layers_by_opconcat(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False) with hm: - op_obj_a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') - op_obj_b = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') - op_concat = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(op_obj_a, op_obj_b) + op_obj_a = schema.OperatorsSet('opsetA') + op_obj_b = schema.OperatorsSet('opsetB') + op_concat = schema.OperatorSetConcat(op_obj_a, op_obj_b) fw_tp = TargetPlatformCapabilities(hm) with fw_tp: @@ -144,15 +143,15 @@ def test_get_layers_by_opconcat(self): self.assertEqual(fw_tp.get_layers_by_opset(op_concat), opset_layers_a + opset_layers_b) def test_layer_attached_to_multiple_opsets(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + hm = schema.TargetPlatformModel( + 
schema.QuantizationConfigOptions([TEST_QC]), tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False) with hm: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') + schema.OperatorsSet('opsetA') + schema.OperatorsSet('opsetB') fw_tp = TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: @@ -162,15 +161,15 @@ def test_layer_attached_to_multiple_opsets(self): self.assertEqual('Found layer Conv2D in more than one OperatorsSet', str(e.exception)) def test_filter_layer_attached_to_multiple_opsets(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False) with hm: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') + schema.OperatorsSet('opsetA') + schema.OperatorsSet('opsetB') fw_tp = TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: @@ -180,26 +179,26 @@ def test_filter_layer_attached_to_multiple_opsets(self): self.assertEqual('Found layer Activation(activation=relu) in more than one OperatorsSet', str(e.exception)) def test_qco_by_keras_layer(self): - default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) + default_qco = schema.QuantizationConfigOptions([TEST_QC]) default_qco = default_qco.clone_and_edit(attr_weights_configs_mapping={}) - tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False, - name='test') + tpm = schema.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, + name='test') with tpm: - mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + mixed_precision_configuration_options = schema.QuantizationConfigOptions( quantization_config_list=[TEST_QC, TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}}), TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}})], base_config=TEST_QC) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv", mixed_precision_configuration_options) + schema.OperatorsSet("conv", mixed_precision_configuration_options) sevenbit_qco = TEST_QCO.clone_and_edit(activation_n_bits=7, attr_weights_configs_mapping={}) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("tanh", sevenbit_qco) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("relu") + schema.OperatorsSet("tanh", sevenbit_qco) + schema.OperatorsSet("relu") tpc_keras = tp.TargetPlatformCapabilities(tpm) with tpc_keras: @@ -221,17 +220,18 @@ def test_qco_by_keras_layer(self): len(mixed_precision_configuration_options.quantization_config_list)) for i in range(len(conv_qco.quantization_config_list)): 
self.assertEqual(conv_qco.quantization_config_list[i].attr_weights_configs_mapping[KERAS_KERNEL], - mixed_precision_configuration_options.quantization_config_list[i].attr_weights_configs_mapping[KERNEL_ATTR]) + mixed_precision_configuration_options.quantization_config_list[ + i].attr_weights_configs_mapping[KERNEL_ATTR]) self.assertEqual(tanh_qco, sevenbit_qco) self.assertEqual(relu_qco, default_qco) def test_opset_not_in_tp(self): - default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False) + default_qco = schema.QuantizationConfigOptions([TEST_QC]) + hm = schema.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) hm_keras = tp.TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: with hm_keras: @@ -241,18 +241,18 @@ def test_opset_not_in_tp(self): str(e.exception)) def test_keras_fusing_patterns(self): - default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False) + default_qco = schema.QuantizationConfigOptions([TEST_QC]) + hm = schema.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with hm: - a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opA") - b = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opB") - c = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opC") - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([a, b, c]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([a, c]) + a = schema.OperatorsSet("opA") + b = schema.OperatorsSet("opB") + c = schema.OperatorsSet("opC") + schema.Fusing([a, b, c]) + schema.Fusing([a, c]) hm_keras = tp.TargetPlatformCapabilities(hm) with hm_keras: @@ -274,14 +274,14 @@ def test_keras_fusing_patterns(self): self.assertEqual(p1[1], LayerFilterParams(ReLU, Greater("max_value", 7), negative_slope=0)) def test_get_default_op_qc(self): - default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) - tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False) + default_qco = schema.QuantizationConfigOptions([TEST_QC]) + tpm = schema.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with tpm: - a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opA") + a = schema.OperatorsSet("opA") tpc = tp.TargetPlatformCapabilities(tpm) with tpc: @@ -307,12 +307,14 @@ def rep_data(): quantized_model, _ = mct.ptq.keras_post_training_quantization(model, rep_data, target_platform_capabilities=tpc) - core_config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=2, - 
use_hessian_based_scores=False)) + core_config = mct.core.CoreConfig( + mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=2, + use_hessian_based_scores=False)) quantized_model, _ = mct.ptq.keras_post_training_quantization(model, rep_data, core_config=core_config, - target_resource_utilization=mct.core.ResourceUtilization(np.inf), + target_resource_utilization=mct.core.ResourceUtilization( + np.inf), target_platform_capabilities=tpc) def test_get_keras_supported_version(self): diff --git a/tests/pytorch_tests/function_tests/layer_fusing_test.py b/tests/pytorch_tests/function_tests/layer_fusing_test.py index e66a9ca93..ad23bc623 100644 --- a/tests/pytorch_tests/function_tests/layer_fusing_test.py +++ b/tests/pytorch_tests/function_tests/layer_fusing_test.py @@ -17,15 +17,17 @@ from torch.nn import Conv2d, ReLU, SiLU, Sigmoid, Linear, Hardtanh from torch.nn.functional import relu, relu6 -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.target_platform_capabilities.target_platform import LayerFilterParams from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation -from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs +from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import \ + get_op_quantization_configs from tests.common_tests.helpers.prep_graph_for_func_test import prepare_graph_with_configs from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest import model_compression_toolkit as mct + tp = mct.target_platform @@ -48,20 +50,22 @@ def get_type(self, fusion): def get_tpc(self): base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs() - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) - generated_tp = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_configuration_options, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - name='layer_fusing_test') - mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + default_configuration_options = schema.QuantizationConfigOptions([default_config]) + generated_tp = schema.TargetPlatformModel(default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + name='layer_fusing_test') + mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) return generated_tp, mixed_precision_configuration_options def _compare(self, fused_nodes): - self.unit_test.assertTrue(len(fused_nodes) == len(self.expected_fusions), msg=f'Number of fusions is not as expected!') + self.unit_test.assertTrue(len(fused_nodes) == len(self.expected_fusions), + msg=f'Number of fusions is not as expected!') for i, fusion in enumerate(fused_nodes): - self.unit_test.assertTrue(self.get_type(fusion) == self.expected_fusions[i], msg=f'Miss-match fusion compared to expected!') + self.unit_test.assertTrue(self.get_type(fusion) == 
self.expected_fusions[i], + msg=f'Mismatched fusion compared to expected!') class LayerFusingTest1(BaseLayerFusingTest): @@ -72,10 +76,10 @@ def __init__(self, unit_test): def get_tpc(self): generated_tp, mixed_precision_configuration_options = super().get_tpc() with generated_tp: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) - any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + any_relu = schema.OperatorsSet("AnyReLU") # Define fusions - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, any_relu]) + schema.Fusing([conv, any_relu]) pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp) with pytorch_tpc: @@ -114,15 +118,16 @@ def __init__(self, unit_test): def get_tpc(self): generated_tp, mixed_precision_configuration_options = super().get_tpc() with generated_tp: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) - any_act = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyAct") + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + any_act = schema.OperatorsSet("AnyAct") # Define fusions - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, any_act]) + schema.Fusing([conv, any_act]) pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp) with pytorch_tpc: tp.OperationsSetToLayers("Conv", [Conv2d]) - tp.OperationsSetToLayers("AnyAct", [ReLU,relu6,relu,SiLU,Sigmoid, LayerFilterParams(Hardtanh, min_val=0)]) + tp.OperationsSetToLayers("AnyAct", + [ReLU, relu6, relu, SiLU, Sigmoid, LayerFilterParams(Hardtanh, min_val=0)]) return pytorch_tpc def run_test(self, seed=0): @@ -135,11 +140,11 @@ def run_test(self, seed=0): class LayerFusingNetTest(nn.Module): def __init__(self): super().__init__() - self.conv1 = nn.Conv2d(3, 32, kernel_size=(3,3)) - self.conv2 = nn.Conv2d(32, 32, kernel_size=(1,1)) - self.conv3 = nn.Conv2d(32, 32, kernel_size=(3,3)) - self.conv4 = nn.Conv2d(32, 64, kernel_size=(1,1)) - self.conv5 = nn.Conv2d(64, 64, kernel_size=(2,2)) + self.conv1 = nn.Conv2d(3, 32, kernel_size=(3, 3)) + self.conv2 = nn.Conv2d(32, 32, kernel_size=(1, 1)) + self.conv3 = nn.Conv2d(32, 32, kernel_size=(3, 3)) + self.conv4 = nn.Conv2d(32, 64, kernel_size=(1, 1)) + self.conv5 = nn.Conv2d(64, 64, kernel_size=(2, 2)) self.relu = nn.ReLU() self.tanh = Hardtanh(min_val=0) self.swish = nn.SiLU() @@ -166,15 +171,15 @@ def __init__(self, unit_test): def get_tpc(self): generated_tp, mixed_precision_configuration_options = super().get_tpc() with generated_tp: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) - any_act = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyAct") + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + any_act = schema.OperatorsSet("AnyAct") # Define fusions - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, any_act]) + schema.Fusing([conv, any_act]) pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp) with pytorch_tpc: tp.OperationsSetToLayers("Conv", [Conv2d]) - tp.OperationsSetToLayers("AnyAct", [ReLU,relu6,relu]) + tp.OperationsSetToLayers("AnyAct", [ReLU, relu6, relu]) return pytorch_tpc def
run_test(self, seed=0): @@ -187,11 +192,11 @@ def run_test(self, seed=0): class LayerFusingNetTest(nn.Module): def __init__(self): super().__init__() - self.conv1 = nn.Conv2d(3, 32, kernel_size=(3,3)) - self.conv2 = nn.Conv2d(32, 32, kernel_size=(1,1)) - self.conv3 = nn.Conv2d(32, 32, kernel_size=(3,3)) - self.conv4 = nn.Conv2d(32, 64, kernel_size=(1,1)) - self.conv5 = nn.Conv2d(64, 64, kernel_size=(2,2)) + self.conv1 = nn.Conv2d(3, 32, kernel_size=(3, 3)) + self.conv2 = nn.Conv2d(32, 32, kernel_size=(1, 1)) + self.conv3 = nn.Conv2d(32, 32, kernel_size=(3, 3)) + self.conv4 = nn.Conv2d(32, 64, kernel_size=(1, 1)) + self.conv5 = nn.Conv2d(64, 64, kernel_size=(2, 2)) self.relu = nn.ReLU() self.tanh = nn.Tanh() self.swish = nn.SiLU() @@ -213,22 +218,23 @@ def forward(self, x): class LayerFusingTest4(BaseLayerFusingTest): def __init__(self, unit_test): super().__init__(unit_test) - self.expected_fusions = [[Conv2d, SiLU, torch.add], [Conv2d, SiLU, torch.add], [Conv2d, ReLU], [Conv2d, ReLU, torch.add], [Linear, SiLU], [Linear, SiLU]] + self.expected_fusions = [[Conv2d, SiLU, torch.add], [Conv2d, SiLU, torch.add], [Conv2d, ReLU], + [Conv2d, ReLU, torch.add], [Linear, SiLU], [Linear, SiLU]] def get_tpc(self): generated_tp, mixed_precision_configuration_options = super().get_tpc() with generated_tp: - conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) - any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") - add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add") - swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") - activations_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish) + conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + any_relu = schema.OperatorsSet("AnyReLU") + add = schema.OperatorsSet("Add") + swish = schema.OperatorsSet("Swish") + activations_to_fuse = schema.OperatorSetConcat(any_relu, swish) # Define fusions - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add, activations_to_fuse]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_to_fuse, add]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_to_fuse]) + schema.Fusing([conv, activations_to_fuse]) + schema.Fusing([conv, add, activations_to_fuse]) + schema.Fusing([conv, activations_to_fuse, add]) + schema.Fusing([fc, activations_to_fuse]) pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp) with pytorch_tpc: @@ -249,12 +255,12 @@ def run_test(self, seed=0): class LayerFusingNetTest(nn.Module): def __init__(self): super().__init__() - self.conv1 = nn.Conv2d(3, 3, kernel_size=(3,3), padding='same') - self.conv2 = nn.Conv2d(3, 3, kernel_size=(1,1), padding='same') - self.conv3 = nn.Conv2d(3, 3, kernel_size=(3,3), padding='same') - self.conv4 = nn.Conv2d(3, 3, kernel_size=(1,1), padding='same') - self.conv5 = nn.Conv2d(3, 3, kernel_size=(3,3), padding='same') - self.conv6 = nn.Conv2d(3, 3, kernel_size=(1,1), 
padding='same') + self.conv1 = nn.Conv2d(3, 3, kernel_size=(3, 3), padding='same') + self.conv2 = nn.Conv2d(3, 3, kernel_size=(1, 1), padding='same') + self.conv3 = nn.Conv2d(3, 3, kernel_size=(3, 3), padding='same') + self.conv4 = nn.Conv2d(3, 3, kernel_size=(1, 1), padding='same') + self.conv5 = nn.Conv2d(3, 3, kernel_size=(3, 3), padding='same') + self.conv6 = nn.Conv2d(3, 3, kernel_size=(1, 1), padding='same') self.relu = nn.ReLU() self.swish = nn.SiLU() self.flatten = nn.Flatten() diff --git a/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py b/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py index 286cbf7fa..d3ce92a8a 100644 --- a/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py +++ b/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py @@ -23,14 +23,16 @@ from torchvision.models import mobilenet_v2 import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.core import MixedPrecisionQuantizationConfig from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.constants import PYTORCH from model_compression_toolkit.core.common import BaseNode from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities -from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import LayerFilterParams -from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attribute_filter import Greater, Smaller, Eq +from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import \ + LayerFilterParams +from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attribute_filter import \ + Greater, Smaller, Eq from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL, IMX500_TP_MODEL, \ TFLITE_TP_MODEL, QNNPACK_TP_MODEL, KERNEL_ATTR, WEIGHTS_N_BITS, PYTORCH_KERNEL, BIAS_ATTR, BIAS from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation @@ -39,9 +41,8 @@ tp = mct.target_platform - TEST_QC = generate_test_op_qc(**generate_test_attr_configs()) -TEST_QCO = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) +TEST_QCO = schema.QuantizationConfigOptions([TEST_QC]) class TestPytorchTPModel(unittest.TestCase): @@ -67,45 +68,48 @@ def test_pytorch_layers_with_params(self): hardtanh_with_params = LayerFilterParams(hardtanh, Greater("max_val", 2) & Smaller("min_val", 1)) self.assertTrue(get_node(partial(hardtanh, max_val=3, min_val=0)).is_match_filter_params(hardtanh_with_params)) self.assertFalse(get_node(partial(hardtanh, max_val=3, min_val=1)).is_match_filter_params(hardtanh_with_params)) - self.assertFalse(get_node(partial(hardtanh, max_val=2, min_val=0.5)).is_match_filter_params(hardtanh_with_params)) + self.assertFalse( + get_node(partial(hardtanh, max_val=2, min_val=0.5)).is_match_filter_params(hardtanh_with_params)) self.assertFalse(get_node(partial(hardtanh, max_val=2)).is_match_filter_params(hardtanh_with_params)) - self.assertFalse(get_node(partial(hardtanh, max_val=1, min_val=0.5)).is_match_filter_params(hardtanh_with_params)) - - l2norm_tflite_opset = LayerFilterParams(torch.nn.functional.normalize, Eq('p',2) | Eq('p',None)) - 
self.assertTrue(get_node(partial(torch.nn.functional.normalize, p=2)).is_match_filter_params(l2norm_tflite_opset)) - self.assertTrue(get_node(partial(torch.nn.functional.normalize, p=2.0)).is_match_filter_params(l2norm_tflite_opset)) + self.assertFalse( + get_node(partial(hardtanh, max_val=1, min_val=0.5)).is_match_filter_params(hardtanh_with_params)) + + l2norm_tflite_opset = LayerFilterParams(torch.nn.functional.normalize, Eq('p', 2) | Eq('p', None)) + self.assertTrue( + get_node(partial(torch.nn.functional.normalize, p=2)).is_match_filter_params(l2norm_tflite_opset)) + self.assertTrue( + get_node(partial(torch.nn.functional.normalize, p=2.0)).is_match_filter_params(l2norm_tflite_opset)) self.assertTrue(get_node(torch.nn.functional.normalize).is_match_filter_params(l2norm_tflite_opset)) - self.assertFalse(get_node(partial(torch.nn.functional.normalize, p=3.0)).is_match_filter_params(l2norm_tflite_opset)) - - + self.assertFalse( + get_node(partial(torch.nn.functional.normalize, p=3.0)).is_match_filter_params(l2norm_tflite_opset)) def test_qco_by_pytorch_layer(self): - default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) + default_qco = schema.QuantizationConfigOptions([TEST_QC]) default_qco = default_qco.clone_and_edit(attr_weights_configs_mapping={}) - tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False, - name='test') + tpm = schema.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False, + name='test') with tpm: - mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + mixed_precision_configuration_options = schema.QuantizationConfigOptions( [TEST_QC, TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}}), TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}})], base_config=TEST_QC) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv", mixed_precision_configuration_options) + schema.OperatorsSet("conv", mixed_precision_configuration_options) sevenbit_qco = TEST_QCO.clone_and_edit(activation_n_bits=7, attr_weights_configs_mapping={}) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("tanh", sevenbit_qco) + schema.OperatorsSet("tanh", sevenbit_qco) sixbit_qco = TEST_QCO.clone_and_edit(activation_n_bits=6, attr_weights_configs_mapping={}) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("avg_pool2d_kernel_2", sixbit_qco) + schema.OperatorsSet("avg_pool2d_kernel_2", sixbit_qco) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("avg_pool2d") + schema.OperatorsSet("avg_pool2d") tpc_pytorch = tp.TargetPlatformCapabilities(tpm) with tpc_pytorch: @@ -139,14 +143,14 @@ def test_qco_by_pytorch_layer(self): self.assertEqual(avg_pool2d_qco, default_qco) def test_get_layers_by_op(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False) with hm: - op_obj = 
model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') + op_obj = schema.OperatorsSet('opsetA') fw_tp = TargetPlatformCapabilities(hm) with fw_tp: opset_layers = [torch.nn.Conv2d, LayerFilterParams(torch.nn.Softmax, dim=1)] @@ -155,16 +159,16 @@ def test_get_layers_by_op(self): self.assertEqual(fw_tp.get_layers_by_opset(op_obj), opset_layers) def test_get_layers_by_opconcat(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False) with hm: - op_obj_a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') - op_obj_b = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') - op_concat = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(op_obj_a, op_obj_b) + op_obj_a = schema.OperatorsSet('opsetA') + op_obj_b = schema.OperatorsSet('opsetB') + op_concat = schema.OperatorSetConcat(op_obj_a, op_obj_b) fw_tp = TargetPlatformCapabilities(hm) with fw_tp: @@ -177,15 +181,15 @@ def test_get_layers_by_opconcat(self): self.assertEqual(fw_tp.get_layers_by_opset(op_concat), opset_layers_a + opset_layers_b) def test_layer_attached_to_multiple_opsets(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False) with hm: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') + schema.OperatorsSet('opsetA') + schema.OperatorsSet('opsetB') fw_tp = TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: @@ -195,15 +199,15 @@ def test_layer_attached_to_multiple_opsets(self): self.assertEqual('Found layer Conv2d in more than one OperatorsSet', str(e.exception)) def test_filter_layer_attached_to_multiple_opsets(self): - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( - model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + hm = schema.TargetPlatformModel( + schema.QuantizationConfigOptions([TEST_QC]), tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False) with hm: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') + schema.OperatorsSet('opsetA') + schema.OperatorsSet('opsetB') fw_tp = TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: @@ -213,12 +217,12 @@ def test_filter_layer_attached_to_multiple_opsets(self): self.assertEqual('Found layer Softmax(dim=2) in more than one OperatorsSet', str(e.exception)) def test_opset_not_in_tp(self): - default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) - hm = 
model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False) + default_qco = schema.QuantizationConfigOptions([TEST_QC]) + hm = schema.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) hm_pytorch = tp.TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: with hm_pytorch: @@ -228,19 +232,19 @@ def test_opset_not_in_tp(self): str(e.exception)) def test_pytorch_fusing_patterns(self): - default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + default_qco = schema.QuantizationConfigOptions( [TEST_QC]) - hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, - tpc_minor_version=None, - tpc_patch_version=None, - tpc_platform_type=None, - add_metadata=False) + hm = schema.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + tpc_platform_type=None, + add_metadata=False) with hm: - a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opA") - b = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opB") - c = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opC") - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([a, b, c]) - model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([a, c]) + a = schema.OperatorsSet("opA") + b = schema.OperatorsSet("opB") + c = schema.OperatorsSet("opC") + schema.Fusing([a, b, c]) + schema.Fusing([a, c]) hm_keras = tp.TargetPlatformCapabilities(hm) with hm_keras: @@ -281,7 +285,8 @@ def rep_data(): mixed_precision_config=mp_qc) quantized_model, _ = mct.ptq.pytorch_post_training_quantization(model, rep_data, - target_resource_utilization=mct.core.ResourceUtilization(np.inf), + target_resource_utilization=mct.core.ResourceUtilization( + np.inf), target_platform_capabilities=tpc, core_config=core_config) diff --git a/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py b/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py index 3b5aa9e7a..1515b69b9 100644 --- a/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py @@ -16,11 +16,12 @@ from torch import nn import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from mct_quantizers import QuantizationMethod, PytorchQuantizationWrapper from model_compression_toolkit import DefaultDict from model_compression_toolkit.core.pytorch.constants import GAMMA, BETA -from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS, BIAS_ATTR +from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS, \ + BIAS_ATTR from tests.common_tests.helpers.generate_test_tp_model import generate_test_attr_configs, \ DEFAULT_WEIGHT_ATTR_CONFIG, KERNEL_BASE_CONFIG, generate_test_op_qc, BIAS_CONFIG from model_compression_toolkit.target_platform_capabilities.schema.v1 import Signedness @@ -51,35 +52,35 @@ def _generate_bn_quantized_tpm(quantize_linear): 
bias_config=attr_cfgs_dict[BIAS_CONFIG], enable_activation_quantization=False) - bn_op_qc = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(enable_activation_quantization=False, - default_weight_attr_config=default_attr_cfg, - attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg}, - activation_n_bits=8, - supported_input_activation_n_bits=8, - activation_quantization_method=QuantizationMethod.POWER_OF_TWO, - quantization_preserving=False, - fixed_scale=None, - fixed_zero_point=None, - simd_size=32, - signedness=Signedness.AUTO) - - default_op_qc = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(enable_activation_quantization=False, - default_weight_attr_config=default_attr_cfg, - attr_weights_configs_mapping={}, - activation_n_bits=8, - supported_input_activation_n_bits=8, - activation_quantization_method=QuantizationMethod.POWER_OF_TWO, - quantization_preserving=False, - fixed_scale=None, - fixed_zero_point=None, - simd_size=32, - signedness=Signedness.AUTO) - - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_op_qc]) - linear_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([linear_op_qc]) - bn_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([bn_op_qc]) - - generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + bn_op_qc = schema.OpQuantizationConfig(enable_activation_quantization=False, + default_weight_attr_config=default_attr_cfg, + attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg}, + activation_n_bits=8, + supported_input_activation_n_bits=8, + activation_quantization_method=QuantizationMethod.POWER_OF_TWO, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO) + + default_op_qc = schema.OpQuantizationConfig(enable_activation_quantization=False, + default_weight_attr_config=default_attr_cfg, + attr_weights_configs_mapping={}, + activation_n_bits=8, + supported_input_activation_n_bits=8, + activation_quantization_method=QuantizationMethod.POWER_OF_TWO, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO) + + default_configuration_options = schema.QuantizationConfigOptions([default_op_qc]) + linear_configuration_options = schema.QuantizationConfigOptions([linear_op_qc]) + bn_configuration_options = schema.QuantizationConfigOptions([bn_op_qc]) + + generated_tpm = schema.TargetPlatformModel( default_configuration_options, tpc_minor_version=None, tpc_patch_version=None, @@ -87,9 +88,8 @@ def _generate_bn_quantized_tpm(quantize_linear): add_metadata=False, name='bn_quantized_tpm') with generated_tpm: - - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", linear_configuration_options) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BN", bn_configuration_options) + schema.OperatorsSet("Conv", linear_configuration_options) + schema.OperatorsSet("BN", bn_configuration_options) return generated_tpm diff --git a/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py b/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py index 69aa012db..4231169ac 100644 --- 
a/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py @@ -17,7 +17,7 @@ import torch.nn as nn import numpy as np import model_compression_toolkit as mct -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.target_platform_capabilities.schema.v1 import Signedness from model_compression_toolkit.core import MixedPrecisionQuantizationConfig from model_compression_toolkit.core.pytorch.utils import to_torch_tensor, torch_tensor_to_numpy, set_model @@ -61,7 +61,7 @@ def __init__(self, unit_test, func, const, input_reverse_order=False): self.input_reverse_order = input_reverse_order def generate_inputs(self): - return [np.random.random(in_shape)+1 for in_shape in self.get_input_shapes()] + return [np.random.random(in_shape) + 1 for in_shape in self.get_input_shapes()] def get_tpc(self): return mct.get_target_platform_capabilities(PYTORCH, IMX500_TP_MODEL, "v3") @@ -118,7 +118,7 @@ def get_resource_utilization(self): return mct.core.ResourceUtilization(9e3) def generate_inputs(self): - return [np.random.random(in_shape)+1 for in_shape in self.get_input_shapes()] + return [np.random.random(in_shape) + 1 for in_shape in self.get_input_shapes()] def get_tpc(self): return mct.get_target_platform_capabilities(PYTORCH, IMX500_TP_MODEL, "v3") @@ -155,7 +155,7 @@ def __init__(self): self.register_buffer('concatenate_const_3', to_torch_tensor(np.random.randint(-128, 127, size=(1, 3, 36, 36)))) self.register_buffer('stack_const_1', to_torch_tensor(np.random.randint(-128, 127, size=(1, 39, 36, 36)))) self.register_buffer('stack_const_2', to_torch_tensor(np.random.randint(-128, 127, size=(1, 39, 36, 36)))) - self.register_buffer('gather_const', to_torch_tensor(np.random.randint(-128, 127, size=(1, 2*36*36)))) + self.register_buffer('gather_const', to_torch_tensor(np.random.randint(-128, 127, size=(1, 2 * 36 * 36)))) def forward(self, x): x = torch.cat([self.cat_const_1, x, self.cat_const_2], dim=2) @@ -164,9 +164,9 @@ def forward(self, x): self.concatenate_const_2, x, self.concatenate_const_3, self.concatenate_const_1], dim=1) x = torch.stack([self.stack_const_1, x, self.stack_const_2], dim=1) - x = torch.reshape(x, (1, 3*39, 36, 36)) + x = torch.reshape(x, (1, 3 * 39, 36, 36)) - inds = torch.argmax(torch.reshape(x, (-1, 117, 36*36)), dim=2) + inds = torch.argmax(torch.reshape(x, (-1, 117, 36 * 36)), dim=2) b = torch.reshape(torch.gather(self.gather_const, 1, inds), (-1, 117, 1, 1)) return x + b @@ -204,9 +204,11 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= class ExpandConstQuantizationNet(nn.Module): def __init__(self, batch_size): super().__init__() - self.register_buffer('cat_const', to_torch_tensor(np.random.randint(-128, 127, size=(batch_size, 3, 32, 32)).astype(np.float32))) + self.register_buffer('cat_const', to_torch_tensor( + np.random.randint(-128, 127, size=(batch_size, 3, 32, 32)).astype(np.float32))) self.register_parameter('expand_const', - nn.Parameter(to_torch_tensor(np.random.randint(-128, 127, size=(1, 2, 32, 1)).astype(np.float32)), + nn.Parameter(to_torch_tensor( + np.random.randint(-128, 127, size=(1, 2, 32, 1)).astype(np.float32)), requires_grad=False)) def forward(self, x): @@ -226,34 +228,35 @@ def generate_inputs(self): def get_tpc(self): tp = mct.target_platform attr_cfg = generate_test_attr_configs() - base_cfg = 
model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, - enable_activation_quantization=True, - activation_n_bits=32, - supported_input_activation_n_bits=32, - default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG], - attr_weights_configs_mapping={}, - quantization_preserving=False, - fixed_scale=1.0, - fixed_zero_point=0, - simd_size=32, - signedness=Signedness.AUTO) - - default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([base_cfg]) + base_cfg = schema.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, + enable_activation_quantization=True, + activation_n_bits=32, + supported_input_activation_n_bits=32, + default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG], + attr_weights_configs_mapping={}, + quantization_preserving=False, + fixed_scale=1.0, + fixed_zero_point=0, + simd_size=32, + signedness=Signedness.AUTO) + + default_configuration_options = schema.QuantizationConfigOptions([base_cfg]) const_config = base_cfg.clone_and_edit(enable_activation_quantization=False, default_weight_attr_config=base_cfg.default_weight_attr_config.clone_and_edit( - enable_weights_quantization=True, weights_per_channel_threshold=False, + enable_weights_quantization=True, + weights_per_channel_threshold=False, weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)) - const_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config]) + const_configuration_options = schema.QuantizationConfigOptions([const_config]) - tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + tp_model = schema.TargetPlatformModel( default_configuration_options, tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, add_metadata=False) with tp_model: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("WeightQuant", const_configuration_options) + schema.OperatorsSet("WeightQuant", const_configuration_options) tpc = tp.TargetPlatformCapabilities(tp_model) with tpc: diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py index 68e0c09ee..4e9d72f55 100644 --- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py @@ -16,17 +16,19 @@ import numpy as np from torch.nn import Conv2d -import model_compression_toolkit.target_platform_capabilities.schema.v1 +import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.core import ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting from model_compression_toolkit.core.common.user_info import UserInformation from model_compression_toolkit.core.pytorch.constants import BIAS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR -from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities, OperationsSetToLayers +from model_compression_toolkit.target_platform_capabilities.target_platform import 
TargetPlatformCapabilities, \ + OperationsSetToLayers from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, OperatorsSet, \ QuantizationConfigOptions -from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_tp_model, get_op_quantization_configs +from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_tp_model, \ + get_op_quantization_configs from tests.common_tests.helpers.generate_test_tp_model import generate_mixed_precision_test_tp_model from tests.pytorch_tests.tpc_pytorch import get_pytorch_test_tpc_dict from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest @@ -139,25 +141,25 @@ def get_tpc(self): two_bit_cfg = mixed_precision_cfg_list[2] - weight_mixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + weight_mixed_cfg = schema.QuantizationConfigOptions( mixed_precision_cfg_list, base_config=cfg, ) - weight_fixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + weight_fixed_cfg = schema.QuantizationConfigOptions( [two_bit_cfg], base_config=two_bit_cfg, ) - tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + tp_model = schema.TargetPlatformModel( weight_fixed_cfg, tpc_minor_version=None, tpc_patch_version=None, tpc_platform_type=None, name="mp_part_weights_layers_test") with tp_model: - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights_mp", weight_mixed_cfg) - model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights_fixed", weight_fixed_cfg) + schema.OperatorsSet("Weights_mp", weight_mixed_cfg) + schema.OperatorsSet("Weights_fixed", weight_fixed_cfg) pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) @@ -210,6 +212,7 @@ def compare(self, quantized_models, float_model, input_x=None, quantization_info self.unit_test.assertTrue( np.unique(q_weights[i, :]).flatten().shape[0] <= 4) + class MixedPrecisionSearch2Bit(MixedPrecisionBaseTest): def __init__(self, unit_test): super().__init__(unit_test) @@ -363,4 +366,4 @@ def forward(self, inp): x = self.conv1(inp) x = torch.add(x, x) output = self.relu(x) - return output \ No newline at end of file + return output