diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 56d5094b6b..1e26efb8f8 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -561,17 +561,11 @@ def __enter__(self): # fill causal mask in slightly different way for avoid overflow on some platforms patch_update_causal_mask(self._model, "4.39.0") - if is_transformers_version(">=", "4.39.0"): - register_sin_cos_buffer(self._model) - def __exit__(self, exc_type, exc_value, traceback): super().__exit__(exc_type, exc_value, traceback) if hasattr(self._model.model, "_orig_update_causal_mask"): self._model.model._update_causal_mask = self._model.model._orig_update_causal_mask - for layer in self._model.model.layers: - layer.self_attn.rotary_emb.forward = layer.self_attn.rotary_emb._orig_forward - # copied from https://github.com/huggingface/transformers/commit/57d7594a79a9f5d835abf2d4d384db0e4818e548 to unblock export with transformers 4.42 def _mistral_update_causal_mask( @@ -692,10 +686,6 @@ def __enter__(self): self._model.model._orig_update_causal_mask = self._model.model._update_causal_mask self._model.model._update_causal_mask = types.MethodType(_mistral_update_causal_mask, self._model.model) - # mistral has some accuracy issues with bf16 with transformers >= 4.42 - # prefill rotary emb sin/cos for avoid this issue - register_sin_cos_buffer(self._model) - def __exit__(self, exc_type, exc_value, traceback): super().__exit__(exc_type, exc_value, traceback) diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py index 07e2672e68..3c1530d6af 100644 --- a/tests/openvino/test_exporters_cli.py +++ b/tests/openvino/test_exporters_cli.py @@ -41,7 +41,7 @@ ) from optimum.intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS -from optimum.intel.utils.import_utils import is_openvino_tokenizers_available, is_transformers_version +from optimum.intel.utils.import_utils import is_openvino_tokenizers_available class OVCLIExportTestCase(unittest.TestCase): @@ -95,21 +95,21 @@ class OVCLIExportTestCase(unittest.TestCase): "llama_awq", "int4 --ratio 1.0 --sym --group-size 8 --all-layers", 0, - 32 if is_transformers_version("<", "4.39.0") else 34, + 32, ), ( "text-generation-with-past", "llama_awq", "int4 --ratio 1.0 --sym --group-size 16 --awq --dataset wikitext2 --num-samples 100 " "--sensitivity-metric max_activation_variance", - 6 if is_transformers_version(">=", "4.39") else 4, + 4, 28, ), ( "text-generation-with-past", "llama_awq", "int4 --ratio 1.0 --sym --group-size 16 --scale-estimation --dataset wikitext2 --num-samples 100 ", - 6 if is_transformers_version(">=", "4.39") else 4, + 4, 28, ), ] diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index e36ea47df2..9e1ff3b9d2 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -236,7 +236,7 @@ class OVWeightCompressionTest(unittest.TestCase): quant_method=QuantizationMethod.AWQ, scale_estimation=True, ), - 18 if is_transformers_version(">=", "4.39") else 16, + 16, ), ( OVModelForCausalLM, @@ -250,7 +250,7 @@ class OVWeightCompressionTest(unittest.TestCase): dataset="c4", quant_method="awq", ), - 18 if is_transformers_version(">=", "4.39") else 16, + 16, ), )