Skip to content

Commit

Permalink
Revert rotary embedding patching for recovering gpu accuracy (#855)
Browse files Browse the repository at this point in the history
* revert rotary embedding patching for recovering gpu accuracy

* revert tests

* Update test_exporters_cli.py
  • Loading branch information
eaidova authored and IlyasMoutawwakil committed Aug 6, 2024
1 parent bb38cce commit 7c8650d
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 16 deletions.
10 changes: 0 additions & 10 deletions optimum/exporters/openvino/model_patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,17 +561,11 @@ def __enter__(self):
# fill the causal mask in a slightly different way to avoid overflow on some platforms
patch_update_causal_mask(self._model, "4.39.0")

if is_transformers_version(">=", "4.39.0"):
register_sin_cos_buffer(self._model)

def __exit__(self, exc_type, exc_value, traceback):
super().__exit__(exc_type, exc_value, traceback)
if hasattr(self._model.model, "_orig_update_causal_mask"):
self._model.model._update_causal_mask = self._model.model._orig_update_causal_mask

for layer in self._model.model.layers:
layer.self_attn.rotary_emb.forward = layer.self_attn.rotary_emb._orig_forward


# copied from https://github.com/huggingface/transformers/commit/57d7594a79a9f5d835abf2d4d384db0e4818e548 to unblock export with transformers 4.42
def _mistral_update_causal_mask(
Expand Down Expand Up @@ -692,10 +686,6 @@ def __enter__(self):
self._model.model._orig_update_causal_mask = self._model.model._update_causal_mask
self._model.model._update_causal_mask = types.MethodType(_mistral_update_causal_mask, self._model.model)

# mistral has some accuracy issues with bf16 with transformers >= 4.42
# prefill rotary emb sin/cos to avoid this issue
register_sin_cos_buffer(self._model)

def __exit__(self, exc_type, exc_value, traceback):
super().__exit__(exc_type, exc_value, traceback)

Expand Down
8 changes: 4 additions & 4 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
)
from optimum.intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS
from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
from optimum.intel.utils.import_utils import is_openvino_tokenizers_available, is_transformers_version
from optimum.intel.utils.import_utils import is_openvino_tokenizers_available


class OVCLIExportTestCase(unittest.TestCase):
Expand Down Expand Up @@ -95,21 +95,21 @@ class OVCLIExportTestCase(unittest.TestCase):
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 8 --all-layers",
0,
32 if is_transformers_version("<", "4.39.0") else 34,
32,
),
(
"text-generation-with-past",
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 16 --awq --dataset wikitext2 --num-samples 100 "
"--sensitivity-metric max_activation_variance",
6 if is_transformers_version(">=", "4.39") else 4,
4,
28,
),
(
"text-generation-with-past",
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 16 --scale-estimation --dataset wikitext2 --num-samples 100 ",
6 if is_transformers_version(">=", "4.39") else 4,
4,
28,
),
]
Expand Down
4 changes: 2 additions & 2 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ class OVWeightCompressionTest(unittest.TestCase):
quant_method=QuantizationMethod.AWQ,
scale_estimation=True,
),
18 if is_transformers_version(">=", "4.39") else 16,
16,
),
(
OVModelForCausalLM,
Expand All @@ -250,7 +250,7 @@ class OVWeightCompressionTest(unittest.TestCase):
dataset="c4",
quant_method="awq",
),
18 if is_transformers_version(">=", "4.39") else 16,
16,
),
)

Expand Down

0 comments on commit 7c8650d

Please sign in to comment.