Skip to content

Commit

Permalink
xfail for bug with f16 zp (openvinotoolkit#3220)
Browse files Browse the repository at this point in the history
### Changes

Mark as xfail the PTWC Torch tests that count the number of int4/int8 ops.

### Reason for changes

Starting from OV 2025, the zero point (ZP) is represented as f16 in some cases (bug:
160006).

### Related tickets

159993
160006

### Tests

- [x] openvino-nightly/job/post_training_weight_compression/73

![image](https://github.com/user-attachments/assets/bac6b5f0-0d8a-4890-9558-3ce6028ca56c)
- [x] manual/job/post_training_weight_compression/303

![image](https://github.com/user-attachments/assets/0d76fa7a-baf6-4515-bdd5-7bdda11b046e)
  • Loading branch information
ljaljushkin authored Jan 29, 2025
1 parent 0333814 commit 553b21b
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 34 deletions.
8 changes: 8 additions & 0 deletions tests/post_training/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,11 @@ To mark a test as expected to fail (xfail) when a validation metric does not mee
...
metrics_xfail_reason: "Issue-<jira ticket number>"
```
To mark a test as expected to fail (xfail) when the number of compression operations does not meet expectations, add the following line to the reference data:
```yml
<Name from model scopes>_backend_<BACKEND>:
...
num_compressed_xfail_reason: "Issue-<jira ticket number>"
```
10 changes: 10 additions & 0 deletions tests/post_training/data/wc_reference_data_2025.0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Reference data for weight-compression tests run against OpenVINO 2025.0.
# The num_compressed_xfail_reason entries mark the int4/int8 op-count checks
# as expected failures: starting from OV 2025 the zero point is represented
# as f16 in some cases (ticket 160006).
tinyllama_int8_data_free_backend_TORCH:
  metric_value: 0.95624
  num_int4: 0
  num_int8: 312
  num_compressed_xfail_reason: "Issue-160006"
tinyllama_int4_data_free_backend_TORCH:
  metric_value: 0.73873
  num_int4: 114
  num_int8: 84
  num_compressed_xfail_reason: "Issue-160006"
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
from tests.post_training.pipelines.base import LIMIT_LENGTH_OF_STATUS
from tests.post_training.pipelines.base import PT_BACKENDS
from tests.post_training.pipelines.base import BackendType
from tests.post_training.pipelines.base import ErrorReason
from tests.post_training.pipelines.base import ErrorReport
from tests.post_training.pipelines.base import NumCompressNodes
from tests.post_training.pipelines.base import RunInfo
from tests.post_training.pipelines.image_classification_timm import ImageClassificationTimm
Expand Down Expand Up @@ -170,13 +172,14 @@ def _compress(self):
)

def _validate(self):
    """Extend base validation with a check on the number of sparse activations.

    :return: Error reports from the base validation plus, when the measured
        number of sparse activations differs from the reference value
        ("num_sparse_activations" in the reference data, defaulting to 0),
        an additional NUM_COMPRESSED error report.
    """
    errors = super()._validate()
    ref_num_sparse_activations = self.reference_data.get("num_sparse_activations", 0)
    num_sparse_activations = self.run_info.num_compress_nodes.num_sparse_activations
    if num_sparse_activations != ref_num_sparse_activations:
        # Parenthesized literal concatenation instead of a backslash-continued
        # f-string, which embedded the continuation line's leading whitespace
        # into the message.
        status_msg = (
            f"Regression: The number of sparse activations is {num_sparse_activations}, "
            f"which differs from reference {ref_num_sparse_activations}."
        )
        errors.append(ErrorReport(ErrorReason.NUM_COMPRESSED, status_msg))
    return errors


class LMSparsifyActivations(SAPipelineMixin, LMWeightCompression):
Expand Down
56 changes: 43 additions & 13 deletions tests/post_training/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from datetime import timedelta
from enum import Enum
from pathlib import Path
from typing import Dict, Optional
from typing import Dict, List, Optional

import numpy as np
import onnx
Expand All @@ -36,7 +36,18 @@
from tools.memory_monitor import memory_monitor_context

DEFAULT_VAL_THREADS = 4
METRICS_XFAIL_REASON = "metrics_xfail_reason"
XFAIL_SUFFIX = "_xfail_reason"


# Categories of validation failure. Each member's value is the prefix of the
# "<reason>_xfail_reason" key that is looked up in the reference data when
# deciding whether a failure is an expected one (XFAIL).
ErrorReason = Enum("ErrorReason", [("METRICS", "metrics"), ("NUM_COMPRESSED", "num_compressed")])


@dataclass
class ErrorReport:
    """A single validation failure: its category and a human-readable message."""

    # Failure category; its value prefixes the "<reason>_xfail_reason" key
    # searched in the reference data to decide whether the failure is expected.
    reason: ErrorReason
    # Human-readable description of the failure.
    msg: str


class BackendType(Enum):
Expand Down Expand Up @@ -278,9 +289,31 @@ def get_num_compressed(self) -> None:
def run_bench(self) -> None:
"""Run a benchmark to collect performance statistics."""

@abstractmethod
def _validate(self) -> None:
"""Validate IR."""
def _validate(self) -> List[ErrorReport]:
    """Run pipeline-specific validation checks.

    The base implementation performs no checks; subclasses override this to
    collect validation failures.

    :return: Error reports produced during validation (empty by default).
    """
    errors: List[ErrorReport] = []
    return errors

def _process_errors(self, errors: "List[ErrorReport]") -> str:
    """
    Processes a list of error reports and updates the run status.

    Errors whose "<reason>_xfail_reason" key is present in the reference data
    are treated as expected failures (XFAIL) and folded into the run status;
    any remaining errors abort the run.

    :param errors: A list of error reports.
    :return: A string representing the concatenated statuses of the processed errors.
    :raises ValueError: If at least one error has no matching xfail reason.
    """
    xfails, msg_list = [], []
    for report in errors:
        xfail_reason = report.reason.value + XFAIL_SUFFIX
        if xfail_reason in self.reference_data:
            xfails.append(f"XFAIL: {self.reference_data[xfail_reason]} - {report.msg}")
        else:
            msg_list.append(report.msg)
    if msg_list:
        raise ValueError("\n".join(msg_list))
    # Bug fix: the signature is annotated "-> str" and the docstring promises
    # the concatenated statuses, but the original body returned None.
    status = "\n".join(xfails)
    self.run_info.status = status
    return status

def prepare(self):
"""
Expand All @@ -302,7 +335,7 @@ def validate(self) -> None:
return
print("Validation...")

self._validate()
errors = self._validate()

metric_value = self.run_info.metric_value
metric_reference = self.reference_data.get("metric_value")
Expand All @@ -311,22 +344,19 @@ def validate(self) -> None:
if metric_value is not None and metric_value_fp32 is not None:
self.run_info.metric_diff = round(self.run_info.metric_value - self.reference_data["metric_value_fp32"], 5)

status_msg = None
if (
metric_value is not None
and metric_reference is not None
and not np.isclose(metric_value, metric_reference, atol=self.reference_data.get("atol", 0.001))
):
status_msg = None
if metric_value < metric_reference:
status_msg = f"Regression: Metric value is less than reference {metric_value} < {metric_reference}"
if metric_value > metric_reference:
status_msg = f"Improvement: Metric value is better than reference {metric_value} > {metric_reference}"

if status_msg is not None:
if METRICS_XFAIL_REASON in self.reference_data:
self.run_info.status = f"XFAIL: {self.reference_data[METRICS_XFAIL_REASON]} - {status_msg}"
else:
raise ValueError(status_msg)
if status_msg:
errors.append(ErrorReport(ErrorReason.METRICS, status_msg))
self._process_errors(errors)

def run(self) -> None:
"""
Expand Down
3 changes: 0 additions & 3 deletions tests/post_training/pipelines/causal_language_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,3 @@ def prepare_calibration_dataset(self):

if self.backend == BackendType.OPTIMUM:
self.calibration_dataset = calibration_dataset

def _validate(self):
pass
3 changes: 0 additions & 3 deletions tests/post_training/pipelines/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,3 @@ def preprocess_function(examples):
self.calibration_dataset = calibration_dataset
else:
self.calibration_dataset = nncf.Dataset(calibration_dataset, self.get_transform_calibration_fn())

def _validate(self):
pass
5 changes: 4 additions & 1 deletion tests/post_training/pipelines/image_classification_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import copy
import os
from typing import List

import numpy as np
import openvino as ov
Expand All @@ -21,6 +22,7 @@
import nncf
from nncf.common.logging.track_progress import track
from tests.post_training.pipelines.base import DEFAULT_VAL_THREADS
from tests.post_training.pipelines.base import ErrorReport
from tests.post_training.pipelines.base import PTQTestPipeline


Expand All @@ -33,7 +35,7 @@ def prepare_calibration_dataset(self):

self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn())

def _validate(self):
def _validate(self) -> List[ErrorReport]:
val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False)

Expand Down Expand Up @@ -78,3 +80,4 @@ def process_result(request, userdata):

self.run_info.metric_name = "Acc@1"
self.run_info.metric_value = acc_top1
return []
20 changes: 11 additions & 9 deletions tests/post_training/pipelines/lm_weight_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import shutil
import time
from dataclasses import dataclass
from typing import Dict, Optional
from typing import Dict, List, Optional

import numpy as np
import openvino as ov
Expand All @@ -31,6 +31,8 @@
from tests.cross_fw.shared.paths import TEST_ROOT
from tests.post_training.pipelines.base import BackendType
from tests.post_training.pipelines.base import BaseTestPipeline
from tests.post_training.pipelines.base import ErrorReason
from tests.post_training.pipelines.base import ErrorReport
from tests.post_training.pipelines.base import StatsFromOutput
from tools.memory_monitor import MemoryType
from tools.memory_monitor import MemoryUnit
Expand Down Expand Up @@ -257,7 +259,8 @@ def _compress(self):
**self.compression_params,
)

def _validate(self):
def _validate(self) -> List[ErrorReport]:
errors = []
is_stateful = self.params.get("is_stateful", False)
core = ov.Core()

Expand Down Expand Up @@ -309,12 +312,11 @@ def _validate(self):
num_int4_value = self.run_info.num_compress_nodes.num_int4
num_int8_value = self.run_info.num_compress_nodes.num_int8

template = "Regression: The number of int{} ops is different than reference {} != {}"
if num_int4_reference != num_int4_value:
status_msg = f"Regression: The number of int4 ops is different \
than reference {num_int4_reference} != {num_int4_value}"
raise ValueError(status_msg)

status_msg = template.format(4, num_int4_reference, num_int4_value)
errors.append(ErrorReport(ErrorReason.NUM_COMPRESSED, status_msg))
if num_int8_reference != num_int8_value:
status_msg = f"Regression: The number of int8 ops is different \
than reference {num_int8_reference} != {num_int8_value}"
raise ValueError(status_msg)
status_msg = template.format(8, num_int8_reference, num_int8_value)
errors.append(ErrorReport(ErrorReason.NUM_COMPRESSED, status_msg))
return errors
3 changes: 0 additions & 3 deletions tests/post_training/pipelines/masked_language_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,3 @@ def preprocess_function(examples):
self.calibration_dataset = calibration_dataset
else:
self.calibration_dataset = nncf.Dataset(calibration_dataset, self.get_transform_calibration_fn())

def _validate(self):
pass

0 comments on commit 553b21b

Please sign in to comment.