Skip to content

Commit

Permalink
Fix several bugs & add Bert inc example to pipeline (#1492)
Browse files Browse the repository at this point in the history
## Describe your changes

Fix Olive bugs & Add Bert inc examples to example pipeline.

- `batch_size` is needed for Inc dataloader. Set default size to 1.
- Custom eval func doesn't have batch_size as input.
- For `QuantizationAwareTraining` pass, `train_data_config` is not
required if user provides `training_loop_func`.
- Latest transformers package will automatically save trained model as
safetensors format. Add `save_safetensors` as false to train argument.
- Some passes may have nested data_config in its config. Update
auto-fill data_config logic to achieve this.

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
  • Loading branch information
xiaoyu-work authored Nov 18, 2024
1 parent 40dc359 commit f47265f
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 9 deletions.
6 changes: 6 additions & 0 deletions .azure_pipelines/olive-examples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ jobs:
bert_ptq_cpu_docker:
exampleFolder: bert
exampleName: bert_ptq_cpu_docker
bert_inc:
exampleFolder: bert
exampleName: bert_inc
resnet_vitis_ai_ptq_cpu:
exampleFolder: resnet
exampleName: resnet_vitis_ai_ptq_cpu
Expand Down Expand Up @@ -56,6 +59,9 @@ jobs:
onnxruntime: onnxruntime
python_version: '3.10'
examples:
bert_inc:
exampleFolder: bert
exampleName: bert_inc
bert_ptq_cpu_docker:
exampleFolder: bert
exampleName: bert_ptq_cpu_docker
Expand Down
3 changes: 1 addition & 2 deletions examples/bert/bert_inc_smoothquant_ptq_cpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
"quantization": {
"type": "IncStaticQuantization",
"quant_format": "QOperator",
"user_script": "user_script.py",
"data_config": "inc_quat_data_config",
"recipes": { "smooth_quant": true, "smooth_quant_args": { "alpha": 0.7 } },
"metric": {
Expand All @@ -73,5 +72,5 @@
},
"evaluator": "common_evaluator",
"cache_dir": "cache",
"output_dir": "models/bert_inc_static_ptq_cpu"
"output_dir": "models/bert_inc_smoothquant_ptq_cpu"
}
4 changes: 3 additions & 1 deletion examples/bert/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
azure-ai-ml
azure-identity
datasets
# TODO(anyone): load_metrics was removed since 3.0.0. Using evaluate instead
datasets<3.0.0
docker>=7.1.0
evaluate
neural-compressor
optimum
pytorch_lightning
scikit-learn
scipy
tabulate
Expand Down
5 changes: 3 additions & 2 deletions examples/bert/user_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def bert_inc_glue_calibration_dataset(data_dir, **kwargs):


@Registry.register_dataloader()
def bert_inc_glue_calibration_dataloader(dataset, batch_size, **kwargs):
def bert_inc_glue_calibration_dataloader(dataset, batch_size=1, **kwargs):
return DefaultDataLoader(dataset=dataset, batch_size=batch_size)


Expand All @@ -176,7 +176,7 @@ def bert_inc_glue_calibration_dataloader(dataset, batch_size, **kwargs):
# -------------------------------------------------------------------------


def eval_accuracy(model: OliveModelHandler, device, execution_providers, batch_size, **kwargs):
def eval_accuracy(model: OliveModelHandler, device, execution_providers, batch_size=1, **kwargs):
dataset = bert_dataset("Intel/bert-base-uncased-mrpc")
dataloader = bert_dataloader(dataset, batch_size)
preds = []
Expand Down Expand Up @@ -240,6 +240,7 @@ def training_loop_func(model):
training_args.save_strategy = "steps"
training_args.save_total_limit = 1
training_args.metric_for_best_model = "accuracy"
training_args.save_safetensors = False

dataset = BertDataset("Intel/bert-base-uncased-mrpc")

Expand Down
35 changes: 35 additions & 0 deletions examples/test/local/test_bert_inc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import json
import os

import pytest

from ..utils import check_output, get_example_dir


@pytest.fixture(scope="module", autouse=True)
def setup():
"""Setups any state specific to the execution of the given module."""
os.chdir(get_example_dir("bert"))


@pytest.mark.parametrize(
"olive_json",
[
"bert_inc_dynamic_ptq_cpu.json",
"bert_inc_ptq_cpu.json",
"bert_inc_smoothquant_ptq_cpu.json",
"bert_inc_static_ptq_cpu.json",
],
)
def test_bert(olive_json):
from olive.workflows import run as olive_run

with open(olive_json) as f:
olive_config = json.load(f)

footprint = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None))
check_output(footprint)
4 changes: 2 additions & 2 deletions olive/passes/pytorch/quantization_aware_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> Dict[str, PassCon
**get_user_script_data_config(),
"train_data_config": PassConfigParam(
type_=Union[DataConfig, Dict],
required=True,
description="Data config for training.",
),
"val_data_config": PassConfigParam(
Expand Down Expand Up @@ -102,7 +101,8 @@ def _run_for_config(
if Path(output_model_path).suffix != ".pt":
output_model_path += ".pt"

qat_trainer_config.train_data_config = validate_config(config["train_data_config"], DataConfig)
if config["train_data_config"]:
qat_trainer_config.train_data_config = validate_config(config["train_data_config"], DataConfig)
if config["val_data_config"]:
qat_trainer_config.val_data_config = validate_config(config["val_data_config"], DataConfig)
if config["training_loop_func"]:
Expand Down
15 changes: 13 additions & 2 deletions olive/workflows/run/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,8 @@ def validate_pass_search(cls, v, values):
for param_name in v["config"]:
if v["config"][param_name] == PassParamDefault.SEARCHABLE_VALUES:
searchable_configs.add(param_name)
if param_name.endswith("data_config"):
v["config"] = _resolve_data_config(v["config"], values, param_name)

resolve_all_data_configs(v["config"], values)

if not values["engine"].search_strategy and searchable_configs:
raise ValueError(
Expand All @@ -274,6 +274,17 @@ def validate_workflow_host(cls, v, values):
return _resolve_config(values, v)


def resolve_all_data_configs(config, values):
"""Recursively traverse the config dictionary to resolve all 'data_config' keys."""
for param_name, param_value in config.items():
if param_name.endswith("data_config"):
_resolve_data_config(config, values, param_name)
continue

if isinstance(param_value, dict):
resolve_all_data_configs(param_value, values)


def _insert_azureml_client(config, azureml_client):
"""Insert azureml_client into config recursively.
Expand Down

0 comments on commit f47265f

Please sign in to comment.