Save output model to output_dir #1430

Open
Wants to merge 12 commits into base: main
2 changes: 1 addition & 1 deletion examples/whisper/test_transcription.py
@@ -79,7 +79,7 @@ def main(raw_args=None):
ep = config["systems"]["local_system"]["accelerators"][0]["execution_providers"][0]

# load output model json
output_model_json_path = Path(config["output_dir"]) / "output_model" / "model_config.json"
output_model_json_path = Path(config["output_dir"]) / "model_config.json"
with output_model_json_path.open() as f:
output_model_json = json.load(f)

56 changes: 30 additions & 26 deletions olive/cli/base.py
@@ -14,7 +14,7 @@

from olive.cli.constants import CONDA_CONFIG
from olive.common.user_module_loader import UserModuleLoader
from olive.common.utils import hardlink_copy_dir, hash_dict, hf_repo_exists, set_nested_dict_value, unescaped_str
from olive.common.utils import (
hardlink_copy_dir,
hardlink_copy_file,
hash_dict,
hf_repo_exists,
set_nested_dict_value,
unescaped_str,
)
from olive.hardware.accelerator import AcceleratorSpec
from olive.hardware.constants import DEVICE_TO_EXECUTION_PROVIDERS
from olive.resource_path import OLIVE_RESOURCE_ANNOTATIONS, find_all_resources
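
The import block now also brings in hardlink_copy_file alongside hardlink_copy_dir. For readers unfamiliar with the other helpers used in save_output_model below: judging from their usage, find_all_resources returns a mapping from nested config keys to resource-path objects (each exposing get_path()), and set_nested_dict_value writes a new value back at such a key. A rough, self-contained stand-in for the latter, not the actual Olive implementation:

def set_nested_dict_value_toy(d: dict, keys: tuple, value) -> None:
    # walk to the parent of the leaf key, then assign in place
    for key in keys[:-1]:
        d = d[key]
    d[keys[-1]] = value

model_config = {"config": {"model_path": "/cache/run/model.onnx"}}
set_nested_dict_value_toy(model_config, ("config", "model_path"), "/outputs/model.onnx")
print(model_config)  # {'config': {'model_path': '/outputs/model.onnx'}}
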
@@ -416,36 +423,33 @@ def save_output_model(config: Dict, output_model_dir: Union[str, Path]):

This assumes a single accelerator workflow.
"""
run_output_path = Path(config["output_dir"]) / "output_model"
if not any(run_output_path.rglob("model_config.json")):
# there must be a run_output_path with at least one model_config.json
run_output_path = Path(config["output_dir"])
jambayk (Contributor), Nov 20, 2024:
btw, without this output_model nesting, the output of the CLI would also have the footprint and other files copied over, even though they mean nothing to the user of the CLI and are messy and confusing. I think we might need an option to disable saving these files, like you mentioned once before.

jambayk (Contributor), Nov 20, 2024:
Or make it opt-in to save the extra files instead of opt-out. Most users only care about the final output model.

Author (Contributor):
Good call! I don't think users need those files. We should only copy model files here.

model_config_path = run_output_path / "model_config.json"
if not model_config_path.exists():
print("Command failed. Please set the log_level to 1 for more detailed logs.")
return

output_model_dir = Path(output_model_dir).resolve()

# hardlink/copy the output model to the output_model_dir
hardlink_copy_dir(run_output_path, output_model_dir)

# need to update the local path in the model_config.json
# should the path be relative or absolute? relative makes it easy to move the output
# around but the path needs to be updated when the model config is used
for model_config_file in output_model_dir.rglob("model_config.json"):
with model_config_file.open("r") as f:
model_config = json.load(f)

all_resources = find_all_resources(model_config)
for resource_key, resource_path in all_resources.items():
resource_path_str = resource_path.get_path()
if resource_path_str.startswith(str(run_output_path)):
set_nested_dict_value(
model_config,
resource_key,
resource_path_str.replace(str(run_output_path), str(output_model_dir)),
)

with model_config_file.open("w") as f:
json.dump(model_config, f, indent=4)
with model_config_path.open("r") as f:
model_config = json.load(f)

all_resources = find_all_resources(model_config)
for resource_key, resource_path in all_resources.items():
src_path = Path(resource_path.get_path()).resolve()
if src_path.is_dir():
hardlink_copy_dir(src_path, output_model_dir / src_path)
jambayk (Contributor), Nov 21, 2024:
I don't think this is correct. output_model_dir / src_path is just src_path, since src_path is a fully resolved path. You probably wanted src_path.name, like in line 448?

Author (Contributor):
Yeah, true! I forgot to update this.

Contributor:
Please also check how the outputs of a graph capture with --use_ort_genai and --use_model_builder look. The additional_files attributes might make this copy the additional files into the output directory, even though they refer to files already in the model subdir.

Contributor:
Like I described in the previous comment, I think it will be easier to just make saving the footprint, etc. opt-in in the workflow config. That way, even for the CLI, we can just use the final output dir and not need any of this copying and path updating.

Author (Contributor):
My intention is to copy the additional files to the output folder since they are also part of the output model.

Contributor:
I think the additional files get copied twice here: once as part of the model_path resource copy into the model subdir, and then again individually, directly into the output_dir itself.

Author (Contributor):
Are all additional files stored in the model path? I remember they can end up anywhere, depending on the pass that created them. Was this logic updated before?

jambayk (Contributor), Nov 22, 2024:
They are stored in model_path normally. Some passes, like model builder with metadata, save them in a different folder, but that was because we weren't sure whether to copy/hardlink the existing model files. However, both the pass carry-forward (https://github.com/microsoft/Olive/blob/main/olive/passes/olive_pass.py#L274) and the cache model save have always saved them in model_path:

# we only have additional files for onnx models so saving to "model" is safe

Since the output of a workflow goes through the cache model save, the additional files are already in the model_path resources for ONNX models. This is different for composite models, where they are saved in output_dir.

So I think it's less hacky to just save the CLI output directly in output_path and opt out of saving the other footprints, etc. No need for temp directories or copying.

else:
hardlink_copy_file(src_path, output_model_dir)

set_nested_dict_value(
model_config,
resource_key,
str(output_model_dir / src_path.name),
)
output_model_config_path = output_model_dir / "model_config.json"
with output_model_config_path.open("w") as f:
json.dump(model_config, f, indent=4)

print(f"Command succeeded. Output model saved to {output_model_dir}")

28 changes: 16 additions & 12 deletions olive/engine/engine.py
@@ -15,7 +15,7 @@
from olive.common.config_utils import validate_config
from olive.common.constants import DEFAULT_WORKFLOW_ID, LOCAL_INPUT_MODEL_ID
from olive.engine.config import FAILED_CONFIG, INVALID_CONFIG, PRUNED_CONFIGS
from olive.engine.footprint import Footprint, FootprintNodeMetric
from olive.engine.footprint import Footprint, FootprintNode, FootprintNodeMetric, get_best_candidate_node
from olive.engine.packaging.packaging_generator import generate_output_artifacts
from olive.evaluator.metric import Metric
from olive.evaluator.metric_result import MetricResult, joint_metric_key
@@ -208,27 +208,27 @@ def run(
output_dir/pareto_frontier_footprints.json: pareto frontier footprints
output_dir/run_history.txt: run history
output_dir/input_model_metrics.json: evaluation results of the input model
output_dir/...: output model files

2. Multiple accelerator specs:
output_dir/{accelerator_spec}/...: Same as 1 but for each accelerator spec
output_dir/...: output model files

No search mode:
1. One accelerator spec
output_dir/footprints.json: footprint of the run
output_dir/run_history.txt: run history
output_dir/input_model_metrics.json: evaluation results of the input model
output_dir/output_footprints.json: footprint of the output models
output_dir/...: output model files

A. One pass flow:
output_dir/output_model/metrics.json: evaluation results of the output model
output_dir/output_model/model_config.json: output model configuration
output_dir/output_model/...: output model files

B. Multiple pass flows:
output_dir/output_model/{pass_flow}/...: Same as A but for each pass flow
output_dir/metrics.json: evaluation results of the output model
output_dir/...: output model files

2. Multiple accelerator specs
output_dir/{accelerator_spec}/...: Same as 1 but for each accelerator spec
output_dir/...: output model files

"""
if not accelerator_specs:
@@ -279,6 +279,14 @@ run(
else:
logger.debug("No packaging config provided, skip packaging artifacts")

# TODO(team): refactor output structure
# Do not change condition order. For no search, values of outputs are MetricResult
# Consolidate the output structure for search and no search mode
if outputs and self.passes and not next(iter(outputs.values())).check_empty_nodes():
best_node: FootprintNode = get_best_candidate_node(outputs, self.footprints)
self.cache.save_model(model_id=best_node.model_id, output_dir=output_dir, overwrite=True)
logger.info("Saved output model to %s", outputs)

return outputs

def run_accelerator(
@@ -387,8 +395,7 @@ def run_no_search(
pass_name = pass_item["name"]
raise ValueError(f"Pass {pass_name} has search space but search strategy is None")

# output models will be saved in output_dir/output_model
output_model_dir = Path(output_dir) / "output_model"
output_model_dir = Path(output_dir)

output_model_ids = []
for pass_flow in self.pass_flows:
@@ -422,9 +429,6 @@ def run_no_search(
json.dump(signal.to_json(), f, indent=4)
logger.info("Saved evaluation results of output model to %s", results_path)

self.cache.save_model(model_id=model_ids[-1], output_dir=flow_output_dir, overwrite=True)
logger.info("Saved output model to %s", flow_output_dir)

output_model_ids.append(model_ids[-1])

output_footprints = self.footprints[accelerator_spec].create_footprints_by_model_ids(output_model_ids)
52 changes: 51 additions & 1 deletion olive/engine/footprint.py
@@ -6,11 +6,15 @@
import logging
from collections import OrderedDict, defaultdict
from copy import deepcopy
from typing import DefaultDict, Dict, List, NamedTuple, Optional
from typing import TYPE_CHECKING, DefaultDict, Dict, List, NamedTuple, Optional

from olive.common.config_utils import ConfigBase, config_json_dumps, config_json_loads
from olive.evaluator.metric_result import MetricResult

if TYPE_CHECKING:
from olive.hardware import AcceleratorSpec


logger = logging.getLogger(__name__)


@@ -159,6 +163,9 @@ def trace_back_run_history(self, model_id) -> Dict[str, Dict]:
model_id = self.nodes[model_id].parent_model_id
return rls

def check_empty_nodes(self):
return self.nodes is None or len(self.nodes) == 0

def to_df(self):
# to pandas.DataFrame
raise NotImplementedError
@@ -422,3 +429,46 @@ def _plot_pareto_frontier(self, ranks=None, save_path=None, is_show=True, save_f

if is_show:
fig.show()


def get_best_candidate_node(
pf_footprints: Dict["AcceleratorSpec", Footprint], footprints: Dict["AcceleratorSpec", Footprint]
):
"""Select the best candidate node from the pareto frontier footprints.

This function evaluates nodes from the given pareto frontier footprints and selects the top-ranked node
based on specified objective metrics. It compares nodes from two dictionaries of footprints and
ranks them according to their metrics.

Args:
pf_footprints (Dict["AcceleratorSpec", Footprint]): A dictionary mapping accelerator specifications
to their corresponding pareto frontier footprints, which contain nodes and their metrics.
footprints (Dict["AcceleratorSpec", Footprint]): A dictionary mapping accelerator specifications
to their corresponding footprints, which contain nodes and their metrics.

Returns:
Node: The top-ranked node based on the specified objective metrics.

"""
objective_dict = next(iter(pf_footprints.values())).objective_dict
top_nodes = []
for accelerator_spec, pf_footprint in pf_footprints.items():
footprint = footprints[accelerator_spec]
if pf_footprint.nodes and footprint.nodes:
top_nodes.append(next(iter(pf_footprint.get_top_ranked_nodes(1))))
return next(
iter(
sorted(
top_nodes,
key=lambda x: tuple(
(
x.metrics.value[metric].value
if x.metrics.cmp_direction[metric] == 1
else -x.metrics.value[metric].value
)
for metric in objective_dict
),
reverse=True,
)
)
)
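
To make the sort key above concrete, here is a small self-contained toy with hypothetical metric names and values (plain dicts, not Olive objects): metrics whose cmp_direction is 1 (higher is better) keep their sign, the rest are negated, so a single descending sort ranks the best candidate first, with earlier objectives taking priority.

# toy candidates: accuracy is higher-is-better, latency is lower-is-better
candidates = {
    "node_a": {"accuracy": 0.91, "latency": 12.0},
    "node_b": {"accuracy": 0.89, "latency": 8.0},
}
cmp_direction = {"accuracy": 1, "latency": -1}
objectives = ["accuracy", "latency"]  # priority order, like objective_dict keys

def sort_key(name: str) -> tuple:
    values = candidates[name]
    return tuple(values[m] if cmp_direction[m] == 1 else -values[m] for m in objectives)

best = sorted(candidates, key=sort_key, reverse=True)[0]
print(best)  # "node_a": accuracy is compared first and 0.91 > 0.89
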
34 changes: 4 additions & 30 deletions olive/engine/packaging/packaging_generator.py
@@ -18,6 +18,7 @@

from olive.common.constants import OS
from olive.common.utils import retry_func, run_subprocess
from olive.engine.footprint import get_best_candidate_node
from olive.engine.packaging.packaging_config import (
AzureMLDeploymentPackagingConfig,
DockerfilePackagingConfig,
@@ -68,7 +69,7 @@ def _package_dockerfile(
config: DockerfilePackagingConfig = packaging_config.config
logger.info("Packaging output models to Dockerfile")
base_image = config.base_image
best_node = _get_best_candidate_node(pf_footprints, footprints)
best_node = get_best_candidate_node(pf_footprints, footprints)

docker_context_path = "docker_content"
content_path = output_dir / docker_context_path
@@ -133,7 +134,7 @@ def _package_azureml_deployment(

try:
# Get best model from footprint
best_node = _get_best_candidate_node(pf_footprints, footprints)
best_node = get_best_candidate_node(pf_footprints, footprints)

with tempfile.TemporaryDirectory() as temp_dir:
tempdir = Path(temp_dir)
@@ -303,33 +304,6 @@ def _package_azureml_deployment(
raise


def _get_best_candidate_node(
pf_footprints: Dict["AcceleratorSpec", "Footprint"], footprints: Dict["AcceleratorSpec", "Footprint"]
):
objective_dict = next(iter(pf_footprints.values())).objective_dict
top_nodes = []
for accelerator_spec, pf_footprint in pf_footprints.items():
footprint = footprints[accelerator_spec]
if pf_footprint.nodes and footprint.nodes:
top_nodes.append(next(iter(pf_footprint.get_top_ranked_nodes(1))))
return next(
iter(
sorted(
top_nodes,
key=lambda x: tuple(
(
x.metrics.value[metric].value
if x.metrics.cmp_direction[metric] == 1
else -x.metrics.value[metric].value
)
for metric in objective_dict
),
reverse=True,
)
)
)


def _is_generative_model(config: Dict[str, Any]) -> bool:
model_attributes = config.get("model_attributes") or {}
return model_attributes.get("generative", False)
@@ -353,7 +327,7 @@ def _package_candidate_models(
tempdir = Path(temp_dir)

if packaging_type == PackagingType.Zipfile:
best_node: FootprintNode = _get_best_candidate_node(pf_footprints, footprints)
best_node: FootprintNode = get_best_candidate_node(pf_footprints, footprints)
is_generative = _is_generative_model(best_node.model_config["config"])

if packaging_config.include_runtime_packages:
12 changes: 4 additions & 8 deletions test/unit_test/cli/test_cli.py
@@ -126,8 +126,7 @@ def test_finetune_command(_, mock_tempdir, mock_run, tmp_path):

# setup
mock_tempdir.return_value = tmpdir.resolve()
workflow_output_dir = tmpdir / "output_model"
workflow_output_dir.mkdir(parents=True)
workflow_output_dir = tmpdir
dummy_output = workflow_output_dir / "model_config.json"
with open(dummy_output, "w") as f:
json.dump({"dummy": "output"}, f)
@@ -196,8 +195,7 @@ def test_capture_onnx_command(_, mock_tempdir, mock_run, use_model_builder, tmp_

# setup
mock_tempdir.return_value = tmpdir.resolve()
workflow_output_dir = tmpdir / "output_model"
workflow_output_dir.mkdir(parents=True)
workflow_output_dir = tmpdir
dummy_output = workflow_output_dir / "model_config.json"
with open(dummy_output, "w") as f:
json.dump({"config": {"inference_settings": {"dummy-key": "dummy-value"}}}, f)
@@ -284,9 +282,7 @@ def test_quantize_command(mock_repo_exists, mock_tempdir, mock_run, algorithm_na
mock_tempdir.return_value = tmpdir.resolve()
mock_run.return_value = {}

workflow_output_dir = tmpdir / "output_model" / algorithm_name
workflow_output_dir.mkdir(parents=True)
model_config_path = workflow_output_dir / "model_config.json"
model_config_path = tmpdir / "model_config.json"
with model_config_path.open("w") as f:
f.write("{}")

@@ -309,7 +305,7 @@ def test_quantize_command(mock_repo_exists, mock_tempdir, mock_run, algorithm_na

config = mock_run.call_args[0][0]
assert config["input_model"]["model_path"] == "dummy_model"
assert {el.name for el in output_dir.iterdir()} == {algorithm_name}
assert {el.name for el in output_dir.iterdir()} == {"model_config.json"}


# TODO(anyone): Add tests for ManageAMLComputeCommand
Expand Down