Skip to content

Commit

Permalink
Allow skypilot to configure step or run full pipeline in one VM (zenm…
Browse files Browse the repository at this point in the history
…l-io#2276)

* Allow skypilot to configure step or run  full pipeline in one VM

* Change behaviour of using step based resources

* Update src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py

Co-authored-by: Alex Strick van Linschoten <strickvl@users.noreply.github.com>

* Refactor SkypilotBaseOrchestrator to support step-based VM configuration

* Update docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md

Co-authored-by: Hamza Tahir <hamza@zenml.io>

---------

Co-authored-by: Alex Strick van Linschoten <strickvl@users.noreply.github.com>
Co-authored-by: Hamza Tahir <hamza@zenml.io>
  • Loading branch information
3 people authored and kabinja committed Jan 29, 2024
1 parent 7f1453e commit 3ce2eb3
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ on-demand and managed spot VMs. While you can select the VM type you want to use
also includes an optimizer that automatically selects the cheapest VM/zone/region/cloud for your workloads.
Finally, the orchestrator includes an autostop feature that cleans up idle clusters, preventing unnecessary cloud costs.

{% hint style="info" %}
You can configure the SkyPilot VM Orchestrator to use a specific VM type, and
resources for each step of your pipeline can be configured individually.
Read more about how to configure step-specific resources [here](#configuring-step-specific-resources).
{% endhint %}

{% hint style="warning" %}
The SkyPilot VM Orchestrator does not currently support the ability to [schedule pipeline runs](/docs/book/user-guide/advanced-guide/pipelining-features/schedule-pipeline-runs.md).
{% endhint %}
Expand Down Expand Up @@ -378,7 +384,11 @@ One of the key features of the SkyPilot VM Orchestrator is the ability to run ea

The SkyPilot VM Orchestrator allows you to configure resources for each step individually. This means you can specify different VM types, CPU and memory requirements, and even use spot instances for certain steps while using on-demand instances for others.

To configure step-specific resources, you can pass a `SkypilotBaseOrchestratorSettings` object to the `settings` parameter of the `@step` decorator. This object allows you to define various attributes such as `instance_type`, `cpus`, `memory`, `use_spot`, `region`, and more.
If no step-specific settings are specified, the orchestrator will use the resources specified in the orchestrator settings for each step and run the entire pipeline in one VM. If step-specific settings are specified, an orchestrator VM will be spun up first, which will subsequently spin up new VMs depending on the step settings. You can disable this behavior by setting the `disable_step_based_settings` parameter to `True` in the orchestrator configuration, using the following command:

```shell
zenml orchestrator update <ORCHESTRATOR_NAME> --disable_step_based_settings=True
```

Here's an example of how to configure specific resources for a step for the AWS cloud:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,16 @@ class SkypilotBaseOrchestratorSettings(BaseSettings):
class SkypilotBaseOrchestratorConfig( # type: ignore[misc] # https://github.com/pydantic/pydantic/issues/4173
BaseOrchestratorConfig, SkypilotBaseOrchestratorSettings
):
"""Skypilot orchestrator base config."""
"""Skypilot orchestrator base config.
Attributes:
disable_step_based_settings: whether to disable step-based settings.
If True, the orchestrator will run all steps with the pipeline
settings in one single VM. If False, the orchestrator will run
each step with its own settings in separate VMs if provided.
"""

disable_step_based_settings: bool = False

@property
def is_local(self) -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@
import re
from abc import abstractmethod
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, cast
from uuid import uuid4

import sky

from zenml.entrypoints import PipelineEntrypointConfiguration
from zenml.enums import StackComponentType
from zenml.environment import Environment
from zenml.integrations.skypilot.flavors.skypilot_orchestrator_base_vm_config import (
SkypilotBaseOrchestratorConfig,
SkypilotBaseOrchestratorSettings,
)
from zenml.integrations.skypilot.orchestrators.skypilot_orchestrator_entrypoint_configuration import (
Expand Down Expand Up @@ -109,6 +112,15 @@ def get_orchestrator_run_id(self) -> str:
f"{ENV_ZENML_SKYPILOT_ORCHESTRATOR_RUN_ID}."
)

@property
def config(self) -> SkypilotBaseOrchestratorConfig:
    """Expose the component configuration typed as the Skypilot base config.

    The underlying `self._config` object is returned unchanged; the cast
    only narrows its static type for type checkers.

    Returns:
        The configuration.
    """
    typed_config = cast(SkypilotBaseOrchestratorConfig, self._config)
    return typed_config

@property
@abstractmethod
def cloud(self) -> sky.clouds.Cloud:
Expand Down Expand Up @@ -167,6 +179,12 @@ def prepare_or_run_pipeline(
"and the pipeline will be run immediately."
)

# Set up some variables for configuration
orchestrator_run_id = str(uuid4())
environment[
ENV_ZENML_SKYPILOT_ORCHESTRATOR_RUN_ID
] = orchestrator_run_id

settings = cast(
SkypilotBaseOrchestratorSettings,
self.get_settings(deployment),
Expand All @@ -189,14 +207,47 @@ def prepare_or_run_pipeline(
deployment=deployment, step_name=pipeline_step_name
)

# Build entrypoint command and args for the orchestrator pod.
# This will internally also build the command/args for all step pods.
command = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_command()
different_settings_found = False

if not self.config.disable_step_based_settings:
for _, step in deployment.step_configurations.items():
step_settings = cast(
SkypilotBaseOrchestratorSettings,
self.get_settings(step),
)
if step_settings != settings:
different_settings_found = True
logger.info(
"At least one step has different settings than the "
"pipeline. The step with different settings will be "
"run in a separate VM.\n"
"You can configure the orchestrator to disable this "
"behavior by updating the `disable_step_based_settings` "
"in your orchestrator configuration "
"by running the following command: "
"`zenml orchestrator update --disable-step-based-settings=True`"
)
break

# Decide which configuration to use based on whether different settings were found
if (
not self.config.disable_step_based_settings
and different_settings_found
):
# Run each step in a separate VM using SkypilotOrchestratorEntrypointConfiguration
command = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_command()
args = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_arguments(
run_name=orchestrator_run_name,
deployment_id=deployment.id,
)
else:
# Run the entire pipeline in one VM using PipelineEntrypointConfiguration
command = PipelineEntrypointConfiguration.get_entrypoint_command()
args = PipelineEntrypointConfiguration.get_entrypoint_arguments(
deployment_id=deployment.id
)

entrypoint_str = " ".join(command)
args = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_arguments(
run_name=orchestrator_run_name,
deployment_id=deployment.id,
)
arguments_str = " ".join(args)

docker_environment_str = " ".join(
Expand Down

0 comments on commit 3ce2eb3

Please sign in to comment.