From 95efde7cb84baff0e9c39e353f4b37eb8b43cfc8 Mon Sep 17 00:00:00 2001 From: Safoine El Khabich <34200873+safoinme@users.noreply.github.com> Date: Fri, 12 Jan 2024 16:58:06 +0100 Subject: [PATCH 1/5] Allow skypilot to configure step or run full pipeline in one VM --- .../orchestrators/skypilot-vm.md | 8 ++++- .../skypilot_orchestrator_base_vm_config.py | 12 ++++++- .../skypilot_base_vm_orchestrator.py | 32 +++++++++++++++---- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md b/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md index 3d250d5cdd9..31b56c030a5 100644 --- a/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md +++ b/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md @@ -40,6 +40,12 @@ on-demand and managed spot VMs. While you can select the VM type you want to use also includes an optimizer that automatically selects the cheapest VM/zone/region/cloud for your workloads. Finally, the orchestrator includes an autostop feature that cleans up idle clusters, preventing unnecessary cloud costs. +{% hint style="info" %} +You can configure the SkyPilot VM Orchestrator to use a specific VM type, and +resources for each step of your pipeline can be configured individually. +Read more about how to configure step-specific resources [here](#configuring-step-specific-resources). +{% endhint %} + {% hint style="warning" %} The SkyPilot VM Orchestrator does not currently support the ability to [schedule pipelines runs](/docs/book/user-guide/advanced-guide/pipelining-features/schedule-pipeline-runs.md) {% endhint %} @@ -378,7 +384,7 @@ One of the key features of the SkyPilot VM Orchestrator is the ability to run ea The SkyPilot VM Orchestrator allows you to configure resources for each step individually. 
This means you can specify different VM types, CPU and memory requirements, and even use spot instances for certain steps while using on-demand instances for others. -To configure step-specific resources, you can pass a `SkypilotBaseOrchestratorSettings` object to the `settings` parameter of the `@step` decorator. This object allows you to define various attributes such as `instance_type`, `cpus`, `memory`, `use_spot`, `region`, and more. +In order to enable this, you will need to update your orchestrator configuration to use `configure_step_resources=True` or create a new orchestrator with this setting enabled. This setting allows the orchestrator to configure resources for each step individually. Once your orchestrator is configured to allow step-specific resources, you can pass a `SkypilotBaseOrchestratorSettings` object to the `settings` parameter of the `@step` decorator. This object allows you to define various attributes such as `instance_type`, `cpus`, `memory`, `use_spot`, `region`, and more. Here's an example of how to configure specific resources for a step for the AWS cloud: diff --git a/src/zenml/integrations/skypilot/flavors/skypilot_orchestrator_base_vm_config.py b/src/zenml/integrations/skypilot/flavors/skypilot_orchestrator_base_vm_config.py index 6f04b0445ec..c30c567f268 100644 --- a/src/zenml/integrations/skypilot/flavors/skypilot_orchestrator_base_vm_config.py +++ b/src/zenml/integrations/skypilot/flavors/skypilot_orchestrator_base_vm_config.py @@ -113,7 +113,17 @@ class SkypilotBaseOrchestratorSettings(BaseSettings): class SkypilotBaseOrchestratorConfig( # type: ignore[misc] # https://github.com/pydantic/pydantic/issues/4173 BaseOrchestratorConfig, SkypilotBaseOrchestratorSettings ): - """Skypilot orchestrator base config.""" + """Skypilot orchestrator base config. + + Attributes: + configure_step_resources: Enables the orchestrator to run configured steps. 
+ This will be used to determine whether to run the entire pipeline + in one single VM or to run each step in a separate VM if the + orchestrator is configured to run steps separately with some custom + resources. + """ + + configure_step_resources: bool = False @property def is_local(self) -> bool: diff --git a/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py b/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py index f1ca4db56d9..98a70ef9367 100644 --- a/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py +++ b/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py @@ -20,9 +20,11 @@ import sky +from zenml.entrypoints import PipelineEntrypointConfiguration from zenml.enums import StackComponentType from zenml.environment import Environment from zenml.integrations.skypilot.flavors.skypilot_orchestrator_base_vm_config import ( + SkypilotBaseOrchestratorConfig, SkypilotBaseOrchestratorSettings, ) from zenml.integrations.skypilot.orchestrators.skypilot_orchestrator_entrypoint_configuration import ( @@ -109,6 +111,15 @@ def get_orchestrator_run_id(self) -> str: f"{ENV_ZENML_SKYPILOT_ORCHESTRATOR_RUN_ID}." ) + @property + def config(self) -> SkypilotBaseOrchestratorConfig: + """Returns the `SkypilotBaseOrchestratorConfig` config. + + Returns: + The configuration. + """ + return cast(SkypilotBaseOrchestratorConfig, self._config) + @property @abstractmethod def cloud(self) -> sky.clouds.Cloud: @@ -189,14 +200,21 @@ def prepare_or_run_pipeline( deployment=deployment, step_name=pipeline_step_name ) - # Build entrypoint command and args for the orchestrator pod. - # This will internally also build the command/args for all step pods. - command = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_command() + if self.config.configure_step_resources: + # Run each step in a separate VM if configured. + # Build entrypoint command and args for the orchestrator VM. 
+ # This will internally also build the command/args for all step pods. + command = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_command() + args = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_arguments( + run_name=orchestrator_run_name, + deployment_id=deployment.id, + ) + else: + command = PipelineEntrypointConfiguration.get_entrypoint_command() + args = PipelineEntrypointConfiguration.get_entrypoint_arguments( + deployment_id=deployment.id + ) entrypoint_str = " ".join(command) - args = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_arguments( - run_name=orchestrator_run_name, - deployment_id=deployment.id, - ) arguments_str = " ".join(args) docker_environment_str = " ".join( From 8a51a4a687f31b21577648d853b87c12b1ecb55d Mon Sep 17 00:00:00 2001 From: Safoine El Khabich <34200873+safoinme@users.noreply.github.com> Date: Sat, 13 Jan 2024 17:18:28 +0100 Subject: [PATCH 2/5] Chnage behaviour of using step based resources --- .../orchestrators/skypilot-vm.md | 6 +++- .../skypilot_orchestrator_base_vm_config.py | 11 +++---- .../skypilot_base_vm_orchestrator.py | 33 +++++++++++++++---- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md b/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md index 31b56c030a5..936772d7214 100644 --- a/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md +++ b/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md @@ -384,7 +384,11 @@ One of the key features of the SkyPilot VM Orchestrator is the ability to run ea The SkyPilot VM Orchestrator allows you to configure resources for each step individually. This means you can specify different VM types, CPU and memory requirements, and even use spot instances for certain steps while using on-demand instances for others. 
-In order to enable this, you will need to update your orchestrator configuration to use `configure_step_resources=True` or create a new orchestrator with this setting enabled. This setting allows the orchestrator to configure resources for each step individually. Once your orchestrator is configured to allow step-specific resources, you can pass a `SkypilotBaseOrchestratorSettings` object to the `settings` parameter of the `@step` decorator. This object allows you to define various attributes such as `instance_type`, `cpus`, `memory`, `use_spot`, `region`, and more. +By default, the orchestrator will use the resources specified in the orchestrator settings for each step and make sure that the VMs are provisioned with the appropriate resources. However, you can disable this behavior by setting the `disable_step_based_settings` parameter to `True` in the orchestrator configuration. You can do this using the following command: + +```shell +zenml orchestrator update --disable_step_based_settings=True +``` Here's an example of how to configure specific resources for a step for the AWS cloud: diff --git a/src/zenml/integrations/skypilot/flavors/skypilot_orchestrator_base_vm_config.py b/src/zenml/integrations/skypilot/flavors/skypilot_orchestrator_base_vm_config.py index c30c567f268..cdc4b83cd94 100644 --- a/src/zenml/integrations/skypilot/flavors/skypilot_orchestrator_base_vm_config.py +++ b/src/zenml/integrations/skypilot/flavors/skypilot_orchestrator_base_vm_config.py @@ -116,14 +116,13 @@ class SkypilotBaseOrchestratorConfig( # type: ignore[misc] # https://github.com """Skypilot orchestrator base config. Attributes: - configure_step_resources: Enables the orchestrator to run configured steps. - This will be used to determine whether to run the entire pipeline - in one single VM or to run each step in a separate VM if the - orchestrator is configured to run steps separately with some custom - resources. 
+ disable_step_based_settings: whether to disable step-based settings. + If True, the orchestrator will run all steps with the pipeline + settings in one single VM. If False, the orchestrator will run + each step with its own settings in separate VMs if provided. """ - configure_step_resources: bool = False + disable_step_based_settings: bool = False @property def is_local(self) -> bool: diff --git a/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py b/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py index 98a70ef9367..40a225fcfcd 100644 --- a/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py +++ b/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py @@ -200,20 +200,39 @@ def prepare_or_run_pipeline( deployment=deployment, step_name=pipeline_step_name ) - if self.config.configure_step_resources: + if self.config.disable_step_based_settings: + # Run the entire pipeline in one VM + command = PipelineEntrypointConfiguration.get_entrypoint_command() + args = PipelineEntrypointConfiguration.get_entrypoint_arguments( + deployment_id=deployment.id + ) + else: + for step_name, step in deployment.step_configurations.items(): + step_settings = cast( + SkypilotBaseOrchestratorSettings, + self.get_settings(step), + ) + if step_settings != settings: + logger.info( + "At least one step has different settings than the " + "pipeline. The step with different settings will be " + "run in a separate VM.\n" + "You can configure the orchestrator to disable this " + "behavior by updating the `disable_step_based_settings` " + "in your orchestrator configuration." + "By running the following command: " + "`zenml orchestrator update --disable-step-based-settings=True`" + ) + break # Run each step in a separate VM if configured. # Build entrypoint command and args for the orchestrator VM. - # This will internally also build the command/args for all step pods. 
+ # This will internally also build the command/args for all step VMs. command = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_command() args = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_arguments( run_name=orchestrator_run_name, deployment_id=deployment.id, ) - else: - command = PipelineEntrypointConfiguration.get_entrypoint_command() - args = PipelineEntrypointConfiguration.get_entrypoint_arguments( - deployment_id=deployment.id - ) + entrypoint_str = " ".join(command) arguments_str = " ".join(args) From 7286397173af87d12c58b70e11cee018b8cb83e4 Mon Sep 17 00:00:00 2001 From: Safoine El Khabich <34200873+safoinme@users.noreply.github.com> Date: Mon, 15 Jan 2024 09:11:11 +0100 Subject: [PATCH 3/5] Update src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py Co-authored-by: Alex Strick van Linschoten --- .../skypilot/orchestrators/skypilot_base_vm_orchestrator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py b/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py index 40a225fcfcd..929db2d47a5 100644 --- a/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py +++ b/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py @@ -219,8 +219,8 @@ def prepare_or_run_pipeline( "run in a separate VM.\n" "You can configure the orchestrator to disable this " "behavior by updating the `disable_step_based_settings` " - "in your orchestrator configuration." 
- "By running the following command: " + "in your orchestrator configuration " + "by running the following command: " "`zenml orchestrator update --disable-step-based-settings=True`" ) break From 541ba0faac37b706acbf46e4360de4a9e96666d0 Mon Sep 17 00:00:00 2001 From: Safoine El Khabich <34200873+safoinme@users.noreply.github.com> Date: Mon, 15 Jan 2024 10:21:33 +0100 Subject: [PATCH 4/5] Refactor SkypilotBaseOrchestrator to support step-based VM configuration --- .../skypilot_base_vm_orchestrator.py | 40 +++++++++++++------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py b/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py index 40a225fcfcd..42757b3ca80 100644 --- a/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py +++ b/src/zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py @@ -17,6 +17,7 @@ import re from abc import abstractmethod from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, cast +from uuid import uuid4 import sky @@ -178,6 +179,12 @@ def prepare_or_run_pipeline( "and the pipeline will be run immediately." 
) + # Set up some variables for configuration + orchestrator_run_id = str(uuid4()) + environment[ + ENV_ZENML_SKYPILOT_ORCHESTRATOR_RUN_ID + ] = orchestrator_run_id + settings = cast( SkypilotBaseOrchestratorSettings, self.get_settings(deployment), @@ -200,38 +207,45 @@ def prepare_or_run_pipeline( deployment=deployment, step_name=pipeline_step_name ) - if self.config.disable_step_based_settings: - # Run the entire pipeline in one VM - command = PipelineEntrypointConfiguration.get_entrypoint_command() - args = PipelineEntrypointConfiguration.get_entrypoint_arguments( - deployment_id=deployment.id - ) - else: - for step_name, step in deployment.step_configurations.items(): + different_settings_found = False + + if not self.config.disable_step_based_settings: + for _, step in deployment.step_configurations.items(): step_settings = cast( SkypilotBaseOrchestratorSettings, self.get_settings(step), ) if step_settings != settings: + different_settings_found = True logger.info( "At least one step has different settings than the " "pipeline. The step with different settings will be " "run in a separate VM.\n" "You can configure the orchestrator to disable this " "behavior by updating the `disable_step_based_settings` " - "in your orchestrator configuration." - "By running the following command: " + "in your orchestrator configuration " + "by running the following command: " "`zenml orchestrator update --disable-step-based-settings=True`" ) break - # Run each step in a separate VM if configured. - # Build entrypoint command and args for the orchestrator VM. - # This will internally also build the command/args for all step VMs. 
+ + # Decide which configuration to use based on whether different settings were found + if ( + not self.config.disable_step_based_settings + and different_settings_found + ): + # Run each step in a separate VM using SkypilotOrchestratorEntrypointConfiguration command = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_command() args = SkypilotOrchestratorEntrypointConfiguration.get_entrypoint_arguments( run_name=orchestrator_run_name, deployment_id=deployment.id, ) + else: + # Run the entire pipeline in one VM using PipelineEntrypointConfiguration + command = PipelineEntrypointConfiguration.get_entrypoint_command() + args = PipelineEntrypointConfiguration.get_entrypoint_arguments( + deployment_id=deployment.id + ) entrypoint_str = " ".join(command) arguments_str = " ".join(args) From 31dcedf96ed87da59b37d4b787c573379dc7ecc6 Mon Sep 17 00:00:00 2001 From: Safoine El Khabich <34200873+safoinme@users.noreply.github.com> Date: Mon, 15 Jan 2024 10:32:31 +0100 Subject: [PATCH 5/5] Update docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md Co-authored-by: Hamza Tahir --- .../component-guide/orchestrators/skypilot-vm.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md b/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md index 936772d7214..f9b5b9210da 100644 --- a/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md +++ b/docs/book/stacks-and-components/component-guide/orchestrators/skypilot-vm.md @@ -384,7 +384,7 @@ One of the key features of the SkyPilot VM Orchestrator is the ability to run ea The SkyPilot VM Orchestrator allows you to configure resources for each step individually. This means you can specify different VM types, CPU and memory requirements, and even use spot instances for certain steps while using on-demand instances for others. 
-By default, the orchestrator will use the resources specified in the orchestrator settings for each step and make sure that the VMs are provisioned with the appropriate resources. However, you can disable this behavior by setting the `disable_step_based_settings` parameter to `True` in the orchestrator configuration. You can do this using the following command: +If no step-specific settings are specified, the orchestrator will use the resources specified in the orchestrator settings for each step and run the entire pipeline in one VM. If step-specific settings are specified, an orchestrator VM will be spun up first, which will subsequently spin up new VMs depending on the step settings. You can disable this behavior by setting the `disable_step_based_settings` parameter to `True` in the orchestrator configuration, using the following command: ```shell zenml orchestrator update --disable_step_based_settings=True