Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix speculative execution compatibility with coloring #2995

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion mars/deploy/oscar/base_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ scheduling:
scheduler_backlog_timeout: 60
worker_idle_timeout: 120
speculation:
# Enables (yes) or disables (no) speculative execution of subtasks
# Enables (yes) or disables (no) speculative execution of subtasks.
# If enabled, `initial_same_color_num` will be set to 1 to ensure there are enough homogeneous
# subtasks to calculate statistics.
enabled: no
# Don't actually submit subtasks for slow subtasks
dry: no
Expand Down
3 changes: 0 additions & 3 deletions mars/deploy/oscar/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,9 +772,6 @@ async def _exec():
@pytest.fixture
async def speculative_cluster():
config = _load_config()
# coloring based fusion will make subtask too heterogeneous such that the speculative scheduler can't
# get enough homogeneous subtasks to calculate statistics
config["task"]["default_config"]["fuse_enabled"] = False
config["scheduling"]["speculation"]["enabled"] = True
config["scheduling"]["speculation"]["interval"] = 0.5
config["scheduling"]["speculation"]["threshold"] = 0.2
Expand Down
3 changes: 0 additions & 3 deletions mars/deploy/oscar/tests/test_ray_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@ async def speculative_cluster():
worker_mem=512 * 1024**2,
supervisor_mem=100 * 1024**2,
config={
# coloring based fusion will make subtask too heterogeneous such that the speculative scheduler can't
# get enough homogeneous subtasks to calculate statistics
"task": {"default_config": {"fuse_enabled": False}},
"scheduling": {
"speculation": {
"enabled": True,
Expand Down
14 changes: 12 additions & 2 deletions mars/deploy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import asyncio
import os
import time
import warnings
from typing import Callable, Dict, List, Union, TextIO

import yaml
Expand Down Expand Up @@ -150,10 +151,19 @@ def load_config(config: Union[str, Dict], default_config_file: str):
# use default config
if isinstance(config, str):
filename = config
return load_service_config_file(filename)
config = load_service_config_file(filename)
else:
full_config = load_service_config_file(default_config_file)
return _merge_config(full_config, config)
config = _merge_config(full_config, config)
if config["scheduling"]["speculation"]["enabled"] is True:
# If `initial_same_color_num` > 1, coloring-based fusion makes subtasks too heterogeneous, so
# the speculative scheduler can't get enough homogeneous subtasks to calculate statistics.
warnings.warn(
"speculative execution is enabled, set initial_same_color_num to 1 to "
"ensure enough homogeneous subtasks to calculate statistics."
)
config["task"]["default_config"]["initial_same_color_num"] = 1
return config


async def wait_all_supervisors_ready(endpoint):
Expand Down