Skip to content

Commit

Permalink
Fix speculative execution compatibility with coloring (#2995)
Browse files Browse the repository at this point in the history
  • Loading branch information
chaokunyang authored May 6, 2022
1 parent 5b48d7e commit 940f2c3
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 9 deletions.
4 changes: 3 additions & 1 deletion mars/deploy/oscar/base_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ scheduling:
scheduler_backlog_timeout: 60
worker_idle_timeout: 120
speculation:
# Enables (yes) or disables (no) speculative execution of subtasks
# Enables (yes) or disables (no) speculative execution of subtasks.
# If enabled, `initial_same_color_num` will be set to 1 so that there are enough homogeneous subtasks to
# calculate statistics.
enabled: no
# Dry run: don't actually submit subtasks for slow subtasks
dry: no
Expand Down
3 changes: 0 additions & 3 deletions mars/deploy/oscar/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,9 +817,6 @@ async def _exec():
@pytest.fixture
async def speculative_cluster():
config = _load_config()
# coloring based fusion will make subtask too heterogeneous such that the speculative scheduler can't
# get enough homogeneous subtasks to calculate statistics
config["task"]["default_config"]["fuse_enabled"] = False
config["scheduling"]["speculation"]["enabled"] = True
config["scheduling"]["speculation"]["interval"] = 0.5
config["scheduling"]["speculation"]["threshold"] = 0.2
Expand Down
3 changes: 0 additions & 3 deletions mars/deploy/oscar/tests/test_ray_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@ async def speculative_cluster():
worker_mem=512 * 1024**2,
supervisor_mem=100 * 1024**2,
config={
# coloring based fusion will make subtask too heterogeneous such that the speculative scheduler can't
# get enough homogeneous subtasks to calculate statistics
"task": {"default_config": {"fuse_enabled": False}},
"scheduling": {
"speculation": {
"enabled": True,
Expand Down
14 changes: 12 additions & 2 deletions mars/deploy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import asyncio
import os
import time
import warnings
from typing import Callable, Dict, List, Union, TextIO

import yaml
Expand Down Expand Up @@ -150,10 +151,19 @@ def load_config(config: Union[str, Dict], default_config_file: str):
# use default config
if isinstance(config, str):
filename = config
return load_service_config_file(filename)
config = load_service_config_file(filename)
else:
full_config = load_service_config_file(default_config_file)
return _merge_config(full_config, config)
config = _merge_config(full_config, config)
if config["scheduling"]["speculation"]["enabled"] is True:
# If `initial_same_color_num` > 1, coloring-based fusion makes subtasks too heterogeneous, so
# the speculative scheduler can't get enough homogeneous subtasks to calculate statistics.
warnings.warn(
"speculative execution is enabled, set initial_same_color_num to 1 to "
"ensure enough homogeneous subtasks to calculate statistics."
)
config["task"]["default_config"]["initial_same_color_num"] = 1
return config


async def wait_all_supervisors_ready(endpoint):
Expand Down

0 comments on commit 940f2c3

Please sign in to comment.