Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🚑️ tuning down scheduler for dynamic sidecars #3025

Merged
merged 13 commits into from
May 10, 2022
4 changes: 1 addition & 3 deletions .env-devel
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,14 @@ CATALOG_DEV_FEATURES_ENABLED=0
DASK_SCHEDULER_HOST=dask-scheduler
DASK_SCHEDULER_PORT=8786


DYNAMIC_SIDECAR_IMAGE=${DOCKER_REGISTRY:-itisfoundation}/dynamic-sidecar:${DOCKER_IMAGE_TAG:-latest}

DIRECTOR_REGISTRY_CACHING_TTL=900
DIRECTOR_REGISTRY_CACHING=True

COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_URL=tcp://dask-scheduler:8786
DIRECTOR_V2_DEV_FEATURES_ENABLED=0

DYNAMIC_SIDECAR_IMAGE=${DOCKER_REGISTRY:-itisfoundation}/dynamic-sidecar:${DOCKER_IMAGE_TAG:-latest}
DYNAMIC_SIDECAR_LOG_LEVEL=DEBUG

FUNCTION_SERVICES_AUTHORS='{"UN": {"name": "Unknown", "email": "unknown@osparc.io", "affiliation": "unknown"}}'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ class DynamicSidecarProxySettings(BaseCustomSettings):


class DynamicSidecarSettings(BaseCustomSettings):
DYNAMIC_SIDECAR_LOG_LEVEL: str = Field(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MINOR: ..._LOG_LEVEL: Literal["DEBUG", "WARNING", "INFO", "ERROR"] = ...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apparently the literal is not working. Something about inheritance, it cannot be subclassed. As an alternative I've added a validator.

"WARNING", description="log level of the dynamic sidecar"
)
SC_BOOT_MODE: BootModeEnum = Field(
BootModeEnum.PRODUCTION,
description="Used to compute where or not should start sidecar in development mode",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ def update_ok_status(self, info: str) -> None:
self._update(DynamicSidecarStatus.OK, info)

def update_failing_status(self, info: str) -> None:
logger.error(info)
self._update(DynamicSidecarStatus.FAILING, info)

def __eq__(self, other: "Status") -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ def _inject_proxy_network_configuration(
target_container_spec = service_spec["services"][target_container]
container_networks = target_container_spec.get("networks", [])
container_networks.append(dynamic_sidecar_network_name)
target_container_spec["networks"] = container_networks
# avoid duplicate entries, this is important when the dynamic-sidecar
# fails to run docker-compose up, otherwise it will
# continue adding lots of entries to this list
target_container_spec["networks"] = list(set(container_networks))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question, can't it just be a set instead of a list?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it has to be a list inside the yaml that docker-compose up receives.



class _environment_section:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def _get_environment_variables(
state_exclude = scheduler_data.paths_mapping.state_exclude

return {
"LOG_LEVEL": app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR.DYNAMIC_SIDECAR_LOG_LEVEL,
"SIMCORE_HOST_NAME": scheduler_data.service_name,
"DYNAMIC_SIDECAR_COMPOSE_NAMESPACE": compose_namespace,
"DY_SIDECAR_PATH_INPUTS": f"{scheduler_data.paths_mapping.inputs_path}",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ async def _apply_observation_cycle(
dynamic_sidecar_settings=dynamic_services_settings.DYNAMIC_SIDECAR,
)
):
# NOTE: once marked for removal the observation cycle needs
# to continue in order for the service to be removed
logger.warning(
"Removing service %s from observation", scheduler_data.service_name
)
Expand Down Expand Up @@ -168,7 +170,6 @@ async def mark_service_for_removal(
)
await update_scheduler_data_label(current.scheduler_data)

self._enqueue_observation_from_service_name(service_name)
logger.debug("Service '%s' marked for removal from scheduler", service_name)

async def finish_service_removal(self, node_uuid: NodeID) -> None:
Expand Down Expand Up @@ -399,7 +400,7 @@ async def observing_single_service(service_name: str) -> None:
# fire and forget about the task
asyncio.create_task(
observing_single_service(service_name),
name=f"observe {service_name}",
name=f"observe_{service_name}",
)

logger.info("Scheduler 'trigger observation queue task' was shut down")
Expand All @@ -417,13 +418,15 @@ async def _run_scheduler_task(self) -> None:
async with self._lock:
for service_name in self._to_observe:
self._enqueue_observation_from_service_name(service_name)

await sleep(settings.DIRECTOR_V2_DYNAMIC_SCHEDULER_INTERVAL_SECONDS)
except asyncio.CancelledError: # pragma: no cover
logger.info("Stopped dynamic scheduler")
raise
except Exception: # pylint: disable=broad-except
logger.error("Unexpected error in dynamic scheduler", exc_info=True)
logger.exception(
"Unexpected error while scheduling sidecars observation"
)

await sleep(settings.DIRECTOR_V2_DYNAMIC_SCHEDULER_INTERVAL_SECONDS)

async def _discover_running_services(self) -> None:
"""discover all services which were started before and add them to the scheduler"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ def mock_env(

monkeypatch.setenv("SC_BOOT_MODE", "production")
monkeypatch.setenv("DYNAMIC_SIDECAR_EXPOSE_PORT", "true")
monkeypatch.setenv("DYNAMIC_SIDECAR_LOG_LEVEL", "DEBUG")
monkeypatch.setenv("PROXY_EXPOSE_PORT", "true")
monkeypatch.setenv("SIMCORE_SERVICES_NETWORK_NAME", network_name)
monkeypatch.delenv("DYNAMIC_SIDECAR_MOUNT_PATH_DEV", raising=False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
}

EXPECTED_DYNAMIC_SIDECAR_ENV_VAR_NAMES = {
"LOG_LEVEL",
"DY_SIDECAR_NODE_ID",
"DY_SIDECAR_PATH_INPUTS",
"DY_SIDECAR_PATH_OUTPUTS",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class DynamicSidecarSettings(BaseCustomSettings):
)

# LOGGING
LOG_LEVEL: str = Field("DEBUG")
LOG_LEVEL: str = Field("WARNING")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


@validator("LOG_LEVEL")
@classmethod
Expand Down