Skip to content

Commit

Permalink
chore: all resources in values
Browse files Browse the repository at this point in the history
Signed-off-by: ThibaultFy <thibault.fouqueray@gmail.com>
  • Loading branch information
ThibaultFy committed Apr 25, 2024
1 parent e95b155 commit 8b9342a
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 39 deletions.
2 changes: 2 additions & 0 deletions backend/backend/settings/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@
"KANIKO_MIRROR": to_bool(os.environ.get("KANIKO_MIRROR", False)),
"KANIKO_IMAGE": os.environ.get("KANIKO_IMAGE"),
"KANIKO_DOCKER_CONFIG_SECRET_NAME": os.environ.get("KANIKO_DOCKER_CONFIG_SECRET_NAME"),
"KANIKO_RESOURCES": os.environ.get("KANIKO_RESOURCES"),
"COMPUTE_POD_STARTUP_TIMEOUT_SECONDS": int(os.environ.get("COMPUTE_POD_STARTUP_TIMEOUT_SECONDS", 300)),
"PRIVATE_CA_ENABLED": to_bool(os.environ.get("PRIVATE_CA_ENABLED")),
}
Expand All @@ -223,6 +224,7 @@
COMPUTE_POD_RUN_AS_GROUP = os.environ.get("COMPUTE_POD_RUN_AS_GROUP")
COMPUTE_POD_FS_GROUP = os.environ.get("COMPUTE_POD_FS_GROUP")
COMPUTE_POD_GKE_GPUS_LIMITS = int(os.environ.get("COMPUTE_POD_GKE_GPUS_LIMITS", 0))
COMPUTE_POD_RESOURCES = os.environ.get("COMPUTE_POD_RESOURCES")

# Prometheus configuration
ENABLE_METRICS = to_bool(os.environ.get("ENABLE_METRICS", False))
Expand Down
5 changes: 3 additions & 2 deletions backend/builder/image_builder/image_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from substrapp.compute_tasks.volumes import get_worker_subtuple_pvc_name
from substrapp.docker_registry import USER_IMAGE_REPOSITORY
from substrapp.kubernetes_utils import delete_pod
from substrapp.kubernetes_utils import get_resources_requirements
from substrapp.kubernetes_utils import get_resources_requirements_from_yaml
from substrapp.kubernetes_utils import get_security_context
from substrapp.lock_local import lock_resource
from substrapp.utils import timeit
Expand All @@ -43,6 +43,7 @@
IMAGE_BUILD_TIMEOUT = settings.IMAGE_BUILD_TIMEOUT
KANIKO_CONTAINER_NAME = "kaniko"
HOSTNAME = settings.HOSTNAME
KANIKO_RESOURCES = settings.KANIKO_RESOURCES


def container_image_tag_from_function(function: orchestrator.Function) -> str:
Expand Down Expand Up @@ -307,7 +308,7 @@ def _build_container(dockerfile_mount_path: str, image_tag: str) -> kubernetes.c
args=args,
volume_mounts=volume_mounts,
security_context=container_security_context,
resources=get_resources_requirements(cpu_request="1000m", memory_request="4Gi", memory_limit="32Gi"),
resources=get_resources_requirements_from_yaml(yaml_resources=KANIKO_RESOURCES),
)


Expand Down
5 changes: 3 additions & 2 deletions backend/substrapp/compute_tasks/compute_pod.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@

from substrapp.kubernetes_utils import delete_pod
from substrapp.kubernetes_utils import get_pod_security_context
from substrapp.kubernetes_utils import get_resources_requirements
from substrapp.kubernetes_utils import get_resources_requirements_from_yaml
from substrapp.kubernetes_utils import get_security_context

NAMESPACE = settings.NAMESPACE
COMPUTE_POD_RESOURCES = settings.COMPUTE_POD_RESOURCES
logger = structlog.get_logger(__name__)


Expand Down Expand Up @@ -113,7 +114,7 @@ def create_pod(
args=None,
volume_mounts=volume_mounts + gpu_volume_mounts,
security_context=get_security_context(),
resources=get_resources_requirements(cpu_request="1000m", memory_request="1Gi", memory_limit="64Gi"),
resources=get_resources_requirements_from_yaml(yaml_resources=COMPUTE_POD_RESOURCES),
env=[kubernetes.client.V1EnvVar(name=env_name, value=env_value) for env_name, env_value in environment.items()],
**container_optional_kwargs,
)
Expand Down
9 changes: 6 additions & 3 deletions backend/substrapp/kubernetes_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import kubernetes
import yaml
import structlog
from django.conf import settings

Expand Down Expand Up @@ -47,11 +48,13 @@ def get_security_context(root: bool = False, capabilities: list[str] = None) ->
return security_context


def get_resources_requirements_from_yaml(
    *,
    yaml_resources: str,
) -> kubernetes.client.V1ResourceRequirements:
    """Build container resource requirements from a YAML document.

    Args:
        yaml_resources: YAML text describing a mapping with optional
            ``requests`` and ``limits`` keys, for example
            ``{"requests": {"cpu": "500m"}, "limits": {"memory": "1Gi"}}``.
            ``None``, an empty string, or a document that parses to ``null``
            produce requirements with neither requests nor limits, so an
            unset environment variable does not crash pod creation.

    Returns:
        A ``V1ResourceRequirements`` whose ``requests`` and ``limits`` are
        taken from the parsed mapping; a missing section is left as ``None``.

    Raises:
        ValueError: If the document parses to something other than a mapping.
        yaml.YAMLError: If the text is not valid YAML.
    """
    # safe_load only constructs plain Python types, which is all a resources
    # mapping needs; it avoids the richer object construction of FullLoader.
    resources_dict = yaml.safe_load(yaml_resources) if yaml_resources else None
    if resources_dict is None:
        resources_dict = {}
    if not isinstance(resources_dict, dict):
        raise ValueError(
            f"Resources must be a YAML mapping, got {type(resources_dict).__name__}"
        )

    # Both sections are optional in Kubernetes, so do not require them here.
    return kubernetes.client.V1ResourceRequirements(
        requests=resources_dict.get("requests"),
        limits=resources_dict.get("limits"),
    )


Expand Down
41 changes: 22 additions & 19 deletions charts/substra-backend/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ spec:
- name: kaniko
image: {{ include "common.images.name" $.Values.kaniko.image }}
resources:
{{- toYaml $.Values.registryPrepopulate.kaniko.resources | nindent 12 }}
{{- toYaml $.Values.kaniko.resources | nindent 12 }}
args:
- "--context=/docker-context"
{{- if .dstImage }}
Expand Down
2 changes: 2 additions & 0 deletions charts/substra-backend/templates/statefulset-builder.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ spec:
value: {{ .Values.kaniko.dockerConfigSecretName | quote }}
- name: OBJECTSTORE_URL
value: {{ include "substra-backend.objectStore.url" . | quote }}
- name: KANIKO_RESOURCES
value: {{ toYaml .Values.kaniko.resources | quote }}
ports:
- name: http
containerPort: 8000
Expand Down
2 changes: 2 additions & 0 deletions charts/substra-backend/templates/statefulset-worker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: COMPUTE_POD_RESOURCES
value: {{ toYaml .Values.worker.computePod.resources | quote }}
- name: COMPUTE_POD_MAX_STARTUP_WAIT_SECONDS
value: {{ .Values.worker.computePod.maxStartupWaitSeconds | quote }}
- name: OBJECTSTORE_URL
Expand Down
32 changes: 20 additions & 12 deletions charts/substra-backend/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,16 @@ worker:
fsGroup: 1001
runAsUser: 1001
runAsGroup: 1001
## @param worker.computePod.resources.requests.cpu Worker compute pod container cpu request
## @param worker.computePod.resources.requests.memory Worker compute pod container memory request
## @param worker.computePod.resources.limits.memory Worker compute pod container memory limit
##
resources:
requests:
cpu: "500m"
memory: "512Mi"
limits:
memory: "64Gi"
events:
## @param worker.events.enabled Enable event service
##
Expand Down Expand Up @@ -577,7 +587,6 @@ builder:
limits:
cpu: "2000m"
memory: "8Gi"

## @param builder.nodeSelector Node labels for pod assignment
##
nodeSelector: { }
Expand Down Expand Up @@ -749,6 +758,16 @@ kaniko:
registry: gcr.io
repository: kaniko-project/executor
tag: v1.8.1
## @param kaniko.resources.requests.cpu Kaniko container cpu request
## @param kaniko.resources.requests.memory Kaniko container memory request
## @param kaniko.resources.limits.memory Kaniko container memory limit
##
resources:
requests:
cpu: "500m"
memory: "256Mi"
limits:
memory: "32Gi"
## @param kaniko.mirror If set to `true` pull base images from the local registry.
##
mirror: false
Expand Down Expand Up @@ -814,17 +833,6 @@ registryPrepopulate:
cpu: "100m"
limits:
memory: "400Mi"
## @param registryPrepopulate.kaniko.resources.requests.cpu Kaniko container cpu request
## @param registryPrepopulate.kaniko.resources.requests.memory Kaniko container memory request
## @param registryPrepopulate.kaniko.resources.limits.memory Kaniko container memory limit
##
kaniko:
resources:
requests:
memory: "256Mi"
cpu: "500m"
limits:
memory: "8Gi"
## @param registryPrepopulate.pause.resources.requests.cpu Pause container cpu request
## @param registryPrepopulate.pause.resources.requests.memory Pause container memory request
## @param registryPrepopulate.pause.resources.limits.memory Pause container memory limit
Expand Down

0 comments on commit 8b9342a

Please sign in to comment.