From d6a0a5d37367a3ecc45685f919233ea1339345a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Barth=C3=A9s?= Date: Mon, 17 Jun 2024 17:42:16 +0200 Subject: [PATCH 01/22] feat: create env var `POD_NAME` and `COMPUTE_POD_AFFINITY` to replace dynamically generated pod affinity for compute pod MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Guilhem Barthés --- .../templates/statefulset-worker.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/charts/substra-backend/templates/statefulset-worker.yaml b/charts/substra-backend/templates/statefulset-worker.yaml index 58ec341df..1a4d475fb 100644 --- a/charts/substra-backend/templates/statefulset-worker.yaml +++ b/charts/substra-backend/templates/statefulset-worker.yaml @@ -131,6 +131,22 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: COMPUTE_POD_AFFINITY + value: |- + "affinity: + podAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: statefulset.kubernetes.io/pod-name + operator: In + values: + - $(POD_NAME) + topologyKey: kubernetes.io/hostname" - name: COMPUTE_POD_RESOURCES value: {{ toYaml .Values.worker.computePod.resources | quote }} - name: COMPUTE_POD_MAX_STARTUP_WAIT_SECONDS From 20bb3e1e50c603a75aca6de2a652144da4bfe7f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Barth=C3=A9s?= Date: Mon, 17 Jun 2024 17:48:06 +0200 Subject: [PATCH 02/22] wip: use affinity from env var MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Guilhem Barthés --- .../substrapp/compute_tasks/compute_pod.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/backend/substrapp/compute_tasks/compute_pod.py b/backend/substrapp/compute_tasks/compute_pod.py index 446448b76..3048f4015 100644 --- a/backend/substrapp/compute_tasks/compute_pod.py +++ b/backend/substrapp/compute_tasks/compute_pod.py @@ -1,3 +1,4 @@ +import json import os import kubernetes @@ -120,22 +121,6 @@ def create_pod( **container_optional_kwargs, ) - pod_affinity = kubernetes.client.V1Affinity( - pod_affinity=kubernetes.client.V1PodAffinity( - required_during_scheduling_ignored_during_execution=[ - kubernetes.client.V1PodAffinityTerm( - label_selector=kubernetes.client.V1LabelSelector( - match_expressions=[ - kubernetes.client.V1LabelSelectorRequirement( - key="statefulset.kubernetes.io/pod-name", operator="In", values=[os.getenv("HOSTNAME")] - ) - ] - ), - topology_key="kubernetes.io/hostname", - ) - ] - ) - ) image_pull_secret = os.getenv("DOCKER_CONFIG_SECRET_NAME") if image_pull_secret: @@ -144,7 +129,7 @@ def create_pod( image_pull_secrets = None spec = kubernetes.client.V1PodSpec( restart_policy="Never", - affinity=pod_affinity, + affinity=json.loads(os.getenv("COMPUTE_POD_AFFINITY")), containers=[container_compute], volumes=volumes + gpu_volume, security_context=get_pod_security_context(), From 823f6f83b994d5ea350385f70aba926552cba53f Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Thu, 20 Jun 2024 17:01:45 +0200 Subject: [PATCH 03/22] chore: affinite and access mode to values Signed-off-by: ThibaultFy --- .../templates/statefulset-worker.yaml | 16 +++------------- charts/substra-backend/values.yaml | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/charts/substra-backend/templates/statefulset-worker.yaml b/charts/substra-backend/templates/statefulset-worker.yaml index 1a4d475fb..f1e5d67c9 100644 --- a/charts/substra-backend/templates/statefulset-worker.yaml +++ b/charts/substra-backend/templates/statefulset-worker.yaml @@ -127,7 +127,7 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace - - name: NODE_NAME + - name: NODE_NAME # TODO see if I can remove valueFrom: fieldRef: fieldPath: spec.nodeName @@ -136,17 +136,7 @@ spec: fieldRef: fieldPath: metadata.name - name: COMPUTE_POD_AFFINITY - value: |- - "affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: statefulset.kubernetes.io/pod-name - operator: In - values: - - $(POD_NAME) - topologyKey: kubernetes.io/hostname" + value: {{ .Values.worker.computePod.affinity | quote }} # TODO see if it works - name: COMPUTE_POD_RESOURCES value: {{ toYaml .Values.worker.computePod.resources | quote }} - name: COMPUTE_POD_MAX_STARTUP_WAIT_SECONDS @@ -247,7 +237,7 @@ spec: - metadata: name: subtuple spec: - accessModes: [ "ReadWriteOnce" ] + accessModes: [ {{ .Values.worker.volumeAccessMode }} ] {{ include "common.storage.class" .Values.worker.persistence }} resources: requests: diff --git a/charts/substra-backend/values.yaml b/charts/substra-backend/values.yaml index 497a61d9e..8d6525a51 100644 --- a/charts/substra-backend/values.yaml +++ b/charts/substra-backend/values.yaml @@ -375,7 +375,19 @@ worker: cpu: "1000m" memory: "1Gi" limits: - memory: "64Gi" + memory: "64Gi"$ + ## @param worker.computePod.affinity Worker compute pod container affinity + ## + affinity: + podAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: statefulset.kubernetes.io/pod-name + operator: In + values: + - $(POD_NAME) + topologyKey: kubernetes.io/hostname" events: ## @param worker.events.enabled Enable event service ## @@ -435,7 +447,9 @@ worker: ## If not set and create is true, a name is generated using the substra.fullname template ## name: "" - + ## @param worker.volumeAccessMode Access mode for volume + ## + volumeAccessMode: "ReadWriteOnce" ## @section Substra periodic tasks worker settings ## schedulerWorker: From d633e52692f3d2b2757b1135aa03f27ae633bb01 Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Fri, 21 Jun 2024 11:00:09 +0200 Subject: [PATCH 04/22] chore: add node selector and tolerations for compute pod Signed-off-by: ThibaultFy --- backend/substrapp/compute_tasks/compute_pod.py | 2 ++ charts/substra-backend/templates/statefulset-worker.yaml | 4 ++++ charts/substra-backend/values.yaml | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/backend/substrapp/compute_tasks/compute_pod.py b/backend/substrapp/compute_tasks/compute_pod.py index 3048f4015..1deb1b367 100644 --- a/backend/substrapp/compute_tasks/compute_pod.py +++ b/backend/substrapp/compute_tasks/compute_pod.py @@ -130,6 +130,8 @@ def create_pod( spec = kubernetes.client.V1PodSpec( restart_policy="Never", affinity=json.loads(os.getenv("COMPUTE_POD_AFFINITY")), + node_selector=json.loads(os.getenv("COMPUTE_POD_NODE_SELECTOR")), + tolerations=json.loads(os.getenv("COMPUTE_POD_TOLERATIONS")), containers=[container_compute], volumes=volumes + gpu_volume, security_context=get_pod_security_context(), diff --git a/charts/substra-backend/templates/statefulset-worker.yaml b/charts/substra-backend/templates/statefulset-worker.yaml index f1e5d67c9..d3d7c1d51 100644 --- a/charts/substra-backend/templates/statefulset-worker.yaml +++ b/charts/substra-backend/templates/statefulset-worker.yaml @@ -137,6 +137,10 @@ spec: fieldPath: metadata.name - name: COMPUTE_POD_AFFINITY value: {{ .Values.worker.computePod.affinity | quote }} # TODO see if it works + - name: COMPUTE_POD_NODE_SELECTOR + value: {{ .Values.worker.computePod.nodeSelector | quote }} + - name: COMPUTE_POD_TOLERATIONS + value: {{ .Values.worker.computePod.tolerations | quote }} - name: COMPUTE_POD_RESOURCES value: {{ toYaml .Values.worker.computePod.resources | quote }} - name: COMPUTE_POD_MAX_STARTUP_WAIT_SECONDS diff --git a/charts/substra-backend/values.yaml b/charts/substra-backend/values.yaml index 8d6525a51..278366596 100644 --- a/charts/substra-backend/values.yaml +++ b/charts/substra-backend/values.yaml @@ -376,6 +376,12 @@ worker: memory: "1Gi" limits: memory: "64Gi"$ + ## @param worker.computePod.nodeSelector Node labels for pod assignment + ## + nodeSelector: {} + ## @param worker.computePod.tolerations Toleration labels for pod assignment + ## + tolerations: [] ## @param worker.computePod.affinity Worker compute pod container affinity ## affinity: From 54197a191d12578eedfde8d62b45e9c0c6897539 Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Fri, 21 Jun 2024 11:09:15 +0200 Subject: [PATCH 05/22] chore: typo yaml Signed-off-by: ThibaultFy --- .../templates/statefulset-worker.yaml | 4 ++-- charts/substra-backend/values.yaml | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/charts/substra-backend/templates/statefulset-worker.yaml b/charts/substra-backend/templates/statefulset-worker.yaml index d3d7c1d51..93b298411 100644 --- a/charts/substra-backend/templates/statefulset-worker.yaml +++ b/charts/substra-backend/templates/statefulset-worker.yaml @@ -127,7 +127,7 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace - - name: NODE_NAME # TODO see if I can remove + - name: NODE_NAME valueFrom: fieldRef: fieldPath: spec.nodeName @@ -136,7 +136,7 @@ spec: fieldRef: fieldPath: metadata.name - name: COMPUTE_POD_AFFINITY - value: {{ .Values.worker.computePod.affinity | quote }} # TODO see if it works + value: {{ .Values.worker.computePod.affinity | quote }} # TODO see if it works to inject POD_NAME venv in string - name: COMPUTE_POD_NODE_SELECTOR value: {{ .Values.worker.computePod.nodeSelector | quote }} - name: COMPUTE_POD_TOLERATIONS diff --git a/charts/substra-backend/values.yaml b/charts/substra-backend/values.yaml index 278366596..e33d619fb 100644 --- a/charts/substra-backend/values.yaml +++ b/charts/substra-backend/values.yaml @@ -375,7 +375,7 @@ worker: cpu: "1000m" memory: "1Gi" limits: - memory: "64Gi"$ + memory: "64Gi" ## @param worker.computePod.nodeSelector Node labels for pod assignment ## nodeSelector: {} @@ -387,13 +387,13 @@ worker: affinity: podAffinity: requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: + - labelSelector: matchExpressions: - - key: statefulset.kubernetes.io/pod-name - operator: In - values: - - $(POD_NAME) - topologyKey: kubernetes.io/hostname" + - key: statefulset.kubernetes.io/pod-name + operator: In + values: + - $(POD_NAME) + topologyKey: kubernetes.io/hostname" events: ## @param worker.events.enabled Enable event service ## From 37112e5d6786260bec0a6080ff04b80eb1d9fc75 Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Mon, 24 Jun 2024 14:43:32 +0200 Subject: [PATCH 06/22] chore: toYaml Signed-off-by: ThibaultFy --- backend/substrapp/compute_tasks/compute_pod.py | 8 ++++---- .../templates/statefulset-worker.yaml | 6 +++--- charts/substra-backend/values.yaml | 17 +++++++++-------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/backend/substrapp/compute_tasks/compute_pod.py b/backend/substrapp/compute_tasks/compute_pod.py index 1deb1b367..63d65444f 100644 --- a/backend/substrapp/compute_tasks/compute_pod.py +++ b/backend/substrapp/compute_tasks/compute_pod.py @@ -1,4 +1,4 @@ -import json +import yaml import os import kubernetes @@ -129,9 +129,9 @@ def create_pod( image_pull_secrets = None spec = kubernetes.client.V1PodSpec( restart_policy="Never", - affinity=json.loads(os.getenv("COMPUTE_POD_AFFINITY")), - node_selector=json.loads(os.getenv("COMPUTE_POD_NODE_SELECTOR")), - tolerations=json.loads(os.getenv("COMPUTE_POD_TOLERATIONS")), + affinity=yaml.safe_load(os.getenv("COMPUTE_POD_AFFINITY")), + node_selector=yaml.safe_load(os.getenv("COMPUTE_POD_NODE_SELECTOR")), + tolerations=yaml.safe_load(os.getenv("COMPUTE_POD_TOLERATIONS")), containers=[container_compute], volumes=volumes + gpu_volume, security_context=get_pod_security_context(), diff --git a/charts/substra-backend/templates/statefulset-worker.yaml b/charts/substra-backend/templates/statefulset-worker.yaml index 93b298411..3ff2fd836 100644 --- a/charts/substra-backend/templates/statefulset-worker.yaml +++ b/charts/substra-backend/templates/statefulset-worker.yaml @@ -136,11 +136,11 @@ spec: fieldRef: fieldPath: metadata.name - name: COMPUTE_POD_AFFINITY - value: {{ .Values.worker.computePod.affinity | quote }} # TODO see if it works to inject POD_NAME venv in string + value: {{ toYaml .Values.worker.computePod.affinity | quote }} # TODO see if it works to inject POD_NAME venv in string - name: COMPUTE_POD_NODE_SELECTOR - value: {{ .Values.worker.computePod.nodeSelector | quote }} + value: {{ toYaml .Values.worker.computePod.nodeSelector | quote }} - name: COMPUTE_POD_TOLERATIONS - value: {{ .Values.worker.computePod.tolerations | quote }} + value: {{ toYaml .Values.worker.computePod.tolerations | quote }} - name: COMPUTE_POD_RESOURCES value: {{ toYaml .Values.worker.computePod.resources | quote }} - name: COMPUTE_POD_MAX_STARTUP_WAIT_SECONDS diff --git a/charts/substra-backend/values.yaml b/charts/substra-backend/values.yaml index e33d619fb..70c145af1 100644 --- a/charts/substra-backend/values.yaml +++ b/charts/substra-backend/values.yaml @@ -382,18 +382,19 @@ worker: ## @param worker.computePod.tolerations Toleration labels for pod assignment ## tolerations: [] - ## @param worker.computePod.affinity Worker compute pod container affinity + ## @param worker.computePod.affinity Worker compute pod container affinity. Pass as a string in order to catch the generated pod name on the statefuset environment variable PODNAME. ## + ## '{"podAffinity": {"requiredDuringSchedulingIgnoredDuringExecution": {"topologyKey":"kubernetes.io/hostname", "labelSelector": {"matchExpressions":{"key":"statefulset.kubernetes.io/pod-name", "operator":"In","values":"$POD_NAME"}}}}}' affinity: podAffinity: requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: statefulset.kubernetes.io/pod-name - operator: In - values: - - $(POD_NAME) - topologyKey: kubernetes.io/hostname" + - labelSelector: + matchExpressions: + - key: statefulset.kubernetes.io/pod-name + operator: In + values: + - $(POD_NAME) + topologyKey: kubernetes.io/hostname events: ## @param worker.events.enabled Enable event service ## From abc8d42dc4bf8046bd580e84ccb353b61a4587c8 Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Mon, 24 Jun 2024 14:51:02 +0200 Subject: [PATCH 07/22] chore: toYaml Signed-off-by: ThibaultFy --- charts/substra-backend/values.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/charts/substra-backend/values.yaml b/charts/substra-backend/values.yaml index 70c145af1..7f7d2d520 100644 --- a/charts/substra-backend/values.yaml +++ b/charts/substra-backend/values.yaml @@ -384,7 +384,6 @@ worker: tolerations: [] ## @param worker.computePod.affinity Worker compute pod container affinity. Pass as a string in order to catch the generated pod name on the statefuset environment variable PODNAME. ## - ## '{"podAffinity": {"requiredDuringSchedulingIgnoredDuringExecution": {"topologyKey":"kubernetes.io/hostname", "labelSelector": {"matchExpressions":{"key":"statefulset.kubernetes.io/pod-name", "operator":"In","values":"$POD_NAME"}}}}}' affinity: podAffinity: requiredDuringSchedulingIgnoredDuringExecution: From 2918ab0fb7529ae41653cdbfe5e0f10158e278d3 Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Mon, 24 Jun 2024 15:45:16 +0200 Subject: [PATCH 08/22] chore: podname to hostname Signed-off-by: ThibaultFy --- charts/substra-backend/values.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/substra-backend/values.yaml b/charts/substra-backend/values.yaml index 7f7d2d520..1b876afcf 100644 --- a/charts/substra-backend/values.yaml +++ b/charts/substra-backend/values.yaml @@ -283,7 +283,7 @@ server: ## honorLabels: false -## @section Substra worker settings +## @section Substra worker settings. Note that you can access the worker pod name using $(POD_NAME) and its node using $(NODE_NAME). ## worker: ## @param worker.enabled Enable worker service @@ -382,7 +382,7 @@ worker: ## @param worker.computePod.tolerations Toleration labels for pod assignment ## tolerations: [] - ## @param worker.computePod.affinity Worker compute pod container affinity. Pass as a string in order to catch the generated pod name on the statefuset environment variable PODNAME. + ## @param worker.computePod.affinity Worker compute pod container affinity. ## affinity: podAffinity: @@ -392,7 +392,7 @@ worker: - key: statefulset.kubernetes.io/pod-name operator: In values: - - $(POD_NAME) + - $(HOSTNAME) topologyKey: kubernetes.io/hostname events: ## @param worker.events.enabled Enable event service From 5deffc77d4b607daa87d414626d5fa645431a3c9 Mon Sep 17 00:00:00 2001 From: ThibaultFy <50656860+ThibaultFy@users.noreply.github.com> Date: Mon, 24 Jun 2024 18:02:33 +0200 Subject: [PATCH 09/22] Update charts/substra-backend/values.yaml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Guilhem Barthés Signed-off-by: ThibaultFy <50656860+ThibaultFy@users.noreply.github.com> --- charts/substra-backend/values.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/charts/substra-backend/values.yaml b/charts/substra-backend/values.yaml index 1b876afcf..50b34fa4c 100644 --- a/charts/substra-backend/values.yaml +++ b/charts/substra-backend/values.yaml @@ -388,11 +388,11 @@ worker: podAffinity: requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: - matchExpressions: - - key: statefulset.kubernetes.io/pod-name - operator: In - values: - - $(HOSTNAME) + matchExpressions: + - key: statefulset.kubernetes.io/pod-name + operator: In + values: + - $(POD_NAME) topologyKey: kubernetes.io/hostname events: ## @param worker.events.enabled Enable event service From 2b073359e12aaf172bb85b6bb8bd8db733491217 Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Mon, 24 Jun 2024 18:03:45 +0200 Subject: [PATCH 10/22] chore: change access mode Signed-off-by: ThibaultFy --- charts/substra-backend/templates/statefulset-worker.yaml | 2 +- charts/substra-backend/values.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/substra-backend/templates/statefulset-worker.yaml b/charts/substra-backend/templates/statefulset-worker.yaml index 3ff2fd836..a1f80dc57 100644 --- a/charts/substra-backend/templates/statefulset-worker.yaml +++ b/charts/substra-backend/templates/statefulset-worker.yaml @@ -241,7 +241,7 @@ spec: - metadata: name: subtuple spec: - accessModes: [ {{ .Values.worker.volumeAccessMode }} ] + accessModes: {{ .Values.worker.accessMode }} {{ include "common.storage.class" .Values.worker.persistence }} resources: requests: diff --git a/charts/substra-backend/values.yaml b/charts/substra-backend/values.yaml index 50b34fa4c..68dec0903 100644 --- a/charts/substra-backend/values.yaml +++ b/charts/substra-backend/values.yaml @@ -453,9 +453,9 @@ worker: ## If not set and create is true, a name is generated using the substra.fullname template ## name: "" - ## @param worker.volumeAccessMode Access mode for volume + ## @param worker.accessMode Access mode for volume ## - volumeAccessMode: "ReadWriteOnce" + accessMode: ["ReadWriteOnce"] ## @section Substra periodic tasks worker settings ## schedulerWorker: From ed3dc928a3a806c67a37d785247ed3956fd2d7ba Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Tue, 25 Jun 2024 09:59:27 +0200 Subject: [PATCH 11/22] chore: alpha release Signed-off-by: ThibaultFy --- backend/substrapp/compute_tasks/compute_pod.py | 2 +- charts/substra-backend/CHANGELOG.md | 5 +++++ charts/substra-backend/Chart.yaml | 4 ++-- charts/substra-backend/changes/935.changed | 1 + charts/substra-backend/templates/statefulset-worker.yaml | 2 +- charts/substra-backend/values.yaml | 5 ++++- 6 files changed, 14 insertions(+), 5 deletions(-) create mode 100644 charts/substra-backend/changes/935.changed diff --git a/backend/substrapp/compute_tasks/compute_pod.py b/backend/substrapp/compute_tasks/compute_pod.py index 63d65444f..2570fec97 100644 --- a/backend/substrapp/compute_tasks/compute_pod.py +++ b/backend/substrapp/compute_tasks/compute_pod.py @@ -1,8 +1,8 @@ -import yaml import os import kubernetes import structlog +import yaml from django.conf import settings from substrapp.kubernetes_utils import delete_pod diff --git a/charts/substra-backend/CHANGELOG.md b/charts/substra-backend/CHANGELOG.md index 972c19c49..11d810262 100644 --- a/charts/substra-backend/CHANGELOG.md +++ b/charts/substra-backend/CHANGELOG.md @@ -1,6 +1,11 @@ # Changelog +## [26.9.0] - 2024-07-22 + +# Added + +Configuration of compute pod `affinity`, `nodeSelector` and `toleration` on `values.yaml` file. ## [26.8.3] - 2024-07-16 diff --git a/charts/substra-backend/Chart.yaml b/charts/substra-backend/Chart.yaml index 9f1d3a581..2d6b094ef 100644 --- a/charts/substra-backend/Chart.yaml +++ b/charts/substra-backend/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v2 name: substra-backend home: https://github.com/Substra -version: 26.8.3 -appVersion: 0.47.0 +version: "26.9.0" +appVersion: "0.48.0" kubeVersion: ">= 1.19.0-0" description: Main package for Substra type: application diff --git a/charts/substra-backend/changes/935.changed b/charts/substra-backend/changes/935.changed new file mode 100644 index 000000000..649b6a46f --- /dev/null +++ b/charts/substra-backend/changes/935.changed @@ -0,0 +1 @@ +Compute pod `affinity`, `nodeSelector` and `tolerations` are now configured for environment variable defined in the `values.yaml` file. diff --git a/charts/substra-backend/templates/statefulset-worker.yaml b/charts/substra-backend/templates/statefulset-worker.yaml index a1f80dc57..5eff94c0f 100644 --- a/charts/substra-backend/templates/statefulset-worker.yaml +++ b/charts/substra-backend/templates/statefulset-worker.yaml @@ -136,7 +136,7 @@ spec: fieldRef: fieldPath: metadata.name - name: COMPUTE_POD_AFFINITY - value: {{ toYaml .Values.worker.computePod.affinity | quote }} # TODO see if it works to inject POD_NAME venv in string + value: {{ toYaml .Values.worker.computePod.affinity | quote }} - name: COMPUTE_POD_NODE_SELECTOR value: {{ toYaml .Values.worker.computePod.nodeSelector | quote }} - name: COMPUTE_POD_TOLERATIONS diff --git a/charts/substra-backend/values.yaml b/charts/substra-backend/values.yaml index 68dec0903..35046aaaf 100644 --- a/charts/substra-backend/values.yaml +++ b/charts/substra-backend/values.yaml @@ -382,7 +382,10 @@ worker: ## @param worker.computePod.tolerations Toleration labels for pod assignment ## tolerations: [] - ## @param worker.computePod.affinity Worker compute pod container affinity. + ## @param worker.computePod.affinity.podAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].key Pod affinity rule defnition. + ## @param worker.computePod.affinity.podAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].operator Pod affinity rule defnition. + ## @param worker.computePod.affinity.podAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].values Pod affinity rule defnition. + ## @param worker.computePod.affinity.podAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].topologyKey Pod affinity rule defnition. ## affinity: podAffinity: From f48c7263bd248773ca28cb3ab930899ce35d8a5c Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Tue, 25 Jun 2024 10:02:32 +0200 Subject: [PATCH 12/22] chore: chart doc Signed-off-by: ThibaultFy --- charts/substra-backend/README.md | 117 ++++++++++++++++--------------- 1 file changed, 62 insertions(+), 55 deletions(-) diff --git a/charts/substra-backend/README.md b/charts/substra-backend/README.md index cb93bc930..07e6e9474 100644 --- a/charts/substra-backend/README.md +++ b/charts/substra-backend/README.md @@ -118,61 +118,68 @@ See [UPGRADE.md](https://github.com/Substra/substra-backend/blob/main/charts/sub | `server.metrics.serviceMonitor.metricRelabelings` | MetricRelabelConfigs to apply to samples before insertion | `[]` | | `server.metrics.serviceMonitor.honorLabels` | Specify honorLabels parameter of the scrape endpoint | `false` | -### Substra worker settings - -| Name | Description | Value | -| ---------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------- | -| `worker.enabled` | Enable worker service | `true` | -| `worker.replicaCount` | Replica count for the worker service | `1` | -| `worker.concurrency` | Maximum amount of tasks to process in parallel | `1` | -| `worker.image.registry` | Substra backend worker image registry | `ghcr.io` | -| `worker.image.repository` | Substra backend worker image repository | `substra/substra-backend` | -| `worker.image.tag` | Substra backend worker image tag (defaults to AppVersion) | `nil` | -| `worker.image.pullPolicy` | Substra backend worker image pull policy | `IfNotPresent` | -| `worker.image.pullSecrets` | Specify image pull secrets | `[]` | -| `worker.podSecurityContext.enabled` | Enable security context | `true` | -| `worker.podSecurityContext.runAsUser` | User ID for the pod | `1001` | -| `worker.podSecurityContext.runAsGroup` | Group ID for the pod | `1001` | -| `worker.podSecurityContext.fsGroup` | FileSystem group ID for the pod | `1001` | -| `worker.resources.requests.cpu` | Worker container cpu request | `1000m` | -| `worker.resources.requests.memory` | Worker container memory request | `4Gi` | -| `worker.resources.limits.cpu` | Worker container cpu limit | `2000m` | -| `worker.resources.limits.memory` | Worker container memory limit | `8Gi` | -| `worker.nodeSelector` | Node labels for pod assignment | `{}` | -| `worker.tolerations` | Toleration labels for pod assignment | `[]` | -| `worker.affinity` | Affinity settings for pod assignment, ignored if `DataSampleStorageInServerMedia` is `true` | `{}` | -| `worker.rbac.create` | Create a role for the worker | `true` | -| `worker.serviceAccount.create` | Create a service account for the worker | `true` | -| `worker.serviceAccount.name` | The name of the ServiceAccount to use. If not set and create is true, a name is generated using the substra.fullname template | `""` | -| `worker.persistence.storageClass` | Specify the _StorageClass_ used to provision the volume. Or the default _StorageClass_ will be used. Set it to `-` to disable dynamic provisioning | `""` | -| `worker.persistence.size` | The size of the volume. The size of this volume should be sufficient to store many assets. | `10Gi` | -| `worker.computePod.maxStartupWaitSeconds` | Set the maximum amount of time we will wait for the compute pod to be ready | `300` | -| `worker.computePod.securityContext.fsGroup` | Set the filesystem group for the Compute pod | `1001` | -| `worker.computePod.securityContext.runAsUser` | Set the user for the Compute pod | `1001` | -| `worker.computePod.securityContext.runAsGroup` | Set the group for the Compute pod | `1001` | -| `worker.computePod.resources.requests.cpu` | Worker compute pod container cpu request | `1000m` | -| `worker.computePod.resources.requests.memory` | Worker compute pod container memory request | `1Gi` | -| `worker.computePod.resources.limits.memory` | Worker compute pod container memory limit | `64Gi` | -| `worker.events.enabled` | Enable event service | `true` | -| `worker.events.image.registry` | Substra event app image registry | `ghcr.io` | -| `worker.events.image.repository` | Substra event app image repository | `substra/substra-backend` | -| `worker.events.image.tag` | Substra event app image tag (defaults to AppVersion) | `nil` | -| `worker.events.image.pullPolicy` | Substra event app image pull policy | `IfNotPresent` | -| `worker.events.image.pullSecrets` | Specify image pull secrets | `[]` | -| `worker.events.resources.requests.cpu` | Worker events container cpu request | `500m` | -| `worker.events.resources.requests.memory` | Worker events container memory request | `200Mi` | -| `worker.events.resources.limits.cpu` | Worker events container cpu limit | `500m` | -| `worker.events.resources.limits.memory` | Worker events container memory limit | `400Mi` | -| `worker.events.podSecurityContext.enabled` | Enable security context | `true` | -| `worker.events.podSecurityContext.runAsUser` | User ID for the pod | `1001` | -| `worker.events.podSecurityContext.runAsGroup` | Group ID for the pod | `1001` | -| `worker.events.podSecurityContext.fsGroup` | FileSystem group ID for the pod | `1001` | -| `worker.events.nodeSelector` | Node labels for pod assignment | `{}` | -| `worker.events.tolerations` | Toleration labels for pod assignment | `[]` | -| `worker.events.affinity` | Affinity settings for pod assignment | `{}` | -| `worker.events.rbac.create` | Create a role and service account for the event app | `true` | -| `worker.events.serviceAccount.create` | Create a service account for the event app | `true` | -| `worker.events.serviceAccount.name` | The name of the ServiceAccount to use | `""` | +### Substra worker settings. Note that you can access the worker pod name using $(POD_NAME) and its node using $(NODE_NAME). + +| Name | Description | Value | +| ------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------ | +| `worker.enabled` | Enable worker service | `true` | +| `worker.replicaCount` | Replica count for the worker service | `1` | +| `worker.concurrency` | Maximum amount of tasks to process in parallel | `1` | +| `worker.image.registry` | Substra backend worker image registry | `ghcr.io` | +| `worker.image.repository` | Substra backend worker image repository | `substra/substra-backend` | +| `worker.image.tag` | Substra backend worker image tag (defaults to AppVersion) | `nil` | +| `worker.image.pullPolicy` | Substra backend worker image pull policy | `IfNotPresent` | +| `worker.image.pullSecrets` | Specify image pull secrets | `[]` | +| `worker.podSecurityContext.enabled` | Enable security context | `true` | +| `worker.podSecurityContext.runAsUser` | User ID for the pod | `1001` | +| `worker.podSecurityContext.runAsGroup` | Group ID for the pod | `1001` | +| `worker.podSecurityContext.fsGroup` | FileSystem group ID for the pod | `1001` | +| `worker.resources.requests.cpu` | Worker container cpu request | `1000m` | +| `worker.resources.requests.memory` | Worker container memory request | `4Gi` | +| `worker.resources.limits.cpu` | Worker container cpu limit | `2000m` | +| `worker.resources.limits.memory` | Worker container memory limit | `8Gi` | +| `worker.nodeSelector` | Node labels for pod assignment | `{}` | +| `worker.tolerations` | Toleration labels for pod assignment | `[]` | +| `worker.affinity` | Affinity settings for pod assignment, ignored if `DataSampleStorageInServerMedia` is `true` | `{}` | +| `worker.rbac.create` | Create a role for the worker | `true` | +| `worker.serviceAccount.create` | Create a service account for the worker | `true` | +| `worker.serviceAccount.name` | The name of the ServiceAccount to use. If not set and create is true, a name is generated using the substra.fullname template | `""` | +| `worker.persistence.storageClass` | Specify the _StorageClass_ used to provision the volume. Or the default _StorageClass_ will be used. Set it to `-` to disable dynamic provisioning | `""` | +| `worker.persistence.size` | The size of the volume. The size of this volume should be sufficient to store many assets. | `10Gi` | +| `worker.computePod.maxStartupWaitSeconds` | Set the maximum amount of time we will wait for the compute pod to be ready | `300` | +| `worker.computePod.securityContext.fsGroup` | Set the filesystem group for the Compute pod | `1001` | +| `worker.computePod.securityContext.runAsUser` | Set the user for the Compute pod | `1001` | +| `worker.computePod.securityContext.runAsGroup` | Set the group for the Compute pod | `1001` | +| `worker.computePod.resources.requests.cpu` | Worker compute pod container cpu request | `1000m` | +| `worker.computePod.resources.requests.memory` | Worker compute pod container memory request | `1Gi` | +| `worker.computePod.resources.limits.memory` | Worker compute pod container memory limit | `64Gi` | +| `worker.computePod.nodeSelector` | Node labels for pod assignment | `{}` | +| `worker.computePod.tolerations` | Toleration labels for pod assignment | `[]` | +| `worker.computePod.affinity.podAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].key` | Pod affinity rule defnition. | `statefulset.kubernetes.io/pod-name` | +| `worker.computePod.affinity.podAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].operator` | Pod affinity rule defnition. | `In` | +| `worker.computePod.affinity.podAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].values` | Pod affinity rule defnition. | `["$(POD_NAME)"]` | +| `worker.computePod.affinity.podAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].topologyKey` | Pod affinity rule defnition. | `kubernetes.io/hostname` | +| `worker.events.enabled` | Enable event service | `true` | +| `worker.events.image.registry` | Substra event app image registry | `ghcr.io` | +| `worker.events.image.repository` | Substra event app image repository | `substra/substra-backend` | +| `worker.events.image.tag` | Substra event app image tag (defaults to AppVersion) | `nil` | +| `worker.events.image.pullPolicy` | Substra event app image pull policy | `IfNotPresent` | +| `worker.events.image.pullSecrets` | Specify image pull secrets | `[]` | +| `worker.events.resources.requests.cpu` | Worker events container cpu request | `500m` | +| `worker.events.resources.requests.memory` | Worker events container memory request | `200Mi` | +| `worker.events.resources.limits.cpu` | Worker events container cpu limit | `500m` | +| `worker.events.resources.limits.memory` | Worker events container memory limit | `400Mi` | +| `worker.events.podSecurityContext.enabled` | Enable security context | `true` | +| `worker.events.podSecurityContext.runAsUser` | User ID for the pod | `1001` | +| `worker.events.podSecurityContext.runAsGroup` | Group ID for the pod | `1001` | +| `worker.events.podSecurityContext.fsGroup` | FileSystem group ID for the pod | `1001` | +| `worker.events.nodeSelector` | Node labels for pod assignment | `{}` | +| `worker.events.tolerations` | Toleration labels for pod assignment | `[]` | +| `worker.events.affinity` | Affinity settings for pod assignment | `{}` | +| `worker.events.rbac.create` | Create a role and service account for the event app | `true` | +| `worker.events.serviceAccount.create` | Create a service account for the event app | `true` | +| `worker.events.serviceAccount.name` | The name of the ServiceAccount to use | `""` | +| `worker.accessMode` | Access mode for volume | `["ReadWriteOnce"]` | ### Substra periodic tasks worker settings From 6c255ca4155e5fc5cd7238c2383f1818d6464354 Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Tue, 25 Jun 2024 10:46:29 +0200 Subject: [PATCH 13/22] chore: access modes Signed-off-by: ThibaultFy --- charts/substra-backend/README.md | 2 +- charts/substra-backend/templates/statefulset-worker.yaml | 2 +- charts/substra-backend/values.yaml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/substra-backend/README.md b/charts/substra-backend/README.md index 07e6e9474..4085376a0 100644 --- a/charts/substra-backend/README.md +++ b/charts/substra-backend/README.md @@ -179,7 +179,7 @@ See [UPGRADE.md](https://github.com/Substra/substra-backend/blob/main/charts/sub | `worker.events.rbac.create` | Create a role and service account for the event app | `true` | | `worker.events.serviceAccount.create` | Create a service account for the event app | `true` | | `worker.events.serviceAccount.name` | The name of the ServiceAccount to use | `""` | -| `worker.accessMode` | Access mode for volume | `["ReadWriteOnce"]` | +| `worker.accessModes` | Access modes for volume | `["ReadWriteOnce"]` | ### Substra periodic tasks worker settings diff --git a/charts/substra-backend/templates/statefulset-worker.yaml b/charts/substra-backend/templates/statefulset-worker.yaml index 5eff94c0f..33b9ebc77 100644 --- a/charts/substra-backend/templates/statefulset-worker.yaml +++ b/charts/substra-backend/templates/statefulset-worker.yaml @@ -241,7 +241,7 @@ spec: - metadata: name: subtuple spec: - accessModes: {{ .Values.worker.accessMode }} + accessModes: {{ .Values.worker.accessModes }} {{ include "common.storage.class" .Values.worker.persistence }} resources: requests: diff --git a/charts/substra-backend/values.yaml b/charts/substra-backend/values.yaml index 35046aaaf..0e5f19c49 100644 --- a/charts/substra-backend/values.yaml +++ b/charts/substra-backend/values.yaml @@ -456,9 +456,9 @@ worker: ## If not set and create is true, a name is generated using the substra.fullname template ## name: "" - ## @param worker.accessMode Access mode for volume + ## @param worker.accessModes Access modes for volume ## - accessMode: ["ReadWriteOnce"] + accessModes: ["ReadWriteOnce"] ## @section Substra periodic tasks worker settings ## schedulerWorker: From 85f4bb3d1aca623a1d152e775044ac9280d965fd Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Tue, 25 Jun 2024 15:07:06 +0200 Subject: [PATCH 14/22] chore: pass persistence.storageClass to backend chart Signed-off-by: ThibaultFy --- charts/substra-backend/templates/statefulset-worker.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/charts/substra-backend/templates/statefulset-worker.yaml b/charts/substra-backend/templates/statefulset-worker.yaml index 33b9ebc77..861240fb2 100644 --- a/charts/substra-backend/templates/statefulset-worker.yaml +++ b/charts/substra-backend/templates/statefulset-worker.yaml @@ -241,6 +241,7 @@ spec: - metadata: name: subtuple spec: + storageClassName: {{ .Values.worker.persistence.storageClass }} accessModes: {{ .Values.worker.accessModes }} {{ include "common.storage.class" .Values.worker.persistence }} resources: From 8761f22aaed6411e2e8ff386bd8f6d6e0de27ae6 Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Tue, 25 Jun 2024 15:59:15 +0200 Subject: [PATCH 15/22] chore(dev): remove storageClassName Signed-off-by: ThibaultFy --- charts/substra-backend/templates/statefulset-worker.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/charts/substra-backend/templates/statefulset-worker.yaml b/charts/substra-backend/templates/statefulset-worker.yaml index 861240fb2..33b9ebc77 100644 --- a/charts/substra-backend/templates/statefulset-worker.yaml +++ b/charts/substra-backend/templates/statefulset-worker.yaml @@ -241,7 +241,6 @@ spec: - metadata: name: subtuple spec: - storageClassName: {{ .Values.worker.persistence.storageClass }} accessModes: {{ .Values.worker.accessModes }} {{ include "common.storage.class" .Values.worker.persistence }} resources: From 6581aba7b8c6da73835aebbaefe12ee42335ec82 Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Wed, 3 Jul 2024 16:49:28 +0200 Subject: [PATCH 16/22] chore: changelog Signed-off-by: ThibaultFy --- charts/substra-backend/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/substra-backend/Chart.yaml b/charts/substra-backend/Chart.yaml index 2d6b094ef..551eccde1 100644 --- a/charts/substra-backend/Chart.yaml +++ b/charts/substra-backend/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: substra-backend home: https://github.com/Substra version: "26.9.0" -appVersion: "0.48.0" +appVersion: "0.47.0" kubeVersion: ">= 1.19.0-0" description: Main package for Substra type: application From 40fa54c18399941f1f1f97c8b9c2c2c02188fbba Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Mon, 8 Jul 2024 16:57:11 +0200 Subject: [PATCH 17/22] chore: add logging Signed-off-by: ThibaultFy --- backend/substrapp/compute_tasks/execute.py | 5 ++++- backend/substrapp/compute_tasks/image_builder.py | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/backend/substrapp/compute_tasks/execute.py b/backend/substrapp/compute_tasks/execute.py index a3a137623..a395ff447 100644 --- a/backend/substrapp/compute_tasks/execute.py +++ b/backend/substrapp/compute_tasks/execute.py @@ -50,7 +50,10 @@ def _is_pod_creation_needed(label_selector: str, *, client: Optional[kubernetes. if not client: client = _get_k8s_client() - return not pod_exists_by_label_selector(client, label_selector) + pod_creation_needed = not pod_exists_by_label_selector(client, label_selector) + logger.info(f"Pod creation needed {pod_creation_needed}", label_selector=label_selector) + + return pod_creation_needed def _is_function_image_downloaded(function_key: str) -> bool: diff --git a/backend/substrapp/compute_tasks/image_builder.py b/backend/substrapp/compute_tasks/image_builder.py index ca7ff295b..92bd4ebc3 100644 --- a/backend/substrapp/compute_tasks/image_builder.py +++ b/backend/substrapp/compute_tasks/image_builder.py @@ -24,7 +24,11 @@ def push_blob_to_registry(blob: bytes, tag: str) -> None: os.makedirs(SUBTUPLE_TMP_DIR, exist_ok=True) with TemporaryDirectory(dir=SUBTUPLE_TMP_DIR) as tmp_dir: storage_path = pathlib.Path(tmp_dir) / f"{tag}.zip" + + logger.info("Starting writing payload", tag=tag) storage_path.write_bytes(blob) + logger.info("Writting payload succeed", tag=tag) + push_payload( storage_path, registry=REGISTRY, repository=USER_IMAGE_REPOSITORY, secure=REGISTRY_SCHEME == "https" ) @@ -33,12 +37,13 @@ def push_blob_to_registry(blob: bytes, tag: str) -> None: def load_remote_function_image(function: orchestrator.Function, channel: str) -> None: # Ask the backend owner of the function if it's available container_image_tag = utils.container_image_tag_from_function(function) - + logger.info("Starting to download image content", function_key=function.key) function_image_content = organization_client.get( channel=channel, organization_id=function.owner, url=function.image.uri, checksum=function.image.checksum, ) + logger.info("Download function succeed", function_key=function.key) push_blob_to_registry(function_image_content, tag=container_image_tag) From 77b2fdc7fdc9550e9b9951034770bf322d9732f1 Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Tue, 9 Jul 2024 09:26:49 +0200 Subject: [PATCH 18/22] chore: more logs Signed-off-by: ThibaultFy --- backend/substrapp/clients/organization.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/backend/substrapp/clients/organization.py b/backend/substrapp/clients/organization.py index 332a0e541..1e2c4ab6a 100644 --- a/backend/substrapp/clients/organization.py +++ b/backend/substrapp/clients/organization.py @@ -184,7 +184,12 @@ def get( ) -> bytes: """Get asset data.""" content = _http_request(_Method.GET, channel, organization_id, url).content + logger.info("Downloading content succeed") + + logger.info("Starting computing hash") new_checksum = compute_hash(content, key=salt) + logger.info("Computing hash succeed") + if new_checksum != checksum: raise IntegrityError(f"url {url}: checksum doesn't match {checksum} vs {new_checksum}") return content From 95581d6dff1b831296ea6c1f48477f9f7196284e Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Tue, 9 Jul 2024 10:05:16 +0200 Subject: [PATCH 19/22] chore: more logs Signed-off-by: ThibaultFy --- backend/substrapp/clients/organization.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/substrapp/clients/organization.py b/backend/substrapp/clients/organization.py index 1e2c4ab6a..46fa9902b 100644 --- a/backend/substrapp/clients/organization.py +++ b/backend/substrapp/clients/organization.py @@ -125,6 +125,7 @@ def _http_request( response = None try: + logger.ingo("Sending http request", headers=headers, timeout=_HTTP_TIMEOUT, verify=_HTTP_VERIFY) response = _HTTP_METHOD_TO_FUNC[method]( url, headers=_add_mandatory_headers(headers, channel), @@ -133,6 +134,7 @@ def _http_request( timeout=_HTTP_TIMEOUT, **_http_request_kwargs(data, stream), ) + logger.ingo("Fecthing http response success", response=response) except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as exc: raise OrganizationError(f"Failed to connect to {organization_id}") from exc From c13c67b0d647440a91ad460f79b78939e65f4de8 Mon Sep 17 00:00:00 2001 From: ThibaultFy Date: Tue, 9 Jul 2024 11:40:25 +0200 Subject: [PATCH 20/22] chore(dev): bump to alpha.4 for more logs Signed-off-by: ThibaultFy --- backend/substrapp/clients/organization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/substrapp/clients/organization.py b/backend/substrapp/clients/organization.py index 46fa9902b..316f6d160 100644 --- a/backend/substrapp/clients/organization.py +++ b/backend/substrapp/clients/organization.py @@ -125,7 +125,7 @@ def _http_request( response = None try: - logger.ingo("Sending http request", headers=headers, timeout=_HTTP_TIMEOUT, verify=_HTTP_VERIFY) + logger.info("Sending http request", headers=headers, timeout=_HTTP_TIMEOUT, verify=_HTTP_VERIFY) response = _HTTP_METHOD_TO_FUNC[method]( url, headers=_add_mandatory_headers(headers, channel), @@ -134,7 +134,7 @@ def _http_request( timeout=_HTTP_TIMEOUT, **_http_request_kwargs(data, stream), ) - logger.ingo("Fecthing http response success", response=response) + logger.info("Fecthing http response success", response=response) except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as exc: raise OrganizationError(f"Failed to connect to {organization_id}") from exc @@ -186,7 +186,7 @@ def get( ) -> bytes: """Get asset data.""" content = _http_request(_Method.GET, channel, organization_id, url).content - logger.info("Downloading content succeed") + logger.info("Http request succeed") logger.info("Starting computing hash") new_checksum = compute_hash(content, key=salt) From b78d8e4acb36d44b46af10865f7ac0432c77496d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Barth=C3=A9s?= Date: Wed, 10 Jul 2024 15:22:23 +0200 Subject: [PATCH 21/22] chore: remove debug logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Guilhem Barthés --- backend/substrapp/clients/organization.py | 5 ----- backend/substrapp/compute_tasks/execute.py | 5 +---- backend/substrapp/compute_tasks/image_builder.py | 6 ------ 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/backend/substrapp/clients/organization.py b/backend/substrapp/clients/organization.py index 316f6d160..cf447c4a2 100644 --- a/backend/substrapp/clients/organization.py +++ b/backend/substrapp/clients/organization.py @@ -125,7 +125,6 @@ def _http_request( response = None try: - logger.info("Sending http request", headers=headers, timeout=_HTTP_TIMEOUT, verify=_HTTP_VERIFY) response = _HTTP_METHOD_TO_FUNC[method]( url, headers=_add_mandatory_headers(headers, channel), @@ -134,7 +133,6 @@ def _http_request( timeout=_HTTP_TIMEOUT, **_http_request_kwargs(data, stream), ) - logger.info("Fecthing http response success", response=response) except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as exc: raise OrganizationError(f"Failed to connect to {organization_id}") from exc @@ -186,11 +184,8 @@ def get( ) -> bytes: """Get asset data.""" content = _http_request(_Method.GET, channel, organization_id, url).content - logger.info("Http request succeed") - logger.info("Starting computing hash") new_checksum = compute_hash(content, key=salt) - logger.info("Computing hash succeed") if new_checksum != checksum: raise IntegrityError(f"url {url}: checksum doesn't match {checksum} vs {new_checksum}") diff --git a/backend/substrapp/compute_tasks/execute.py b/backend/substrapp/compute_tasks/execute.py index a395ff447..a3a137623 100644 --- a/backend/substrapp/compute_tasks/execute.py +++ b/backend/substrapp/compute_tasks/execute.py @@ -50,10 +50,7 @@ def _is_pod_creation_needed(label_selector: str, *, client: Optional[kubernetes. if not client: client = _get_k8s_client() - pod_creation_needed = not pod_exists_by_label_selector(client, label_selector) - logger.info(f"Pod creation needed {pod_creation_needed}", label_selector=label_selector) - - return pod_creation_needed + return not pod_exists_by_label_selector(client, label_selector) def _is_function_image_downloaded(function_key: str) -> bool: diff --git a/backend/substrapp/compute_tasks/image_builder.py b/backend/substrapp/compute_tasks/image_builder.py index 92bd4ebc3..fc4d50429 100644 --- a/backend/substrapp/compute_tasks/image_builder.py +++ b/backend/substrapp/compute_tasks/image_builder.py @@ -24,11 +24,7 @@ def push_blob_to_registry(blob: bytes, tag: str) -> None: os.makedirs(SUBTUPLE_TMP_DIR, exist_ok=True) with TemporaryDirectory(dir=SUBTUPLE_TMP_DIR) as tmp_dir: storage_path = pathlib.Path(tmp_dir) / f"{tag}.zip" - - logger.info("Starting writing payload", tag=tag) storage_path.write_bytes(blob) - logger.info("Writting payload succeed", tag=tag) - push_payload( storage_path, registry=REGISTRY, repository=USER_IMAGE_REPOSITORY, secure=REGISTRY_SCHEME == "https" ) @@ -37,13 +33,11 @@ def push_blob_to_registry(blob: bytes, tag: str) -> None: def load_remote_function_image(function: orchestrator.Function, channel: str) -> None: # Ask the backend owner of the function if it's available container_image_tag = utils.container_image_tag_from_function(function) - logger.info("Starting to download image content", function_key=function.key) function_image_content = organization_client.get( channel=channel, organization_id=function.owner, url=function.image.uri, checksum=function.image.checksum, ) - logger.info("Download function succeed", function_key=function.key) push_blob_to_registry(function_image_content, tag=container_image_tag) From 8a13af27c0642ada270f8e84df500d7899ca1d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Barth=C3=A9s?= Date: Wed, 10 Jul 2024 15:37:24 +0200 Subject: [PATCH 22/22] feat: raise uncatched exceptions in `image_transfer/encoder.py::get_manifests_and_list_of_all_blobs` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Guilhem Barthés --- backend/image_transfer/encoder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/image_transfer/encoder.py b/backend/image_transfer/encoder.py index ab046f582..9977ddae9 100644 --- a/backend/image_transfer/encoder.py +++ b/backend/image_transfer/encoder.py @@ -98,6 +98,7 @@ def get_manifests_and_list_of_all_blobs( raise RegistryPreconditionFailedException( f"{docker_image} is either not scanned yet or not passing the vulnerability checks." ) from e + raise e manifests.append(manifest) blobs_to_pull += blobs return manifests, blobs_to_pull