From 5bc9ddc923b9b49af605f405ae2dc6170efa2920 Mon Sep 17 00:00:00 2001 From: Tamir David Date: Sun, 27 Oct 2024 17:07:38 +0200 Subject: [PATCH] fix: new runtime inspection to run only when all containers are ready and started --- .../kube/runtime_details/pods_controller.go | 82 ++++++++++++++++--- 1 file changed, 72 insertions(+), 10 deletions(-) diff --git a/odiglet/pkg/kube/runtime_details/pods_controller.go b/odiglet/pkg/kube/runtime_details/pods_controller.go index 77f229c52..e615e0d6c 100644 --- a/odiglet/pkg/kube/runtime_details/pods_controller.go +++ b/odiglet/pkg/kube/runtime_details/pods_controller.go @@ -4,6 +4,7 @@ import ( "context" odigosv1 "github.com/odigos-io/odigos/api/odigos/v1alpha1" + "github.com/odigos-io/odigos/common" k8sutils "github.com/odigos-io/odigos/k8sutils/pkg/utils" "github.com/odigos-io/odigos/k8sutils/pkg/workload" corev1 "k8s.io/api/core/v1" @@ -18,20 +19,36 @@ import ( type podPredicate struct{} func (p *podPredicate) Create(e event.CreateEvent) bool { - // this function is called when a new entity is created in the controller-runtime cache. - // 1. When odiglet restarts, it will receive create events for all running pods. - // 2. When a new pod is created in k8s, should not have status phase as running, but it does not harm to check. pod, ok := e.Object.(*corev1.Pod) if !ok { return false } - return pod.Status.Phase == corev1.PodRunning + + // Check if pod is in Running phase. This is the first requirement + if pod.Status.Phase != corev1.PodRunning { + return false + } + + // If pod has no containers, return false as we can't determine readiness + if len(pod.Status.ContainerStatuses) == 0 { + return false + } + + // Iterate over all containers in the pod + // Return false if any container is: + // 1. Not Ready + // 2. Started is nil or false + for _, containerStatus := range pod.Status.ContainerStatuses { + if !containerStatus.Ready || containerStatus.Started == nil || !*containerStatus.Started { + return false + } + } + + // All conditions met - pod is running and all containers are ready and started + return true } func (p *podPredicate) Update(e event.UpdateEvent) bool { - // this function is called when an entity is updated in the controller-runtime cache. - // We care about pod once it's status changes from not running to running - - // this is the point in time we want to apply runtime details detection. oldPod, oldOk := e.ObjectOld.(*corev1.Pod) newPod, newOk := e.ObjectNew.(*corev1.Pod) @@ -39,7 +56,37 @@ func (p *podPredicate) Update(e event.UpdateEvent) bool { return false } - return oldPod.Status.Phase != corev1.PodRunning && newPod.Status.Phase == corev1.PodRunning + // If pod has no containers, return false as we can't determine readiness + if len(newPod.Status.ContainerStatuses) == 0 { + return false + } + + // First check if all containers in newPod are ready and started + allNewContainersReady := true + for _, containerStatus := range newPod.Status.ContainerStatuses { + if !containerStatus.Ready || containerStatus.Started == nil || !*containerStatus.Started { + allNewContainersReady = false + break + } + } + + // If new containers aren't all ready, return false + if !allNewContainersReady { + return false + } + + // Now check if any container in oldPod was not ready or not started + allOldContainersReady := true + for _, containerStatus := range oldPod.Status.ContainerStatuses { + if !containerStatus.Ready || containerStatus.Started == nil || !*containerStatus.Started { + allOldContainersReady = false + break + } + } + + // Return true only if old pods had at least one container not ready/not started + // and new pod has all containers ready/started + return !allOldContainersReady && allNewContainersReady } func (p *podPredicate) Delete(e event.DeleteEvent) bool { @@ -91,14 +138,20 @@ func (p *PodsReconciler) Reconcile(ctx context.Context, request reconcile.Reques if err != nil { return reconcile.Result{}, client.IgnoreNotFound(err) } - podGeneration, err := GetPodGeneration(ctx, p.Clientset, &pod) if err != nil { return reconcile.Result{}, err } // prevent runtime inspection on pods for which we already have the runtime details for this generation - if podGeneration == 0 || podGeneration <= instrumentationConfig.Status.ObservedWorkloadGeneration { + // if instrumentation config contains unknown language we need to re-inspect the pod + failedToGetPodGeneration := podGeneration == 0 + isNewPodGeneration := podGeneration > instrumentationConfig.Status.ObservedWorkloadGeneration + instrumentedConfigContainUnknown := InstrumentationConfigContainsUnknownLanguage(instrumentationConfig) + + shouldSkipDetection := failedToGetPodGeneration || (!isNewPodGeneration && !instrumentedConfigContainUnknown) + + if shouldSkipDetection { logger.V(3).Info("skipping redundant runtime details detection since generation is not newer", "name", request.Name, "namespace", request.Namespace, "currentPodGeneration", podGeneration, "observedWorkloadGeneration", instrumentationConfig.Status.ObservedWorkloadGeneration) return reconcile.Result{}, nil } @@ -143,3 +196,12 @@ func (p *PodsReconciler) getPodWorkloadObject(ctx context.Context, pod *corev1.P // Pod does not necessarily have to be managed by a controller return nil, nil } + +func InstrumentationConfigContainsUnknownLanguage(config odigosv1.InstrumentationConfig) bool { + for _, containerDetails := range config.Status.RuntimeDetailsByContainer { + if containerDetails.Language == common.UnknownProgrammingLanguage { + return true + } + } + return false +}