From ad85bb56c8f48eb675c219f6b2b179c1c58c0232 Mon Sep 17 00:00:00 2001 From: Tamir David Date: Sun, 27 Oct 2024 13:14:26 +0200 Subject: [PATCH 1/2] feat: delete odigos MutatingWebhookConfiguration object (#1632) Co-authored-by: Tamir David --- cli/cmd/uninstall.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/cli/cmd/uninstall.go b/cli/cmd/uninstall.go index cd97726c93..c3ff21465c 100644 --- a/cli/cmd/uninstall.go +++ b/cli/cmd/uninstall.go @@ -102,6 +102,10 @@ var uninstallCmd = &cobra.Command{ createKubeResourceWithLogging(ctx, "Uninstalling Odigos CRDs", client, cmd, ns, uninstallCRDs) + + createKubeResourceWithLogging(ctx, "Uninstalling Odigos MutatingWebhookConfigurations", + client, cmd, ns, uninstallMutatingWebhookConfigs) + fmt.Printf("\n\u001B[32mSUCCESS:\u001B[0m Odigos uninstalled.\n") }, } @@ -398,6 +402,26 @@ func uninstallCRDs(ctx context.Context, cmd *cobra.Command, client *kube.Client, return nil } +func uninstallMutatingWebhookConfigs(ctx context.Context, cmd *cobra.Command, client *kube.Client, ns string) error { + list, err := client.AdmissionregistrationV1().MutatingWebhookConfigurations().List(ctx, metav1.ListOptions{ + LabelSelector: metav1.FormatLabelSelector(&metav1.LabelSelector{ + MatchLabels: labels.OdigosSystem, + }), + }) + if err != nil { + return err + } + + for _, webhook := range list.Items { + err = client.AdmissionregistrationV1().MutatingWebhookConfigurations().Delete(ctx, webhook.Name, metav1.DeleteOptions{}) + if err != nil { + return err + } + } + + return nil +} + func uninstallRBAC(ctx context.Context, cmd *cobra.Command, client *kube.Client, ns string) error { list, err := client.RbacV1().ClusterRoles().List(ctx, metav1.ListOptions{ LabelSelector: metav1.FormatLabelSelector(&metav1.LabelSelector{ From fd310cc82af089288c069f1f84f73444e8cdca68 Mon Sep 17 00:00:00 2001 From: Amir Blum Date: Sun, 27 Oct 2024 13:51:13 +0200 Subject: [PATCH 2/2] feat: add odigos describe analyze phase to make the 
info accessible to tools (#1633) Until this PR, every time we calculated the `odigos describe` output, we transformed it to text, which makes it less useful to be used in odigos ui and other tools, as the data is human readable and not machine readable. This PR separates the 2 tasks. Now one can simply get the results of the `odigos describe` command and know immediately if something is still in transit or has errors. Tools can process this data to display it nicely or act upon it. This function offers a handy aggregation for the relevant data in the context of odigos entities. --- cli/cmd/describe.go | 7 +- frontend/endpoints/describe.go | 28 +- k8sutils/pkg/describe/common.go | 4 + k8sutils/pkg/describe/odigos.go | 219 +++-------- k8sutils/pkg/describe/odigos/analyze.go | 368 ++++++++++++++++++ k8sutils/pkg/describe/odigos/resources.go | 8 + .../pkg/describe/properties/properties.go | 28 ++ k8sutils/pkg/describe/properties/utils.go | 25 ++ 8 files changed, 511 insertions(+), 176 deletions(-) create mode 100644 k8sutils/pkg/describe/odigos/analyze.go create mode 100644 k8sutils/pkg/describe/properties/properties.go create mode 100644 k8sutils/pkg/describe/properties/utils.go diff --git a/cli/cmd/describe.go b/cli/cmd/describe.go index 9426ea5955..7efbfc7db6 100644 --- a/cli/cmd/describe.go +++ b/cli/cmd/describe.go @@ -42,7 +42,12 @@ var describeCmd = &cobra.Command{ if describeRemoteFlag { describeText = executeRemoteOdigosDescribe(ctx, client, odigosNs) } else { - describeText = describe.DescribeOdigos(ctx, client, client.OdigosClient, odigosNs) + describeAnalyze, err := describe.DescribeOdigos(ctx, client, client.OdigosClient, odigosNs) + if err != nil { + describeText = fmt.Sprintf("Failed to describe odigos: %s", err) + } else { + describeText = describe.DescribeOdigosToText(describeAnalyze) + } } fmt.Println(describeText) }, diff --git a/frontend/endpoints/describe.go b/frontend/endpoints/describe.go index c67e4ef4a4..fb9e43b4a6 100644 --- 
a/frontend/endpoints/describe.go +++ b/frontend/endpoints/describe.go @@ -10,8 +10,32 @@ import ( func DescribeOdigos(c *gin.Context) { ctx := c.Request.Context() odiogosNs := env.GetCurrentNamespace() - describeText := describe.DescribeOdigos(ctx, kube.DefaultClient, kube.DefaultClient.OdigosClient, odiogosNs) - c.Writer.WriteString(describeText) + desc, err := describe.DescribeOdigos(ctx, kube.DefaultClient, kube.DefaultClient.OdigosClient, odiogosNs) + if err != nil { + c.JSON(500, gin.H{ + "message": err.Error(), + }) + return + } + + // construct the http response code based on the status of the odigos + returnCode := 200 + if desc.HasErrors { + returnCode = 500 + } else if !desc.IsSettled { + returnCode = 202 + } + + // Check for the Accept header + acceptHeader := c.GetHeader("Accept") + + if acceptHeader == "application/json" { + // Return JSON response if Accept header is "application/json" + c.JSON(returnCode, desc) + } else { + describeText := describe.DescribeOdigosToText(desc) + c.String(returnCode, describeText) + } } func DescribeSource(c *gin.Context, ns string, kind string, name string) { diff --git a/k8sutils/pkg/describe/common.go b/k8sutils/pkg/describe/common.go index 3fe0261545..e501de8862 100644 --- a/k8sutils/pkg/describe/common.go +++ b/k8sutils/pkg/describe/common.go @@ -13,6 +13,10 @@ func wrapTextInGreen(text string) string { return "\033[32m" + text + "\033[0m" } +func wrapTextInYellow(text string) string { + return "\033[33m" + text + "\033[0m" +} + func wrapTextSuccessOfFailure(text string, success bool) string { if success { return wrapTextInGreen(text) diff --git a/k8sutils/pkg/describe/odigos.go b/k8sutils/pkg/describe/odigos.go index 91822a3a0e..cd477b21f8 100644 --- a/k8sutils/pkg/describe/odigos.go +++ b/k8sutils/pkg/describe/odigos.go @@ -7,206 +7,79 @@ import ( odigosclientset "github.com/odigos-io/odigos/api/generated/odigos/clientset/versioned/typed/odigos/v1alpha1" odigos 
"github.com/odigos-io/odigos/k8sutils/pkg/describe/odigos" - "github.com/odigos-io/odigos/k8sutils/pkg/getters" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/odigos-io/odigos/k8sutils/pkg/describe/properties" "k8s.io/client-go/kubernetes" ) -func printOdigosVersion(odigosVersion string, sb *strings.Builder) { - describeText(sb, 0, "Odigos Version: %s", odigosVersion) -} - -func printClusterCollectorStatus(resources *odigos.OdigosResources, sb *strings.Builder) { - - expectingClusterCollector := len(resources.Destinations.Items) > 0 - - describeText(sb, 1, "Cluster Collector:") - clusterCollector := resources.ClusterCollector - - if expectingClusterCollector { - describeText(sb, 2, "Status: Cluster Collector is expected to be created because there are destinations") - } else { - describeText(sb, 2, "Status: Cluster Collector is not expected to be created because there are no destinations") +func printProperty(sb *strings.Builder, indent int, property *properties.EntityProperty) { + if property == nil { + return } - - if clusterCollector.CollectorsGroup == nil { - describeText(sb, 2, wrapTextSuccessOfFailure("Collectors Group Not Created", !expectingClusterCollector)) - } else { - describeText(sb, 2, wrapTextSuccessOfFailure("Collectors Group Created", expectingClusterCollector)) - - var deployedCondition *metav1.Condition - for _, condition := range clusterCollector.CollectorsGroup.Status.Conditions { - if condition.Type == "Deployed" { - deployedCondition = &condition - break - } - } - if deployedCondition == nil { - describeText(sb, 2, wrapTextInRed("Deployed: Status Unavailable")) - } else { - if deployedCondition.Status == metav1.ConditionTrue { - describeText(sb, 2, wrapTextInGreen("Deployed: true")) - } else { - describeText(sb, 2, wrapTextInRed("Deployed: false")) - describeText(sb, 2, wrapTextInRed(fmt.Sprintf("Reason: %s", deployedCondition.Message))) - } - } - - ready := 
clusterCollector.CollectorsGroup.Status.Ready - describeText(sb, 2, wrapTextSuccessOfFailure(fmt.Sprintf("Ready: %t", ready), ready)) - } - - expectedReplicas := int32(0) - if clusterCollector.Deployment == nil { - describeText(sb, 2, wrapTextSuccessOfFailure("Deployment: Not Found", !expectingClusterCollector)) - } else { - describeText(sb, 2, wrapTextSuccessOfFailure("Deployment: Found", expectingClusterCollector)) - expectedReplicas = *clusterCollector.Deployment.Spec.Replicas - describeText(sb, 2, fmt.Sprintf("Expected Replicas: %d", expectedReplicas)) - } - - if clusterCollector.LatestRevisionPods != nil { - runningReplicas := 0 - failureReplicas := 0 - var failureText string - for _, pod := range clusterCollector.LatestRevisionPods.Items { - var condition *corev1.PodCondition - for i := range pod.Status.Conditions { - c := pod.Status.Conditions[i] - if c.Type == corev1.PodReady { - condition = &c - break - } - } - if condition == nil { - failureReplicas++ - } else { - if condition.Status == corev1.ConditionTrue { - runningReplicas++ - } else { - failureReplicas++ - failureText = condition.Message - } - } - } - podReplicasText := fmt.Sprintf("Actual Replicas: %d running, %d failed", runningReplicas, failureReplicas) - deploymentSuccessful := runningReplicas == int(expectedReplicas) && failureReplicas == 0 - describeText(sb, 2, wrapTextSuccessOfFailure(podReplicasText, deploymentSuccessful)) - if !deploymentSuccessful { - describeText(sb, 2, wrapTextInRed(fmt.Sprintf("Replicas Not Ready Reason: %s", failureText))) - } + text := fmt.Sprintf("%s: %v", property.Name, property.Value) + switch property.Status { + case properties.PropertyStatusSuccess: + text = wrapTextInGreen(text) + case properties.PropertyStatusError: + text = wrapTextInRed(text) + case properties.PropertyStatusTransitioning: + text = wrapTextInYellow(text) } + describeText(sb, indent, text) } -func printAndCalculateIsNodeCollectorStatus(resources *odigos.OdigosResources, sb *strings.Builder) bool 
{ - - numInstrumentationConfigs := len(resources.InstrumentationConfigs.Items) - if numInstrumentationConfigs == 0 { - describeText(sb, 2, "Status: Node Collectors not expected as there are no sources") - return false - } - - if resources.ClusterCollector.CollectorsGroup == nil { - describeText(sb, 2, "Status: Node Collectors not expected as there are no destinations") - return false - } - - if !resources.ClusterCollector.CollectorsGroup.Status.Ready { - describeText(sb, 2, "Status: Node Collectors not expected as the Cluster Collector is not ready") - return false - } - - describeText(sb, 2, "Status: Node Collectors expected as cluster collector is ready and there are sources") - return true +func printClusterCollectorStatus(analyze *odigos.OdigosAnalyze, sb *strings.Builder) { + describeText(sb, 1, "Cluster Collector:") + printProperty(sb, 2, &analyze.ClusterCollector.Enabled) + printProperty(sb, 2, &analyze.ClusterCollector.CollectorGroup) + printProperty(sb, 2, analyze.ClusterCollector.Deployed) + printProperty(sb, 2, analyze.ClusterCollector.DeployedError) + printProperty(sb, 2, analyze.ClusterCollector.CollectorReady) + printProperty(sb, 2, &analyze.ClusterCollector.DeploymentCreated) + printProperty(sb, 2, analyze.ClusterCollector.ExpectedReplicas) + printProperty(sb, 2, analyze.ClusterCollector.HealthyReplicas) + printProperty(sb, 2, analyze.ClusterCollector.FailedReplicas) + printProperty(sb, 2, analyze.ClusterCollector.FailedReplicasReason) } -func printNodeCollectorStatus(resources *odigos.OdigosResources, sb *strings.Builder) { - +func printNodeCollectorStatus(analyze *odigos.OdigosAnalyze, sb *strings.Builder) { describeText(sb, 1, "Node Collector:") - nodeCollector := resources.NodeCollector - - expectingNodeCollector := printAndCalculateIsNodeCollectorStatus(resources, sb) - - if nodeCollector.CollectorsGroup == nil { - describeText(sb, 2, wrapTextSuccessOfFailure("Collectors Group Not Created", !expectingNodeCollector)) - } else { - describeText(sb, 
2, wrapTextSuccessOfFailure("Collectors Group Created", expectingNodeCollector)) - - var deployedCondition *metav1.Condition - for _, condition := range nodeCollector.CollectorsGroup.Status.Conditions { - if condition.Type == "Deployed" { - deployedCondition = &condition - break - } - } - if deployedCondition == nil { - describeText(sb, 2, wrapTextInRed("Deployed: Status Unavailable")) - } else { - if deployedCondition.Status == metav1.ConditionTrue { - describeText(sb, 2, wrapTextInGreen("Deployed: True")) - } else { - describeText(sb, 2, wrapTextInRed("Deployed: False")) - describeText(sb, 2, wrapTextInRed(fmt.Sprintf("Reason: %s", deployedCondition.Message))) - } - } - - ready := nodeCollector.CollectorsGroup.Status.Ready - describeText(sb, 2, wrapTextSuccessOfFailure(fmt.Sprintf("Ready: %t", ready), ready)) - } - - if nodeCollector.DaemonSet == nil { - describeText(sb, 2, wrapTextSuccessOfFailure("DaemonSet: Not Found", !expectingNodeCollector)) - } else { - describeText(sb, 2, wrapTextSuccessOfFailure("DaemonSet: Found", expectingNodeCollector)) - - // this is copied from k8sutils/pkg/describe/describe.go - // I hope the info is accurate since there can be many edge cases - describeText(sb, 2, "Desired Number of Nodes Scheduled: %d", nodeCollector.DaemonSet.Status.DesiredNumberScheduled) - currentMeetsDesired := nodeCollector.DaemonSet.Status.DesiredNumberScheduled == nodeCollector.DaemonSet.Status.CurrentNumberScheduled - describeText(sb, 2, wrapTextSuccessOfFailure(fmt.Sprintf("Current Number of Nodes Scheduled: %d", nodeCollector.DaemonSet.Status.CurrentNumberScheduled), currentMeetsDesired)) - updatedMeetsDesired := nodeCollector.DaemonSet.Status.DesiredNumberScheduled == nodeCollector.DaemonSet.Status.UpdatedNumberScheduled - describeText(sb, 2, wrapTextSuccessOfFailure(fmt.Sprintf("Number of Nodes Scheduled with Up-to-date Pods: %d", nodeCollector.DaemonSet.Status.UpdatedNumberScheduled), updatedMeetsDesired)) - availableMeetsDesired := 
nodeCollector.DaemonSet.Status.DesiredNumberScheduled == nodeCollector.DaemonSet.Status.NumberAvailable - describeText(sb, 2, wrapTextSuccessOfFailure(fmt.Sprintf("Number of Nodes Scheduled with Available Pods: %d", nodeCollector.DaemonSet.Status.NumberAvailable), availableMeetsDesired)) - noMisscheduled := nodeCollector.DaemonSet.Status.NumberMisscheduled == 0 - describeText(sb, 2, wrapTextSuccessOfFailure(fmt.Sprintf("Number of Nodes Misscheduled: %d", nodeCollector.DaemonSet.Status.NumberMisscheduled), noMisscheduled)) - } + printProperty(sb, 2, &analyze.NodeCollector.Enabled) + printProperty(sb, 2, &analyze.NodeCollector.CollectorGroup) + printProperty(sb, 2, analyze.NodeCollector.Deployed) + printProperty(sb, 2, analyze.NodeCollector.DeployedError) + printProperty(sb, 2, analyze.NodeCollector.CollectorReady) + printProperty(sb, 2, &analyze.NodeCollector.DaemonSet) + printProperty(sb, 2, analyze.NodeCollector.DesiredNodes) + printProperty(sb, 2, analyze.NodeCollector.CurrentNodes) + printProperty(sb, 2, analyze.NodeCollector.UpdatedNodes) + printProperty(sb, 2, analyze.NodeCollector.AvailableNodes) } -func printOdigosPipeline(resources *odigos.OdigosResources, sb *strings.Builder) { +func printOdigosPipeline(analyze *odigos.OdigosAnalyze, sb *strings.Builder) { describeText(sb, 0, "Odigos Pipeline:") - numDestinations := len(resources.Destinations.Items) - numInstrumentationConfigs := len(resources.InstrumentationConfigs.Items) - - describeText(sb, 1, "Status: there are %d sources and %d destinations\n", numInstrumentationConfigs, numDestinations) - printClusterCollectorStatus(resources, sb) + describeText(sb, 1, "Status: there are %d sources and %d destinations\n", analyze.NumberOfSources, analyze.NumberOfDestinations) + printClusterCollectorStatus(analyze, sb) sb.WriteString("\n") - printNodeCollectorStatus(resources, sb) + printNodeCollectorStatus(analyze, sb) } -func printDescribeOdigos(odigosVersion string, resources *odigos.OdigosResources) string { +func 
DescribeOdigosToText(analyze *odigos.OdigosAnalyze) string { var sb strings.Builder - printOdigosVersion(odigosVersion, &sb) + printProperty(&sb, 0, &analyze.OdigosVersion) sb.WriteString("\n") - printOdigosPipeline(resources, &sb) + printOdigosPipeline(analyze, &sb) return sb.String() } -func DescribeOdigos(ctx context.Context, kubeClient kubernetes.Interface, odigosClient odigosclientset.OdigosV1alpha1Interface, odigosNs string) string { - - odigosVersion, err := getters.GetOdigosVersionInClusterFromConfigMap(ctx, kubeClient, odigosNs) - if err != nil { - return fmt.Sprintf("Error: %v\n", err) - } +func DescribeOdigos(ctx context.Context, kubeClient kubernetes.Interface, odigosClient odigosclientset.OdigosV1alpha1Interface, odigosNs string) (*odigos.OdigosAnalyze, error) { odigosResources, err := odigos.GetRelevantOdigosResources(ctx, kubeClient, odigosClient, odigosNs) if err != nil { - return fmt.Sprintf("Error: %v\n", err) + return nil, err } - return printDescribeOdigos(odigosVersion, odigosResources) + return odigos.AnalyzeOdigos(odigosResources), nil } diff --git a/k8sutils/pkg/describe/odigos/analyze.go b/k8sutils/pkg/describe/odigos/analyze.go new file mode 100644 index 0000000000..2bfaebd4f4 --- /dev/null +++ b/k8sutils/pkg/describe/odigos/analyze.go @@ -0,0 +1,368 @@ +package odigos + +import ( + odigosv1 "github.com/odigos-io/odigos/api/odigos/v1alpha1" + "github.com/odigos-io/odigos/k8sutils/pkg/describe/properties" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type ClusterCollectorAnalyze struct { + Enabled properties.EntityProperty `json:"enabled"` + CollectorGroup properties.EntityProperty `json:"collectorGroup"` + Deployed *properties.EntityProperty `json:"deployed,omitempty"` + DeployedError *properties.EntityProperty `json:"deployedError,omitempty"` + CollectorReady *properties.EntityProperty `json:"collectorReady,omitempty"` + DeploymentCreated properties.EntityProperty 
`json:"deployment,omitempty"` + ExpectedReplicas *properties.EntityProperty `json:"expectedReplicas,omitempty"` + HealthyReplicas *properties.EntityProperty `json:"healthyReplicas,omitempty"` + FailedReplicas *properties.EntityProperty `json:"failedReplicas,omitempty"` + FailedReplicasReason *properties.EntityProperty `json:"failedReplicasReason,omitempty"` +} + +type NodeCollectorAnalyze struct { + Enabled properties.EntityProperty `json:"enabled"` + CollectorGroup properties.EntityProperty `json:"collectorGroup"` + Deployed *properties.EntityProperty `json:"deployed,omitempty"` + DeployedError *properties.EntityProperty `json:"deployedError,omitempty"` + CollectorReady *properties.EntityProperty `json:"collectorReady,omitempty"` + DaemonSet properties.EntityProperty `json:"daemonSet,omitempty"` + DesiredNodes *properties.EntityProperty `json:"desiredNodes,omitempty"` + CurrentNodes *properties.EntityProperty `json:"currentNodes,omitempty"` + UpdatedNodes *properties.EntityProperty `json:"updatedNodes,omitempty"` + AvailableNodes *properties.EntityProperty `json:"availableNodes,omitempty"` +} + +type OdigosAnalyze struct { + OdigosVersion properties.EntityProperty `json:"odigosVersion"` + NumberOfDestinations int `json:"numberOfDestinations"` + NumberOfSources int `json:"numberOfSources"` + ClusterCollector ClusterCollectorAnalyze `json:"clusterCollector"` + NodeCollector NodeCollectorAnalyze `json:"nodeCollector"` + + // is settled is true if all resources are created and ready + IsSettled bool `json:"isSettled"` + HasErrors bool `json:"hasErrors"` +} + +func analyzeDeployed(cg *odigosv1.CollectorsGroup) (*properties.EntityProperty, *properties.EntityProperty) { + if cg == nil { + return nil, nil + } + + var deployedCondition *metav1.Condition + for _, condition := range cg.Status.Conditions { + if condition.Type == "Deployed" { + deployedCondition = &condition + break + } + } + if deployedCondition == nil { + // scheduler created the cg but autoscaler did not 
reconcile it yet + return &properties.EntityProperty{ + Name: "Deployed", + Value: false, + Status: properties.PropertyStatusTransitioning, + }, &properties.EntityProperty{ + Name: "Deployed Error", + Value: "waiting for reconciliation", + Status: properties.PropertyStatusTransitioning, + } + } + + if deployedCondition.Status == metav1.ConditionTrue { + // successfully reconciled to collectors deployment + return &properties.EntityProperty{ + Name: "Deployed", + Value: true, + Status: properties.PropertyStatusSuccess, + }, nil + } else { + // had an error during reconciliation to k8s deployment + return &properties.EntityProperty{ + Name: "Deployed", + Value: false, + Status: properties.PropertyStatusError, + }, &properties.EntityProperty{ + Name: "Deployed Error", + Value: deployedCondition.Message, + Status: properties.PropertyStatusError, + } + } +} + +func analyzeCollectorReady(cg *odigosv1.CollectorsGroup) *properties.EntityProperty { + if cg == nil { + return nil + } + + // TODO: ready is true only once deployment is ready + // but there is no difference between deployment starting and deployment failed to start + ready := cg.Status.Ready + + return &properties.EntityProperty{ + Name: "Ready", + Value: ready, + Status: properties.GetSuccessOrTransitioning(ready), + } +} + +func analyzeDeployment(dep *appsv1.Deployment, enabled bool) (properties.EntityProperty, *properties.EntityProperty, int) { + depFound := dep != nil + deployment := properties.EntityProperty{ + Name: "Deployment", + Value: properties.GetTextCreated(depFound), + Status: properties.GetSuccessOrTransitioning(depFound == enabled), + } + if !depFound { + return deployment, nil, 0 + } else { + expectedReplicas := int(*dep.Spec.Replicas) + return deployment, &properties.EntityProperty{ + Name: "Expected Replicas", + Value: expectedReplicas, + }, expectedReplicas + } +} + +func analyzeDaemonSet(ds *appsv1.DaemonSet, enabled bool) properties.EntityProperty { + dsFound := ds != nil + return 
properties.EntityProperty{ + Name: "DaemonSet", + Value: properties.GetTextCreated(dsFound), + Status: properties.GetSuccessOrTransitioning(dsFound == enabled), + } +} + +func analyzeDsReplicas(ds *appsv1.DaemonSet) (*properties.EntityProperty, *properties.EntityProperty, *properties.EntityProperty, *properties.EntityProperty) { + if ds == nil { + return nil, nil, nil, nil + } + + desiredNodes := int(ds.Status.DesiredNumberScheduled) + currentReplicas := int(ds.Status.CurrentNumberScheduled) + updatedReplicas := int(ds.Status.UpdatedNumberScheduled) + availableNodes := int(ds.Status.NumberAvailable) + return &properties.EntityProperty{ + // The total number of nodes that should be running this daemon. + // Regardless of what is actually running (0, 1, or more), rollouts, failures, etc. + // this number can be less than the number of nodes in the cluster if affinity rules and node selectors are used. + Name: "Desired Nodes", + Value: desiredNodes, + }, &properties.EntityProperty{ + // The number of nodes that are running at least 1 + // daemon pod and are supposed to run the daemon pod. + // if this number is less than the desired number, the daemonset is not fully scheduled. + // it can be due to an active rollout (which is ok), or due to a problem with the nodes / pods + // this prevents the daemonset pod from being scheduled. + Name: "Current Nodes", + Value: currentReplicas, + Status: properties.GetSuccessOrTransitioning(currentReplicas == desiredNodes), + }, &properties.EntityProperty{ + // The number of nodes that are running pods from the latest version of the daemonset and do not have old pods from previous versions. + // if this number is less than the desired number, the daemonset is not fully updated. + // it can be due to an active rollout (which is ok), or due to a problem with the nodes / pods + // this prevents the daemonset pod from being updated. 
+ // this number does not indicate if the pods are indeed running and healthy, only that the only pods scheduled to them is only the latest. + Name: "Updated Nodes", + Value: updatedReplicas, + Status: properties.GetSuccessOrTransitioning(updatedReplicas == desiredNodes), + }, &properties.EntityProperty{ + // available nodes are the nodes for which the oldest pod is ready and available. + // it can count nodes that are running an old version of the daemonset, + // so it alone cannot be used to determine if the daemonset is updated and healthy. + Name: "Available Nodes", + Value: availableNodes, + Status: properties.GetSuccessOrTransitioning(availableNodes == desiredNodes), + } +} + +func analyzePodsHealth(pods *corev1.PodList, expectedReplicas int) (*properties.EntityProperty, *properties.EntityProperty, *properties.EntityProperty) { + if pods == nil { // should not happen, but check just in case + return nil, nil, nil + } + + runningReplicas := 0 + failureReplicas := 0 + var failureText string + for _, pod := range pods.Items { + var condition *corev1.PodCondition + for i := range pod.Status.Conditions { + c := pod.Status.Conditions[i] + if c.Type == corev1.PodReady { + condition = &c + break + } + } + if condition == nil { + failureReplicas++ + } else { + if condition.Status == corev1.ConditionTrue { + runningReplicas++ + } else { + failureReplicas++ + failureText = condition.Message + } + } + } + + healthyReplicas := properties.EntityProperty{ + Name: "Healthy Replicas", + Value: runningReplicas, + Status: properties.GetSuccessOrTransitioning(runningReplicas == expectedReplicas), + } + unhealthyReplicas := properties.EntityProperty{ + Name: "Failed Replicas", + Value: failureReplicas, + Status: properties.GetSuccessOrError(failureReplicas == 0), + } + if failureText == "" { + return &healthyReplicas, &unhealthyReplicas, nil + } else { + return &healthyReplicas, &unhealthyReplicas, &properties.EntityProperty{ + Name: "Failed Replicas Reason", + Value: 
failureText, + Status: properties.PropertyStatusError, + } + } +} + +func analyzeClusterCollector(resources *OdigosResources) ClusterCollectorAnalyze { + + isEnabled := len(resources.Destinations.Items) > 0 + + enabled := properties.EntityProperty{ + Name: "Enabled", + Value: isEnabled, + // There is no expected state for this property, so not status is set + } + + hasCg := resources.ClusterCollector.CollectorsGroup != nil + cg := properties.EntityProperty{ + Name: "Collector Group", + Value: properties.GetTextCreated(hasCg), + Status: properties.GetSuccessOrTransitioning(hasCg == isEnabled), + } + + deployed, deployedError := analyzeDeployed(resources.ClusterCollector.CollectorsGroup) + ready := analyzeCollectorReady(resources.ClusterCollector.CollectorsGroup) + dep, depExpected, expectedReplicas := analyzeDeployment(resources.ClusterCollector.Deployment, isEnabled) + healthyPodsCount, failedPodsCount, failedPodsReason := analyzePodsHealth(resources.ClusterCollector.LatestRevisionPods, expectedReplicas) + + return ClusterCollectorAnalyze{ + Enabled: enabled, + CollectorGroup: cg, + Deployed: deployed, + DeployedError: deployedError, + CollectorReady: ready, + DeploymentCreated: dep, + ExpectedReplicas: depExpected, + HealthyReplicas: healthyPodsCount, + FailedReplicas: failedPodsCount, + FailedReplicasReason: failedPodsReason, + } +} + +func analyzeNodeCollector(resources *OdigosResources) NodeCollectorAnalyze { + + hasClusterCollector := resources.ClusterCollector.CollectorsGroup != nil + isClusterCollectorReady := hasClusterCollector && resources.ClusterCollector.CollectorsGroup.Status.Ready + hasInstrumentedSources := len(resources.InstrumentationConfigs.Items) > 0 + isEnabled := hasClusterCollector && isClusterCollectorReady && hasInstrumentedSources + + enabled := properties.EntityProperty{ + Name: "Enabled", + Value: isEnabled, + // There is no expected state for this property, so not status is set + } + + hasCg := resources.ClusterCollector.CollectorsGroup 
!= nil + cg := properties.EntityProperty{ + Name: "Collector Group", + Value: properties.GetTextCreated(hasCg), + Status: properties.GetSuccessOrTransitioning(hasCg == isEnabled), + } + + deployed, deployedError := analyzeDeployed(resources.ClusterCollector.CollectorsGroup) + ready := analyzeCollectorReady(resources.ClusterCollector.CollectorsGroup) + ds := analyzeDaemonSet(resources.NodeCollector.DaemonSet, isEnabled) + // TODO: implement our oun pod lister to figure out how many are updated and ready which isn't available in the daemonset status + desiredNodes, currentNodes, updatedNodes, availableNodes := analyzeDsReplicas(resources.NodeCollector.DaemonSet) + + return NodeCollectorAnalyze{ + Enabled: enabled, + CollectorGroup: cg, + Deployed: deployed, + DeployedError: deployedError, + CollectorReady: ready, + DaemonSet: ds, + DesiredNodes: desiredNodes, + CurrentNodes: currentNodes, + UpdatedNodes: updatedNodes, + AvailableNodes: availableNodes, + } +} + +func summarizeStatus(clusterCollector ClusterCollectorAnalyze, nodeCollector NodeCollectorAnalyze) (bool, bool) { + isSettled := true // everything is settled, unless we find property with status transitioning + hasErrors := false // there is no error, unless we find property with status error + + var allProperties = []*properties.EntityProperty{ + &clusterCollector.Enabled, + &clusterCollector.CollectorGroup, + clusterCollector.Deployed, + clusterCollector.DeployedError, + clusterCollector.CollectorReady, + &clusterCollector.DeploymentCreated, + clusterCollector.ExpectedReplicas, + clusterCollector.HealthyReplicas, + clusterCollector.FailedReplicas, + clusterCollector.FailedReplicasReason, + &nodeCollector.Enabled, + &nodeCollector.CollectorGroup, + nodeCollector.Deployed, + nodeCollector.DeployedError, + nodeCollector.CollectorReady, + &nodeCollector.DaemonSet, + nodeCollector.DesiredNodes, + nodeCollector.CurrentNodes, + nodeCollector.UpdatedNodes, + nodeCollector.AvailableNodes, + } + + for _, property := 
range allProperties { + if property == nil { + continue + } + switch property.Status { + case properties.PropertyStatusError: + hasErrors = true + case properties.PropertyStatusTransitioning: + isSettled = false + } + } + + return isSettled, hasErrors +} + +func AnalyzeOdigos(resources *OdigosResources) *OdigosAnalyze { + clusterCollector := analyzeClusterCollector(resources) + nodeCollector := analyzeNodeCollector(resources) + isSettled, hasErrors := summarizeStatus(clusterCollector, nodeCollector) + odigosVersion := properties.EntityProperty{ + Name: "Odigos Version", + Value: resources.OdigosVersion, + } + + return &OdigosAnalyze{ + OdigosVersion: odigosVersion, + NumberOfDestinations: len(resources.Destinations.Items), + NumberOfSources: len(resources.InstrumentationConfigs.Items), + ClusterCollector: clusterCollector, + NodeCollector: nodeCollector, + + IsSettled: isSettled, + HasErrors: hasErrors, + } +} diff --git a/k8sutils/pkg/describe/odigos/resources.go b/k8sutils/pkg/describe/odigos/resources.go index db6fc389f9..e0080befab 100644 --- a/k8sutils/pkg/describe/odigos/resources.go +++ b/k8sutils/pkg/describe/odigos/resources.go @@ -7,6 +7,7 @@ import ( odigosclientset "github.com/odigos-io/odigos/api/generated/odigos/clientset/versioned/typed/odigos/v1alpha1" odigosv1 "github.com/odigos-io/odigos/api/odigos/v1alpha1" "github.com/odigos-io/odigos/k8sutils/pkg/consts" + "github.com/odigos-io/odigos/k8sutils/pkg/getters" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -26,6 +27,7 @@ type NodeCollectorResources struct { } type OdigosResources struct { + OdigosVersion string ClusterCollector ClusterCollectorResources NodeCollector NodeCollectorResources Destinations *odigosv1.DestinationList @@ -115,6 +117,12 @@ func GetRelevantOdigosResources(ctx context.Context, kubeClient kubernetes.Inter odigos := OdigosResources{} + odigosVersion, err := getters.GetOdigosVersionInClusterFromConfigMap(ctx, 
kubeClient, odigosNs) + if err != nil { + return nil, err + } + odigos.OdigosVersion = odigosVersion + cc, err := getClusterCollectorResources(ctx, kubeClient, odigosClient, odigosNs) if err != nil { return nil, err diff --git a/k8sutils/pkg/describe/properties/properties.go b/k8sutils/pkg/describe/properties/properties.go new file mode 100644 index 0000000000..f6ef4d6574 --- /dev/null +++ b/k8sutils/pkg/describe/properties/properties.go @@ -0,0 +1,28 @@ +package properties + +type PropertyStatus string + +const ( + + // the property is in it's desired state + PropertyStatusSuccess PropertyStatus = "success" + + // the property is not in it's desired state, but it's state might be temporary + // if wait some time, it might reconcile to the desired state (or not) + PropertyStatusTransitioning PropertyStatus = "transitioning" + + // the property is not in it's desired state, and it's state is not expected to change + PropertyStatusError PropertyStatus = "error" +) + +type EntityProperty struct { + + // The name of the property being described + Name string `json:"name"` + + // The value to display for this property + Value interface{} `json:"value"` + + // The status of the property actual state + Status PropertyStatus `json:"status,omitempty"` +} diff --git a/k8sutils/pkg/describe/properties/utils.go b/k8sutils/pkg/describe/properties/utils.go new file mode 100644 index 0000000000..b182f73672 --- /dev/null +++ b/k8sutils/pkg/describe/properties/utils.go @@ -0,0 +1,25 @@ +package properties + +func GetTextCreated(created bool) string { + if created { + return "created" + } else { + return "not created" + } +} + +func GetSuccessOrTransitioning(matchExpected bool) PropertyStatus { + if matchExpected { + return PropertyStatusSuccess + } else { + return PropertyStatusTransitioning + } +} + +func GetSuccessOrError(matchExpected bool) PropertyStatus { + if matchExpected { + return PropertyStatusSuccess + } else { + return PropertyStatusError + } +}