diff --git a/cli/cmd/describe.go b/cli/cmd/describe.go index 9426ea5955..7efbfc7db6 100644 --- a/cli/cmd/describe.go +++ b/cli/cmd/describe.go @@ -42,7 +42,12 @@ var describeCmd = &cobra.Command{ if describeRemoteFlag { describeText = executeRemoteOdigosDescribe(ctx, client, odigosNs) } else { - describeText = describe.DescribeOdigos(ctx, client, client.OdigosClient, odigosNs) + describeAnalyze, err := describe.DescribeOdigos(ctx, client, client.OdigosClient, odigosNs) + if err != nil { + describeText = fmt.Sprintf("Failed to describe odigos: %s", err) + } else { + describeText = describe.DescribeOdigosToText(describeAnalyze) + } } fmt.Println(describeText) }, diff --git a/cli/cmd/uninstall.go b/cli/cmd/uninstall.go index cd97726c93..c3ff21465c 100644 --- a/cli/cmd/uninstall.go +++ b/cli/cmd/uninstall.go @@ -102,6 +102,10 @@ var uninstallCmd = &cobra.Command{ createKubeResourceWithLogging(ctx, "Uninstalling Odigos CRDs", client, cmd, ns, uninstallCRDs) + + createKubeResourceWithLogging(ctx, "Uninstalling Odigos MutatingWebhookConfigurations", + client, cmd, ns, uninstallMutatingWebhookConfigs) + fmt.Printf("\n\u001B[32mSUCCESS:\u001B[0m Odigos uninstalled.\n") }, } @@ -398,6 +402,26 @@ func uninstallCRDs(ctx context.Context, cmd *cobra.Command, client *kube.Client, return nil } +func uninstallMutatingWebhookConfigs(ctx context.Context, cmd *cobra.Command, client *kube.Client, ns string) error { + list, err := client.AdmissionregistrationV1().MutatingWebhookConfigurations().List(ctx, metav1.ListOptions{ + LabelSelector: metav1.FormatLabelSelector(&metav1.LabelSelector{ + MatchLabels: labels.OdigosSystem, + }), + }) + if err != nil { + return err + } + + for _, webhook := range list.Items { + err = client.AdmissionregistrationV1().MutatingWebhookConfigurations().Delete(ctx, webhook.Name, metav1.DeleteOptions{}) + if err != nil { + return err + } + } + + return nil +} + func uninstallRBAC(ctx context.Context, cmd *cobra.Command, client *kube.Client, ns string) error { 
list, err := client.RbacV1().ClusterRoles().List(ctx, metav1.ListOptions{ LabelSelector: metav1.FormatLabelSelector(&metav1.LabelSelector{ diff --git a/frontend/endpoints/describe.go b/frontend/endpoints/describe.go index c67e4ef4a4..fb9e43b4a6 100644 --- a/frontend/endpoints/describe.go +++ b/frontend/endpoints/describe.go @@ -10,8 +10,32 @@ import ( func DescribeOdigos(c *gin.Context) { ctx := c.Request.Context() odiogosNs := env.GetCurrentNamespace() - describeText := describe.DescribeOdigos(ctx, kube.DefaultClient, kube.DefaultClient.OdigosClient, odiogosNs) - c.Writer.WriteString(describeText) + desc, err := describe.DescribeOdigos(ctx, kube.DefaultClient, kube.DefaultClient.OdigosClient, odiogosNs) + if err != nil { + c.JSON(500, gin.H{ + "message": err.Error(), + }) + return + } + + // construct the http response code based on the status of the odigos + returnCode := 200 + if desc.HasErrors { + returnCode = 500 + } else if !desc.IsSettled { + returnCode = 202 + } + + // Check for the Accept header + acceptHeader := c.GetHeader("Accept") + + if acceptHeader == "application/json" { + // Return JSON response if Accept header is "application/json" + c.JSON(returnCode, desc) + } else { + describeText := describe.DescribeOdigosToText(desc) + c.String(returnCode, describeText) + } } func DescribeSource(c *gin.Context, ns string, kind string, name string) { diff --git a/k8sutils/pkg/describe/common.go b/k8sutils/pkg/describe/common.go index 3fe0261545..e501de8862 100644 --- a/k8sutils/pkg/describe/common.go +++ b/k8sutils/pkg/describe/common.go @@ -13,6 +13,10 @@ func wrapTextInGreen(text string) string { return "\033[32m" + text + "\033[0m" } +func wrapTextInYellow(text string) string { + return "\033[33m" + text + "\033[0m" +} + func wrapTextSuccessOfFailure(text string, success bool) string { if success { return wrapTextInGreen(text) diff --git a/k8sutils/pkg/describe/odigos.go b/k8sutils/pkg/describe/odigos.go index 91822a3a0e..cd477b21f8 100644 --- 
a/k8sutils/pkg/describe/odigos.go +++ b/k8sutils/pkg/describe/odigos.go @@ -7,206 +7,79 @@ import ( odigosclientset "github.com/odigos-io/odigos/api/generated/odigos/clientset/versioned/typed/odigos/v1alpha1" odigos "github.com/odigos-io/odigos/k8sutils/pkg/describe/odigos" - "github.com/odigos-io/odigos/k8sutils/pkg/getters" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/odigos-io/odigos/k8sutils/pkg/describe/properties" "k8s.io/client-go/kubernetes" ) -func printOdigosVersion(odigosVersion string, sb *strings.Builder) { - describeText(sb, 0, "Odigos Version: %s", odigosVersion) -} - -func printClusterCollectorStatus(resources *odigos.OdigosResources, sb *strings.Builder) { - - expectingClusterCollector := len(resources.Destinations.Items) > 0 - - describeText(sb, 1, "Cluster Collector:") - clusterCollector := resources.ClusterCollector - - if expectingClusterCollector { - describeText(sb, 2, "Status: Cluster Collector is expected to be created because there are destinations") - } else { - describeText(sb, 2, "Status: Cluster Collector is not expected to be created because there are no destinations") +func printProperty(sb *strings.Builder, indent int, property *properties.EntityProperty) { + if property == nil { + return } - - if clusterCollector.CollectorsGroup == nil { - describeText(sb, 2, wrapTextSuccessOfFailure("Collectors Group Not Created", !expectingClusterCollector)) - } else { - describeText(sb, 2, wrapTextSuccessOfFailure("Collectors Group Created", expectingClusterCollector)) - - var deployedCondition *metav1.Condition - for _, condition := range clusterCollector.CollectorsGroup.Status.Conditions { - if condition.Type == "Deployed" { - deployedCondition = &condition - break - } - } - if deployedCondition == nil { - describeText(sb, 2, wrapTextInRed("Deployed: Status Unavailable")) - } else { - if deployedCondition.Status == metav1.ConditionTrue { - describeText(sb, 2, wrapTextInGreen("Deployed: true")) - } 
else { - describeText(sb, 2, wrapTextInRed("Deployed: false")) - describeText(sb, 2, wrapTextInRed(fmt.Sprintf("Reason: %s", deployedCondition.Message))) - } - } - - ready := clusterCollector.CollectorsGroup.Status.Ready - describeText(sb, 2, wrapTextSuccessOfFailure(fmt.Sprintf("Ready: %t", ready), ready)) - } - - expectedReplicas := int32(0) - if clusterCollector.Deployment == nil { - describeText(sb, 2, wrapTextSuccessOfFailure("Deployment: Not Found", !expectingClusterCollector)) - } else { - describeText(sb, 2, wrapTextSuccessOfFailure("Deployment: Found", expectingClusterCollector)) - expectedReplicas = *clusterCollector.Deployment.Spec.Replicas - describeText(sb, 2, fmt.Sprintf("Expected Replicas: %d", expectedReplicas)) - } - - if clusterCollector.LatestRevisionPods != nil { - runningReplicas := 0 - failureReplicas := 0 - var failureText string - for _, pod := range clusterCollector.LatestRevisionPods.Items { - var condition *corev1.PodCondition - for i := range pod.Status.Conditions { - c := pod.Status.Conditions[i] - if c.Type == corev1.PodReady { - condition = &c - break - } - } - if condition == nil { - failureReplicas++ - } else { - if condition.Status == corev1.ConditionTrue { - runningReplicas++ - } else { - failureReplicas++ - failureText = condition.Message - } - } - } - podReplicasText := fmt.Sprintf("Actual Replicas: %d running, %d failed", runningReplicas, failureReplicas) - deploymentSuccessful := runningReplicas == int(expectedReplicas) && failureReplicas == 0 - describeText(sb, 2, wrapTextSuccessOfFailure(podReplicasText, deploymentSuccessful)) - if !deploymentSuccessful { - describeText(sb, 2, wrapTextInRed(fmt.Sprintf("Replicas Not Ready Reason: %s", failureText))) - } + text := fmt.Sprintf("%s: %v", property.Name, property.Value) + switch property.Status { + case properties.PropertyStatusSuccess: + text = wrapTextInGreen(text) + case properties.PropertyStatusError: + text = wrapTextInRed(text) + case properties.PropertyStatusTransitioning: 
+ text = wrapTextInYellow(text) } + describeText(sb, indent, text) } -func printAndCalculateIsNodeCollectorStatus(resources *odigos.OdigosResources, sb *strings.Builder) bool { - - numInstrumentationConfigs := len(resources.InstrumentationConfigs.Items) - if numInstrumentationConfigs == 0 { - describeText(sb, 2, "Status: Node Collectors not expected as there are no sources") - return false - } - - if resources.ClusterCollector.CollectorsGroup == nil { - describeText(sb, 2, "Status: Node Collectors not expected as there are no destinations") - return false - } - - if !resources.ClusterCollector.CollectorsGroup.Status.Ready { - describeText(sb, 2, "Status: Node Collectors not expected as the Cluster Collector is not ready") - return false - } - - describeText(sb, 2, "Status: Node Collectors expected as cluster collector is ready and there are sources") - return true +func printClusterCollectorStatus(analyze *odigos.OdigosAnalyze, sb *strings.Builder) { + describeText(sb, 1, "Cluster Collector:") + printProperty(sb, 2, &analyze.ClusterCollector.Enabled) + printProperty(sb, 2, &analyze.ClusterCollector.CollectorGroup) + printProperty(sb, 2, analyze.ClusterCollector.Deployed) + printProperty(sb, 2, analyze.ClusterCollector.DeployedError) + printProperty(sb, 2, analyze.ClusterCollector.CollectorReady) + printProperty(sb, 2, &analyze.ClusterCollector.DeploymentCreated) + printProperty(sb, 2, analyze.ClusterCollector.ExpectedReplicas) + printProperty(sb, 2, analyze.ClusterCollector.HealthyReplicas) + printProperty(sb, 2, analyze.ClusterCollector.FailedReplicas) + printProperty(sb, 2, analyze.ClusterCollector.FailedReplicasReason) } -func printNodeCollectorStatus(resources *odigos.OdigosResources, sb *strings.Builder) { - +func printNodeCollectorStatus(analyze *odigos.OdigosAnalyze, sb *strings.Builder) { describeText(sb, 1, "Node Collector:") - nodeCollector := resources.NodeCollector - - expectingNodeCollector := printAndCalculateIsNodeCollectorStatus(resources, sb) - - 
if nodeCollector.CollectorsGroup == nil { - describeText(sb, 2, wrapTextSuccessOfFailure("Collectors Group Not Created", !expectingNodeCollector)) - } else { - describeText(sb, 2, wrapTextSuccessOfFailure("Collectors Group Created", expectingNodeCollector)) - - var deployedCondition *metav1.Condition - for _, condition := range nodeCollector.CollectorsGroup.Status.Conditions { - if condition.Type == "Deployed" { - deployedCondition = &condition - break - } - } - if deployedCondition == nil { - describeText(sb, 2, wrapTextInRed("Deployed: Status Unavailable")) - } else { - if deployedCondition.Status == metav1.ConditionTrue { - describeText(sb, 2, wrapTextInGreen("Deployed: True")) - } else { - describeText(sb, 2, wrapTextInRed("Deployed: False")) - describeText(sb, 2, wrapTextInRed(fmt.Sprintf("Reason: %s", deployedCondition.Message))) - } - } - - ready := nodeCollector.CollectorsGroup.Status.Ready - describeText(sb, 2, wrapTextSuccessOfFailure(fmt.Sprintf("Ready: %t", ready), ready)) - } - - if nodeCollector.DaemonSet == nil { - describeText(sb, 2, wrapTextSuccessOfFailure("DaemonSet: Not Found", !expectingNodeCollector)) - } else { - describeText(sb, 2, wrapTextSuccessOfFailure("DaemonSet: Found", expectingNodeCollector)) - - // this is copied from k8sutils/pkg/describe/describe.go - // I hope the info is accurate since there can be many edge cases - describeText(sb, 2, "Desired Number of Nodes Scheduled: %d", nodeCollector.DaemonSet.Status.DesiredNumberScheduled) - currentMeetsDesired := nodeCollector.DaemonSet.Status.DesiredNumberScheduled == nodeCollector.DaemonSet.Status.CurrentNumberScheduled - describeText(sb, 2, wrapTextSuccessOfFailure(fmt.Sprintf("Current Number of Nodes Scheduled: %d", nodeCollector.DaemonSet.Status.CurrentNumberScheduled), currentMeetsDesired)) - updatedMeetsDesired := nodeCollector.DaemonSet.Status.DesiredNumberScheduled == nodeCollector.DaemonSet.Status.UpdatedNumberScheduled - describeText(sb, 2, 
wrapTextSuccessOfFailure(fmt.Sprintf("Number of Nodes Scheduled with Up-to-date Pods: %d", nodeCollector.DaemonSet.Status.UpdatedNumberScheduled), updatedMeetsDesired)) - availableMeetsDesired := nodeCollector.DaemonSet.Status.DesiredNumberScheduled == nodeCollector.DaemonSet.Status.NumberAvailable - describeText(sb, 2, wrapTextSuccessOfFailure(fmt.Sprintf("Number of Nodes Scheduled with Available Pods: %d", nodeCollector.DaemonSet.Status.NumberAvailable), availableMeetsDesired)) - noMisscheduled := nodeCollector.DaemonSet.Status.NumberMisscheduled == 0 - describeText(sb, 2, wrapTextSuccessOfFailure(fmt.Sprintf("Number of Nodes Misscheduled: %d", nodeCollector.DaemonSet.Status.NumberMisscheduled), noMisscheduled)) - } + printProperty(sb, 2, &analyze.NodeCollector.Enabled) + printProperty(sb, 2, &analyze.NodeCollector.CollectorGroup) + printProperty(sb, 2, analyze.NodeCollector.Deployed) + printProperty(sb, 2, analyze.NodeCollector.DeployedError) + printProperty(sb, 2, analyze.NodeCollector.CollectorReady) + printProperty(sb, 2, &analyze.NodeCollector.DaemonSet) + printProperty(sb, 2, analyze.NodeCollector.DesiredNodes) + printProperty(sb, 2, analyze.NodeCollector.CurrentNodes) + printProperty(sb, 2, analyze.NodeCollector.UpdatedNodes) + printProperty(sb, 2, analyze.NodeCollector.AvailableNodes) } -func printOdigosPipeline(resources *odigos.OdigosResources, sb *strings.Builder) { +func printOdigosPipeline(analyze *odigos.OdigosAnalyze, sb *strings.Builder) { describeText(sb, 0, "Odigos Pipeline:") - numDestinations := len(resources.Destinations.Items) - numInstrumentationConfigs := len(resources.InstrumentationConfigs.Items) - - describeText(sb, 1, "Status: there are %d sources and %d destinations\n", numInstrumentationConfigs, numDestinations) - printClusterCollectorStatus(resources, sb) + describeText(sb, 1, "Status: there are %d sources and %d destinations\n", analyze.NumberOfSources, analyze.NumberOfDestinations) + printClusterCollectorStatus(analyze, sb) 
sb.WriteString("\n") - printNodeCollectorStatus(resources, sb) + printNodeCollectorStatus(analyze, sb) } -func printDescribeOdigos(odigosVersion string, resources *odigos.OdigosResources) string { +func DescribeOdigosToText(analyze *odigos.OdigosAnalyze) string { var sb strings.Builder - printOdigosVersion(odigosVersion, &sb) + printProperty(&sb, 0, &analyze.OdigosVersion) sb.WriteString("\n") - printOdigosPipeline(resources, &sb) + printOdigosPipeline(analyze, &sb) return sb.String() } -func DescribeOdigos(ctx context.Context, kubeClient kubernetes.Interface, odigosClient odigosclientset.OdigosV1alpha1Interface, odigosNs string) string { - - odigosVersion, err := getters.GetOdigosVersionInClusterFromConfigMap(ctx, kubeClient, odigosNs) - if err != nil { - return fmt.Sprintf("Error: %v\n", err) - } +func DescribeOdigos(ctx context.Context, kubeClient kubernetes.Interface, odigosClient odigosclientset.OdigosV1alpha1Interface, odigosNs string) (*odigos.OdigosAnalyze, error) { odigosResources, err := odigos.GetRelevantOdigosResources(ctx, kubeClient, odigosClient, odigosNs) if err != nil { - return fmt.Sprintf("Error: %v\n", err) + return nil, err } - return printDescribeOdigos(odigosVersion, odigosResources) + return odigos.AnalyzeOdigos(odigosResources), nil } diff --git a/k8sutils/pkg/describe/odigos/analyze.go b/k8sutils/pkg/describe/odigos/analyze.go new file mode 100644 index 0000000000..2bfaebd4f4 --- /dev/null +++ b/k8sutils/pkg/describe/odigos/analyze.go @@ -0,0 +1,368 @@ +package odigos + +import ( + odigosv1 "github.com/odigos-io/odigos/api/odigos/v1alpha1" + "github.com/odigos-io/odigos/k8sutils/pkg/describe/properties" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type ClusterCollectorAnalyze struct { + Enabled properties.EntityProperty `json:"enabled"` + CollectorGroup properties.EntityProperty `json:"collectorGroup"` + Deployed *properties.EntityProperty `json:"deployed,omitempty"` + 
DeployedError *properties.EntityProperty `json:"deployedError,omitempty"` + CollectorReady *properties.EntityProperty `json:"collectorReady,omitempty"` + DeploymentCreated properties.EntityProperty `json:"deployment,omitempty"` + ExpectedReplicas *properties.EntityProperty `json:"expectedReplicas,omitempty"` + HealthyReplicas *properties.EntityProperty `json:"healthyReplicas,omitempty"` + FailedReplicas *properties.EntityProperty `json:"failedReplicas,omitempty"` + FailedReplicasReason *properties.EntityProperty `json:"failedReplicasReason,omitempty"` +} + +type NodeCollectorAnalyze struct { + Enabled properties.EntityProperty `json:"enabled"` + CollectorGroup properties.EntityProperty `json:"collectorGroup"` + Deployed *properties.EntityProperty `json:"deployed,omitempty"` + DeployedError *properties.EntityProperty `json:"deployedError,omitempty"` + CollectorReady *properties.EntityProperty `json:"collectorReady,omitempty"` + DaemonSet properties.EntityProperty `json:"daemonSet,omitempty"` + DesiredNodes *properties.EntityProperty `json:"desiredNodes,omitempty"` + CurrentNodes *properties.EntityProperty `json:"currentNodes,omitempty"` + UpdatedNodes *properties.EntityProperty `json:"updatedNodes,omitempty"` + AvailableNodes *properties.EntityProperty `json:"availableNodes,omitempty"` +} + +type OdigosAnalyze struct { + OdigosVersion properties.EntityProperty `json:"odigosVersion"` + NumberOfDestinations int `json:"numberOfDestinations"` + NumberOfSources int `json:"numberOfSources"` + ClusterCollector ClusterCollectorAnalyze `json:"clusterCollector"` + NodeCollector NodeCollectorAnalyze `json:"nodeCollector"` + + // is settled is true if all resources are created and ready + IsSettled bool `json:"isSettled"` + HasErrors bool `json:"hasErrors"` +} + +func analyzeDeployed(cg *odigosv1.CollectorsGroup) (*properties.EntityProperty, *properties.EntityProperty) { + if cg == nil { + return nil, nil + } + + var deployedCondition *metav1.Condition + for _, condition := 
range cg.Status.Conditions { + if condition.Type == "Deployed" { + deployedCondition = &condition + break + } + } + if deployedCondition == nil { + // scheduler created the cg but autoscaler did not reconcile it yet + return &properties.EntityProperty{ + Name: "Deployed", + Value: false, + Status: properties.PropertyStatusTransitioning, + }, &properties.EntityProperty{ + Name: "Deployed Error", + Value: "waiting for reconciliation", + Status: properties.PropertyStatusTransitioning, + } + } + + if deployedCondition.Status == metav1.ConditionTrue { + // successfully reconciled to collectors deployment + return &properties.EntityProperty{ + Name: "Deployed", + Value: true, + Status: properties.PropertyStatusSuccess, + }, nil + } else { + // had an error during reconciliation to k8s deployment + return &properties.EntityProperty{ + Name: "Deployed", + Value: false, + Status: properties.PropertyStatusError, + }, &properties.EntityProperty{ + Name: "Deployed Error", + Value: deployedCondition.Message, + Status: properties.PropertyStatusError, + } + } +} + +func analyzeCollectorReady(cg *odigosv1.CollectorsGroup) *properties.EntityProperty { + if cg == nil { + return nil + } + + // TODO: ready is true only once deployment is ready + // but there is no difference between deployment starting and deployment failed to start + ready := cg.Status.Ready + + return &properties.EntityProperty{ + Name: "Ready", + Value: ready, + Status: properties.GetSuccessOrTransitioning(ready), + } +} + +func analyzeDeployment(dep *appsv1.Deployment, enabled bool) (properties.EntityProperty, *properties.EntityProperty, int) { + depFound := dep != nil + deployment := properties.EntityProperty{ + Name: "Deployment", + Value: properties.GetTextCreated(depFound), + Status: properties.GetSuccessOrTransitioning(depFound == enabled), + } + if !depFound { + return deployment, nil, 0 + } else { + expectedReplicas := int(*dep.Spec.Replicas) + return deployment, &properties.EntityProperty{ + Name: 
"Expected Replicas", + Value: expectedReplicas, + }, expectedReplicas + } +} + +func analyzeDaemonSet(ds *appsv1.DaemonSet, enabled bool) properties.EntityProperty { + dsFound := ds != nil + return properties.EntityProperty{ + Name: "DaemonSet", + Value: properties.GetTextCreated(dsFound), + Status: properties.GetSuccessOrTransitioning(dsFound == enabled), + } +} + +func analyzeDsReplicas(ds *appsv1.DaemonSet) (*properties.EntityProperty, *properties.EntityProperty, *properties.EntityProperty, *properties.EntityProperty) { + if ds == nil { + return nil, nil, nil, nil + } + + desiredNodes := int(ds.Status.DesiredNumberScheduled) + currentReplicas := int(ds.Status.CurrentNumberScheduled) + updatedReplicas := int(ds.Status.UpdatedNumberScheduled) + availableNodes := int(ds.Status.NumberAvailable) + return &properties.EntityProperty{ + // The total number of nodes that should be running this daemon. + // Regardless of what is actually running (0, 1, or more), rollouts, failures, etc. + // this number can be less than the number of nodes in the cluster if affinity rules and node selectors are used. + Name: "Desired Nodes", + Value: desiredNodes, + }, &properties.EntityProperty{ + // The number of nodes that are running at least 1 + // daemon pod and are supposed to run the daemon pod. + // if this number is less than the desired number, the daemonset is not fully scheduled. + // it can be due to an active rollout (which is ok), or due to a problem with the nodes / pods + // this prevents the daemonset pod from being scheduled. + Name: "Current Nodes", + Value: currentReplicas, + Status: properties.GetSuccessOrTransitioning(currentReplicas == desiredNodes), + }, &properties.EntityProperty{ + // The number of nodes that are running pods from the latest version of the daemonset and do not have old pods from previous versions. + // if this number is less than the desired number, the daemonset is not fully updated. 
+ // it can be due to an active rollout (which is ok), or due to a problem with the nodes / pods + // this prevents the daemonset pod from being updated. + // this number does not indicate if the pods are indeed running and healthy, only that the only pods scheduled to them is only the latest. + Name: "Updated Nodes", + Value: updatedReplicas, + Status: properties.GetSuccessOrTransitioning(updatedReplicas == desiredNodes), + }, &properties.EntityProperty{ + // available nodes are the nodes for which the oldest pod is ready and available. + // it can count nodes that are running an old version of the daemonset, + // so it alone cannot be used to determine if the daemonset is updated and healthy. + Name: "Available Nodes", + Value: availableNodes, + Status: properties.GetSuccessOrTransitioning(availableNodes == desiredNodes), + } +} + +func analyzePodsHealth(pods *corev1.PodList, expectedReplicas int) (*properties.EntityProperty, *properties.EntityProperty, *properties.EntityProperty) { + if pods == nil { // should not happen, but check just in case + return nil, nil, nil + } + + runningReplicas := 0 + failureReplicas := 0 + var failureText string + for _, pod := range pods.Items { + var condition *corev1.PodCondition + for i := range pod.Status.Conditions { + c := pod.Status.Conditions[i] + if c.Type == corev1.PodReady { + condition = &c + break + } + } + if condition == nil { + failureReplicas++ + } else { + if condition.Status == corev1.ConditionTrue { + runningReplicas++ + } else { + failureReplicas++ + failureText = condition.Message + } + } + } + + healthyReplicas := properties.EntityProperty{ + Name: "Healthy Replicas", + Value: runningReplicas, + Status: properties.GetSuccessOrTransitioning(runningReplicas == expectedReplicas), + } + unhealthyReplicas := properties.EntityProperty{ + Name: "Failed Replicas", + Value: failureReplicas, + Status: properties.GetSuccessOrError(failureReplicas == 0), + } + if failureText == "" { + return &healthyReplicas, 
&unhealthyReplicas, nil + } else { + return &healthyReplicas, &unhealthyReplicas, &properties.EntityProperty{ + Name: "Failed Replicas Reason", + Value: failureText, + Status: properties.PropertyStatusError, + } + } +} + +func analyzeClusterCollector(resources *OdigosResources) ClusterCollectorAnalyze { + + isEnabled := len(resources.Destinations.Items) > 0 + + enabled := properties.EntityProperty{ + Name: "Enabled", + Value: isEnabled, + // There is no expected state for this property, so not status is set + } + + hasCg := resources.ClusterCollector.CollectorsGroup != nil + cg := properties.EntityProperty{ + Name: "Collector Group", + Value: properties.GetTextCreated(hasCg), + Status: properties.GetSuccessOrTransitioning(hasCg == isEnabled), + } + + deployed, deployedError := analyzeDeployed(resources.ClusterCollector.CollectorsGroup) + ready := analyzeCollectorReady(resources.ClusterCollector.CollectorsGroup) + dep, depExpected, expectedReplicas := analyzeDeployment(resources.ClusterCollector.Deployment, isEnabled) + healthyPodsCount, failedPodsCount, failedPodsReason := analyzePodsHealth(resources.ClusterCollector.LatestRevisionPods, expectedReplicas) + + return ClusterCollectorAnalyze{ + Enabled: enabled, + CollectorGroup: cg, + Deployed: deployed, + DeployedError: deployedError, + CollectorReady: ready, + DeploymentCreated: dep, + ExpectedReplicas: depExpected, + HealthyReplicas: healthyPodsCount, + FailedReplicas: failedPodsCount, + FailedReplicasReason: failedPodsReason, + } +} + +func analyzeNodeCollector(resources *OdigosResources) NodeCollectorAnalyze { + + hasClusterCollector := resources.ClusterCollector.CollectorsGroup != nil + isClusterCollectorReady := hasClusterCollector && resources.ClusterCollector.CollectorsGroup.Status.Ready + hasInstrumentedSources := len(resources.InstrumentationConfigs.Items) > 0 + isEnabled := hasClusterCollector && isClusterCollectorReady && hasInstrumentedSources + + enabled := properties.EntityProperty{ + Name: "Enabled", 
+ Value: isEnabled, + // There is no expected state for this property, so no status is set + } + + hasCg := resources.NodeCollector.CollectorsGroup != nil // the node collector's own group, not the cluster collector's + cg := properties.EntityProperty{ + Name: "Collector Group", + Value: properties.GetTextCreated(hasCg), + Status: properties.GetSuccessOrTransitioning(hasCg == isEnabled), + } + + deployed, deployedError := analyzeDeployed(resources.NodeCollector.CollectorsGroup) + ready := analyzeCollectorReady(resources.NodeCollector.CollectorsGroup) + ds := analyzeDaemonSet(resources.NodeCollector.DaemonSet, isEnabled) + // TODO: implement our own pod lister to figure out how many are updated and ready which isn't available in the daemonset status + desiredNodes, currentNodes, updatedNodes, availableNodes := analyzeDsReplicas(resources.NodeCollector.DaemonSet) + + return NodeCollectorAnalyze{ + Enabled: enabled, + CollectorGroup: cg, + Deployed: deployed, + DeployedError: deployedError, + CollectorReady: ready, + DaemonSet: ds, + DesiredNodes: desiredNodes, + CurrentNodes: currentNodes, + UpdatedNodes: updatedNodes, + AvailableNodes: availableNodes, + } +} + +func summarizeStatus(clusterCollector ClusterCollectorAnalyze, nodeCollector NodeCollectorAnalyze) (bool, bool) { + isSettled := true // everything is settled, unless we find property with status transitioning + hasErrors := false // there is no error, unless we find property with status error + + var allProperties = []*properties.EntityProperty{ + &clusterCollector.Enabled, + &clusterCollector.CollectorGroup, + clusterCollector.Deployed, + clusterCollector.DeployedError, + clusterCollector.CollectorReady, + &clusterCollector.DeploymentCreated, + clusterCollector.ExpectedReplicas, + clusterCollector.HealthyReplicas, + clusterCollector.FailedReplicas, + clusterCollector.FailedReplicasReason, + &nodeCollector.Enabled, + &nodeCollector.CollectorGroup, + nodeCollector.Deployed, + nodeCollector.DeployedError, + nodeCollector.CollectorReady, +
&nodeCollector.DaemonSet, + nodeCollector.DesiredNodes, + nodeCollector.CurrentNodes, + nodeCollector.UpdatedNodes, + nodeCollector.AvailableNodes, + } + + for _, property := range allProperties { + if property == nil { + continue + } + switch property.Status { + case properties.PropertyStatusError: + hasErrors = true + case properties.PropertyStatusTransitioning: + isSettled = false + } + } + + return isSettled, hasErrors +} + +func AnalyzeOdigos(resources *OdigosResources) *OdigosAnalyze { + clusterCollector := analyzeClusterCollector(resources) + nodeCollector := analyzeNodeCollector(resources) + isSettled, hasErrors := summarizeStatus(clusterCollector, nodeCollector) + odigosVersion := properties.EntityProperty{ + Name: "Odigos Version", + Value: resources.OdigosVersion, + } + + return &OdigosAnalyze{ + OdigosVersion: odigosVersion, + NumberOfDestinations: len(resources.Destinations.Items), + NumberOfSources: len(resources.InstrumentationConfigs.Items), + ClusterCollector: clusterCollector, + NodeCollector: nodeCollector, + + IsSettled: isSettled, + HasErrors: hasErrors, + } +} diff --git a/k8sutils/pkg/describe/odigos/resources.go b/k8sutils/pkg/describe/odigos/resources.go index db6fc389f9..e0080befab 100644 --- a/k8sutils/pkg/describe/odigos/resources.go +++ b/k8sutils/pkg/describe/odigos/resources.go @@ -7,6 +7,7 @@ import ( odigosclientset "github.com/odigos-io/odigos/api/generated/odigos/clientset/versioned/typed/odigos/v1alpha1" odigosv1 "github.com/odigos-io/odigos/api/odigos/v1alpha1" "github.com/odigos-io/odigos/k8sutils/pkg/consts" + "github.com/odigos-io/odigos/k8sutils/pkg/getters" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -26,6 +27,7 @@ type NodeCollectorResources struct { } type OdigosResources struct { + OdigosVersion string ClusterCollector ClusterCollectorResources NodeCollector NodeCollectorResources Destinations *odigosv1.DestinationList @@ -115,6 +117,12 @@ func 
GetRelevantOdigosResources(ctx context.Context, kubeClient kubernetes.Inter odigos := OdigosResources{} + odigosVersion, err := getters.GetOdigosVersionInClusterFromConfigMap(ctx, kubeClient, odigosNs) + if err != nil { + return nil, err + } + odigos.OdigosVersion = odigosVersion + cc, err := getClusterCollectorResources(ctx, kubeClient, odigosClient, odigosNs) if err != nil { return nil, err diff --git a/k8sutils/pkg/describe/properties/properties.go b/k8sutils/pkg/describe/properties/properties.go new file mode 100644 index 0000000000..f6ef4d6574 --- /dev/null +++ b/k8sutils/pkg/describe/properties/properties.go @@ -0,0 +1,28 @@ +package properties + +type PropertyStatus string + +const ( + + // the property is in it's desired state + PropertyStatusSuccess PropertyStatus = "success" + + // the property is not in it's desired state, but it's state might be temporary + // if wait some time, it might reconcile to the desired state (or not) + PropertyStatusTransitioning PropertyStatus = "transitioning" + + // the property is not in it's desired state, and it's state is not expected to change + PropertyStatusError PropertyStatus = "error" +) + +type EntityProperty struct { + + // The name of the property being described + Name string `json:"name"` + + // The value to display for this property + Value interface{} `json:"value"` + + // The status of the property actual state + Status PropertyStatus `json:"status,omitempty"` +} diff --git a/k8sutils/pkg/describe/properties/utils.go b/k8sutils/pkg/describe/properties/utils.go new file mode 100644 index 0000000000..b182f73672 --- /dev/null +++ b/k8sutils/pkg/describe/properties/utils.go @@ -0,0 +1,25 @@ +package properties + +func GetTextCreated(created bool) string { + if created { + return "created" + } else { + return "not created" + } +} + +func GetSuccessOrTransitioning(matchExpected bool) PropertyStatus { + if matchExpected { + return PropertyStatusSuccess + } else { + return PropertyStatusTransitioning + } +} + 
+func GetSuccessOrError(matchExpected bool) PropertyStatus { + if matchExpected { + return PropertyStatusSuccess + } else { + return PropertyStatusError + } +}