Skip to content

Commit

Permalink
feat: odigos describe command (#1595)
Browse files Browse the repository at this point in the history
Changes:

- Add `odigos describe` command to describe the general status of odigos
itself. Currently implemented Cluster Collector Information
- Add conditions to collectors group status to record if the collectors
deployment failed to succeed. For example, this can happen in the
presence of a validation webhook that rejects the deployment if the image
is not signed according to some requirements
- move some code from `cli` to `k8sutils` to make it available for all
modules in the code.
- some cleanups on logging, so if the deployment fails to succeed, it is
only printed once to autoscaler logs
  • Loading branch information
blumamir authored Oct 17, 2024
1 parent 86f3e41 commit 06f421a
Show file tree
Hide file tree
Showing 25 changed files with 548 additions and 106 deletions.
62 changes: 62 additions & 0 deletions api/config/crd/bases/odigos.io_collectorsgroups.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,68 @@ spec:
status:
description: CollectorsGroupStatus defines the observed state of Collector
properties:
conditions:
description: |-
Represents the observations of a collectorsgroup's current state.
Known .status.conditions.type are: "Available", "Progressing"
items:
description: Condition contains details for one aspect of the current
state of this API Resource.
properties:
lastTransitionTime:
description: |-
lastTransitionTime is the last time the condition transitioned from one status to another.
This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
format: date-time
type: string
message:
description: |-
message is a human readable message indicating details about the transition.
This may be an empty string.
maxLength: 32768
type: string
observedGeneration:
description: |-
observedGeneration represents the .metadata.generation that the condition was set based upon.
For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
with respect to the current state of the instance.
format: int64
minimum: 0
type: integer
reason:
description: |-
reason contains a programmatic identifier indicating the reason for the condition's last transition.
Producers of specific condition types may define expected values and meanings for this field,
and whether the values are considered a guaranteed API.
The value should be a CamelCase string.
This field may not be empty.
maxLength: 1024
minLength: 1
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
type: string
status:
description: status of the condition, one of True, False, Unknown.
enum:
- "True"
- "False"
- Unknown
type: string
type:
description: type of condition in CamelCase or in foo.example.com/CamelCase.
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
type: string
required:
- lastTransitionTime
- message
- reason
- status
- type
type: object
type: array
x-kubernetes-list-map-keys:
- type
x-kubernetes-list-type: map
ready:
type: boolean
receiverSignals:
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions api/odigos/v1alpha1/collectorsgroup_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ type CollectorsGroupStatus struct {
// this is used to determine if a workload should export each signal or not.
// this list is calculated based on the odigos destinations that were configured
ReceiverSignals []common.ObservabilitySignal `json:"receiverSignals,omitempty"`

// Represents the observations of a collectorsgroup's current state.
// Known .status.conditions.type are: "Available", "Progressing"
// +patchMergeKey=type
// +patchStrategy=merge
// +listType=map
// +listMapKey=type
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type" protobuf:"bytes,1,rep,name=conditions"`
}

//+genclient
Expand Down
19 changes: 13 additions & 6 deletions api/odigos/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

55 changes: 22 additions & 33 deletions autoscaler/controllers/gateway/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (

"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

"errors"

"github.com/odigos-io/odigos/autoscaler/utils"
"github.com/odigos-io/odigos/k8sutils/pkg/consts"

Expand Down Expand Up @@ -37,49 +39,37 @@ func syncDeployment(dests *odigosv1.DestinationList, gateway *odigosv1.Collector

secretsVersionHash, err := destinationsSecretsVersionsHash(ctx, c, dests)
if err != nil {
logger.Error(err, "Failed to get secrets hash")
return nil, err
return nil, errors.Join(err, errors.New("failed to get secrets hash"))
}

// Calculate the hash of the config data and the secrets version hash, this is used to make sure the gateway will restart when the config changes
configDataHash := common.Sha256Hash(fmt.Sprintf("%s-%s", configData, secretsVersionHash))
desiredDeployment, err := getDesiredDeployment(dests, configDataHash, gateway, scheme, imagePullSecrets, odigosVersion, memConfig)
if err != nil {
logger.Error(err, "Failed to get desired deployment")
return nil, err
return nil, errors.Join(err, errors.New("failed to get desired deployment"))
}

existing := &appsv1.Deployment{}
if err := c.Get(ctx, client.ObjectKey{Name: gateway.Name, Namespace: gateway.Namespace}, existing); err != nil {
if apierrors.IsNotFound(err) {
logger.V(0).Info("Creating deployment")
newDeployment, err := createDeployment(desiredDeployment, ctx, c)
if err != nil {
logger.Error(err, "failed to create deployment")
return nil, err
}
return newDeployment, nil
} else {
logger.Error(err, "failed to get deployment")
return nil, err
}
existingDeployment := &appsv1.Deployment{}
getError := c.Get(ctx, client.ObjectKey{Name: gateway.Name, Namespace: gateway.Namespace}, existingDeployment)
if getError != nil && !apierrors.IsNotFound(getError) {
return nil, errors.Join(getError, errors.New("failed to get gateway deployment"))
}

logger.V(0).Info("Patching deployment")
newDep, err := patchDeployment(existing, desiredDeployment, ctx, c)
if err != nil {
logger.Error(err, "failed to patch deployment")
return nil, err
}

return newDep, nil
}

func createDeployment(desired *appsv1.Deployment, ctx context.Context, c client.Client) (*appsv1.Deployment, error) {
if err := c.Create(ctx, desired); err != nil {
return nil, err
if apierrors.IsNotFound(getError) {
logger.V(0).Info("Creating new gateway deployment")
err := c.Create(ctx, desiredDeployment)
if err != nil {
return nil, errors.Join(err, errors.New("failed to create gateway deployment"))
}
return desiredDeployment, nil
} else {
logger.V(0).Info("Patching existing gateway deployment")
newDep, err := patchDeployment(existingDeployment, desiredDeployment, ctx, c)
if err != nil {
return nil, errors.Join(err, errors.New("failed to patch gateway deployment"))
}
return newDep, nil
}
return desired, nil
}

func patchDeployment(existing *appsv1.Deployment, desired *appsv1.Deployment, ctx context.Context, c client.Client) (*appsv1.Deployment, error) {
Expand All @@ -90,7 +80,6 @@ func patchDeployment(existing *appsv1.Deployment, desired *appsv1.Deployment, ct
})

if err != nil {
logger.Error(err, "Failed to patch deployment")
return nil, err
}

Expand Down
50 changes: 49 additions & 1 deletion autoscaler/controllers/gateway/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package gateway

import (
"context"
"encoding/json"
"time"

appsv1 "k8s.io/api/apps/v1"

Expand All @@ -11,6 +13,7 @@ import (
k8sconsts "github.com/odigos-io/odigos/k8sutils/pkg/consts"
"github.com/odigos-io/odigos/k8sutils/pkg/env"
"github.com/odigos-io/odigos/k8sutils/pkg/utils"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -23,13 +26,51 @@ var (
}
)

// getCollectorsGroupDeployedConditionsPatch renders a JSON document of the form
// {"status": {"conditions": [<condition>]}} holding a single condition of type
// "Deployed".
//
// When err is nil the condition has status True, together with a fixed success
// reason and message. When err is non-nil the condition has status False, a
// failure reason, and err.Error() as its message. The condition's
// LastTransitionTime is set to the current time on every call.
//
// The encoded document is returned as a string.
func getCollectorsGroupDeployedConditionsPatch(err error) string {
	// Start from the success values and override them together on failure.
	deployed := metav1.Condition{
		Type:    "Deployed",
		Status:  metav1.ConditionTrue,
		Reason:  "GatewayDeployedCreatedSuccessfully",
		Message: "Gateway collector is deployed in the cluster",
	}
	if err != nil {
		deployed.Status = metav1.ConditionFalse
		// in the future, we can be more specific and break it down to
		// more detailed reasons about what exactly failed
		deployed.Reason = "GatewayDeployedCreationFailed"
		deployed.Message = err.Error()
	}
	deployed.LastTransitionTime = metav1.NewTime(time.Now())

	body := map[string]any{
		"status": map[string]any{
			"conditions": []metav1.Condition{deployed},
		},
	}

	// marshal error is ignored as it is not expected to happen
	encoded, _ := json.Marshal(body)
	return string(encoded)
}

func Sync(ctx context.Context, k8sClient client.Client, scheme *runtime.Scheme, imagePullSecrets []string, odigosVersion string) error {
logger := log.FromContext(ctx)

odigosNs := env.GetCurrentNamespace()
var gatewayCollectorGroup odigosv1.CollectorsGroup
err := k8sClient.Get(ctx, client.ObjectKey{Namespace: odigosNs, Name: k8sconsts.OdigosClusterCollectorConfigMapName}, &gatewayCollectorGroup)
if err != nil {
// collectors group is created by the scheduler, after the first destination is added.
// it is however possible that some reconciler (like deployment) triggered and the collectors group will be created shortly.
return client.IgnoreNotFound(err)
}

Expand All @@ -53,7 +94,14 @@ func Sync(ctx context.Context, k8sClient client.Client, scheme *runtime.Scheme,
return err
}

return syncGateway(&dests, &processors, &gatewayCollectorGroup, ctx, k8sClient, scheme, imagePullSecrets, odigosVersion, &odigosConfig)
err = syncGateway(&dests, &processors, &gatewayCollectorGroup, ctx, k8sClient, scheme, imagePullSecrets, odigosVersion, &odigosConfig)
statusPatchString := getCollectorsGroupDeployedConditionsPatch(err)
statusErr := k8sClient.Status().Patch(ctx, &gatewayCollectorGroup, client.RawPatch(types.MergePatchType, []byte(statusPatchString)))
if statusErr != nil {
logger.Error(statusErr, "Failed to patch collectors group status")
// just log the error, do not fail the reconciliation
}
return err
}

func syncGateway(dests *odigosv1.DestinationList, processors *odigosv1.ProcessorList,
Expand Down
Loading

0 comments on commit 06f421a

Please sign in to comment.