Skip to content

Commit

Permalink
Add work probe mode (#77)
Browse files Browse the repository at this point in the history
* Add work probe mode

Signed-off-by: Jian Qiu <jqiu@redhat.com>

* add a deployment prober

1. add more test cases
2. add deployment prober

Signed-off-by: Jian Qiu <jqiu@redhat.com>

* Resolve comments

Signed-off-by: Jian Qiu <jqiu@redhat.com>
  • Loading branch information
qiujian16 committed Mar 8, 2022
1 parent f3917ca commit df7ea69
Show file tree
Hide file tree
Showing 12 changed files with 834 additions and 41 deletions.
7 changes: 7 additions & 0 deletions pkg/addonmanager/constants/constants.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package constants

import "fmt"

const (
// AddonLabel is the label for addon
AddonLabel = "open-cluster-management.io/addon-name"
Expand All @@ -13,3 +15,8 @@ const (
// PreDeleteHookFinalizer is the finalizer for an addon which has deployed hook objects
PreDeleteHookFinalizer = "cluster.open-cluster-management.io/addon-pre-delete"
)

// DeployWorkName returns the name of the deploy work for the given addon,
// in the form "addon-<addonName>-deploy".
func DeployWorkName(addonName string) string {
	const nameFormat = "addon-%s-deploy"
	return fmt.Sprintf(nameFormat, addonName)
}
208 changes: 185 additions & 23 deletions pkg/addonmanager/controllers/addonhealthcheck/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,46 @@ import (

"github.com/openshift/library-go/pkg/controller/factory"
"github.com/openshift/library-go/pkg/operator/events"
"k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"open-cluster-management.io/addon-framework/pkg/addonmanager/constants"
"open-cluster-management.io/addon-framework/pkg/agent"
addonapiv1alpha1 "open-cluster-management.io/api/addon/v1alpha1"
addonv1alpha1client "open-cluster-management.io/api/client/addon/clientset/versioned"
addoninformerv1alpha1 "open-cluster-management.io/api/client/addon/informers/externalversions/addon/v1alpha1"
addonlisterv1alpha1 "open-cluster-management.io/api/client/addon/listers/addon/v1alpha1"
workinformers "open-cluster-management.io/api/client/work/informers/externalversions/work/v1"
worklister "open-cluster-management.io/api/client/work/listers/work/v1"
workapiv1 "open-cluster-management.io/api/work/v1"
)

// addonHealthCheckController reconciles instances of ManagedClusterAddon on the hub.
// It keeps the addon's health check mode in line with the HealthProber declared by
// the registered agent addon and, for work-type probers, derives the addon's
// "Available" condition from the status of the addon's deploy ManifestWork.
type addonHealthCheckController struct {
// addonClient is used to write ManagedClusterAddOn status updates.
addonClient addonv1alpha1client.Interface
// managedClusterAddonLister is the lister obtained from the ManagedClusterAddOn informer.
managedClusterAddonLister addonlisterv1alpha1.ManagedClusterAddOnLister
// workLister looks up the addon's deploy ManifestWork in the addon's namespace.
workLister worklister.ManifestWorkLister
// agentAddons holds the registered agent addons, keyed by addon name.
agentAddons map[string]agent.AgentAddon
// eventRecorder emits events such as HealthCheckModeUpdated.
eventRecorder events.Recorder
}

func NewAddonHealthCheckController(
addonClient addonv1alpha1client.Interface,
addonInformers addoninformerv1alpha1.ManagedClusterAddOnInformer,
workInformers workinformers.ManifestWorkInformer,
agentAddons map[string]agent.AgentAddon,
recorder events.Recorder,
) factory.Controller {
c := &addonHealthCheckController{
addonClient: addonClient,
managedClusterAddonLister: addonInformers.Lister(),
workLister: workInformers.Lister(),
agentAddons: agentAddons,
eventRecorder: recorder.WithComponentSuffix(fmt.Sprintf("addon-healthcheck-controller")),
eventRecorder: recorder.WithComponentSuffix("addon-healthcheck-controller"),
}

return factory.New().WithFilteredEventsInformersQueueKeyFunc(
Expand All @@ -53,8 +61,34 @@ func NewAddonHealthCheckController(
return true
},
addonInformers.Informer()).
WithFilteredEventsInformersQueueKeyFunc(
func(obj runtime.Object) string {
accessor, _ := meta.Accessor(obj)
return fmt.Sprintf("%s/%s", accessor.GetNamespace(), accessor.GetLabels()[constants.AddonLabel])
},
func(obj interface{}) bool {
accessor, _ := meta.Accessor(obj)
if accessor.GetLabels() == nil {
return false
}

addonName, ok := accessor.GetLabels()[constants.AddonLabel]
if !ok {
return false
}

if _, ok := c.agentAddons[addonName]; !ok {
return false
}
if accessor.GetName() != constants.DeployWorkName(addonName) {
return false
}
return true
},
workInformers.Informer(),
).
WithSync(c.sync).
ToController(fmt.Sprintf("addon-healthcheck-controller"), recorder)
ToController("addon-healthcheck-controller", recorder)
}

func (c *addonHealthCheckController) sync(ctx context.Context, syncCtx factory.SyncContext) error {
Expand All @@ -73,44 +107,172 @@ func (c *addonHealthCheckController) sync(ctx context.Context, syncCtx factory.S
return err
}

for addonName, addon := range c.agentAddons {
if addon.GetAgentAddonOptions().HealthProber == nil {
continue
}
return c.syncAddonHealthChecker(ctx, managedClusterAddon, addonName, clusterName)
agentAddon := c.agentAddons[addonName]
if agentAddon == nil {
return nil
}

return nil
return c.syncAddonHealthChecker(ctx, managedClusterAddon, agentAddon)
}

func (c *addonHealthCheckController) syncAddonHealthChecker(ctx context.Context, addon *addonapiv1alpha1.ManagedClusterAddOn, addonName, clusterName string) error {
func (c *addonHealthCheckController) syncAddonHealthChecker(ctx context.Context, addon *addonapiv1alpha1.ManagedClusterAddOn, agentAddon agent.AgentAddon) error {
// for in-place edit
addon = addon.DeepCopy()
// reconcile health check mode
var expectedHealthCheckMode addonapiv1alpha1.HealthCheckMode
agentAddon := c.agentAddons[addonName]
if agentAddon != nil && agentAddon.GetAgentAddonOptions().HealthProber != nil {
switch c.agentAddons[addonName].GetAgentAddonOptions().HealthProber.Type {
// TODO(yue9944882): implement work api health checker
//case agent.HealthProberTypeWork:
//fallthrough
case agent.HealthProberTypeNone:
expectedHealthCheckMode = addonapiv1alpha1.HealthCheckModeCustomized
case agent.HealthProberTypeLease:
fallthrough
default:
expectedHealthCheckMode = addonapiv1alpha1.HealthCheckModeLease
}

if agentAddon.GetAgentAddonOptions().HealthProber == nil {
return nil
}

switch agentAddon.GetAgentAddonOptions().HealthProber.Type {
case agent.HealthProberTypeWork:
fallthrough
case agent.HealthProberTypeNone:
expectedHealthCheckMode = addonapiv1alpha1.HealthCheckModeCustomized
case agent.HealthProberTypeLease:
fallthrough
default:
expectedHealthCheckMode = addonapiv1alpha1.HealthCheckModeLease
}

if expectedHealthCheckMode != addon.Status.HealthCheck.Mode {
addon.Status.HealthCheck.Mode = expectedHealthCheckMode
c.eventRecorder.Eventf("HealthCheckModeUpdated", "Updated health check mode to %s", expectedHealthCheckMode)
_, err := c.addonClient.AddonV1alpha1().ManagedClusterAddOns(clusterName).
_, err := c.addonClient.AddonV1alpha1().ManagedClusterAddOns(addon.Namespace).
UpdateStatus(ctx, addon, metav1.UpdateOptions{})
if err != nil {
return err
}
}

return c.probeAddonStatus(ctx, addon, agentAddon)
}

// probeAddonStatus derives the addon's "Available" condition from the status of its
// deploy ManifestWork and writes it back through updateConditions.
//
// It does nothing unless the agent addon declares a HealthProber of type
// agent.HealthProberTypeWork. Otherwise the condition is decided in this order:
//   - the work cannot be read from the lister: Unknown / WorkNotFound
//   - the work has no Available condition yet: Unknown / WorkNotApplied
//   - the work's Available condition is False: False / WorkApplyFailed
//   - no WorkProber is configured: True / WorkApplied
//   - a probe field has no feedback result: Unknown / NoProbeResult
//   - a probe field fails HealthCheck: False / ProbeUnavailable
//   - every probe field passes: True / ProbeAvailable
//
// The returned error is the one reported by updateConditions, if any.
func (c *addonHealthCheckController) probeAddonStatus(ctx context.Context, addon *addonapiv1alpha1.ManagedClusterAddOn, agentAddon agent.AgentAddon) error {
	// Resolve the prober once instead of re-reading the addon options at every use.
	prober := agentAddon.GetAgentAddonOptions().HealthProber
	if prober == nil || prober.Type != agent.HealthProberTypeWork {
		return nil
	}

	// setAvailable records a single "Available" condition on the addon.
	setAvailable := func(status metav1.ConditionStatus, reason, message string) error {
		return c.updateConditions(ctx, addon, metav1.Condition{
			Type:    "Available",
			Status:  status,
			Reason:  reason,
			Message: message,
		})
	}

	addonWork, err := c.workLister.ManifestWorks(addon.Namespace).Get(constants.DeployWorkName(addon.Name))
	if err != nil {
		// Any lister error is reported as the work not being found.
		return setAvailable(metav1.ConditionUnknown, "WorkNotFound", "Work for addon is not found")
	}

	// Check the overall work available condition at first.
	workCond := meta.FindStatusCondition(addonWork.Status.Conditions, workapiv1.WorkAvailable)
	switch {
	case workCond == nil:
		return setAvailable(metav1.ConditionUnknown, "WorkNotApplied", "Work is not applied yet")
	case workCond.Status == metav1.ConditionFalse:
		return setAvailable(metav1.ConditionFalse, "WorkApplyFailed", workCond.Message)
	}

	// Without a work prober the applied work alone is taken as proof of availability.
	workProber := prober.WorkProber
	if workProber == nil {
		return setAvailable(metav1.ConditionTrue, "WorkApplied", "Addon work is applied")
	}

	for _, field := range workProber.ProbeFields {
		result := findResultByIdentifier(field.ResourceIdentifier, addonWork)
		// If no result is returned, the work agent may not have reported the feedback
		// value yet, so the condition is marked unknown rather than false.
		if result == nil {
			return setAvailable(metav1.ConditionUnknown, "NoProbeResult", "Probe results are not returned")
		}

		if err := workProber.HealthCheck(field.ResourceIdentifier, *result); err != nil {
			return setAvailable(metav1.ConditionFalse, "ProbeUnavailable", fmt.Sprintf("Probe addon unavailable with err %v", err))
		}
	}

	return setAvailable(metav1.ConditionTrue, "ProbeAvailable", "Addon is available")
}

// updateConditions applies conds to a copy of the addon's status conditions and
// persists the copy with UpdateStatus. The API call is skipped when applying the
// conditions leaves Status.Conditions semantically unchanged. The addon passed in
// is never modified.
func (c *addonHealthCheckController) updateConditions(ctx context.Context, addon *addonapiv1alpha1.ManagedClusterAddOn, conds ...metav1.Condition) error {
	updated := addon.DeepCopy()
	for i := range conds {
		meta.SetStatusCondition(&updated.Status.Conditions, conds[i])
	}

	// Nothing changed, so there is nothing to write back.
	unchanged := equality.Semantic.DeepEqual(addon.Status.Conditions, updated.Status.Conditions)
	if unchanged {
		return nil
	}

	addonsInNamespace := c.addonClient.AddonV1alpha1().ManagedClusterAddOns(updated.Namespace)
	_, err := addonsInNamespace.UpdateStatus(ctx, updated, metav1.UpdateOptions{})
	return err
}

// findResultByIdentifier returns the status feedback of the first manifest in the
// work's resource status whose group, resource, name and namespace all equal those
// of identifier. It returns nil when no manifest matches, or when the first
// matching manifest carries no feedback values.
func findResultByIdentifier(identifier workapiv1.ResourceIdentifier, work *workapiv1.ManifestWork) *workapiv1.StatusFeedbackResult {
	for _, manifest := range work.Status.ResourceStatus.Manifests {
		resourceMeta := manifest.ResourceMeta
		matches := identifier.Group == resourceMeta.Group &&
			identifier.Resource == resourceMeta.Resource &&
			identifier.Name == resourceMeta.Name &&
			identifier.Namespace == resourceMeta.Namespace
		if !matches {
			continue
		}

		// The first match decides the outcome; later manifests are not consulted.
		if len(manifest.StatusFeedbacks.Values) == 0 {
			return nil
		}
		return &manifest.StatusFeedbacks
	}

	return nil
}
Loading

0 comments on commit df7ea69

Please sign in to comment.