Trigger status-sync reconciles when spec-sync sees mismatching status
Since status-sync watches the replicated policies on the managed cluster
(not on the hub), it has a blind spot: if the hub's copy of the status
drifts out of sync and nothing else triggers a status-sync reconcile
(e.g. a new compliance event or a policy update), the mismatch never
gets corrected. Spec-sync already watches the replicated policies on
the hub, so it can detect when the hub status differs from the managed
cluster status and trigger a status-sync reconcile to have the
status-sync controller fix it.

Relates:
https://issues.redhat.com/browse/ACM-12447

Signed-off-by: mprahl <mprahl@users.noreply.github.com>
mprahl authored and openshift-merge-bot[bot] committed Jul 2, 2024
1 parent c2681fe commit de1345a
Showing 4 changed files with 55 additions and 8 deletions.
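
The mechanism is a controller-runtime channel source: the hub-side spec-sync controller pushes event.GenericEvent values onto a buffered channel, and the status-sync controller consumes that channel as an additional watch source, so each pushed event enqueues a reconcile for the referenced policy. Below is a minimal sketch of that pattern, assuming the struct-based source.Channel API used in this diff (controller-runtime v0.16 era); the function and variable names are illustrative and not part of this repository.

package example

import (
	policiesv1 "open-cluster-management.io/governance-policy-propagator/api/v1"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/event"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
	"sigs.k8s.io/controller-runtime/pkg/source"
)

// setupWithTrigger wires a Policy controller that can also be reconciled on
// demand by sending event.GenericEvent values on the returned channel.
func setupWithTrigger(mgr ctrl.Manager, r reconcile.Reconciler) (chan<- event.GenericEvent, error) {
	triggerCh := make(chan event.GenericEvent, 1024)
	channelSource := &source.Channel{Source: triggerCh, DestBufferSize: 1024}

	err := ctrl.NewControllerManagedBy(mgr).
		For(&policiesv1.Policy{}).
		// Events sent on triggerCh arrive in addition to the regular watch
		// events; EnqueueRequestForObject keys each reconcile request by the
		// event object's namespace and name.
		WatchesRawSource(channelSource, &handler.EnqueueRequestForObject{}).
		Complete(r)

	return triggerCh, err
}
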
controllers/specsync/policy_spec_sync.go (8 additions, 0 deletions)

@@ -8,6 +8,7 @@ import (
 	"fmt"
 	"time"
 
+	"k8s.io/apimachinery/pkg/api/equality"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -18,6 +19,7 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/controller"
+	"sigs.k8s.io/controller-runtime/pkg/event"
 	"sigs.k8s.io/controller-runtime/pkg/handler"
 	logf "sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
@@ -59,6 +61,8 @@ type PolicyReconciler struct {
 	// The namespace that the replicated policies should be synced to.
 	TargetNamespace      string
 	ConcurrentReconciles int
+	// StatusSyncRequests triggers status-sync controller reconciles based on what is observed on the hub
+	StatusSyncRequests chan<- event.GenericEvent
 }
 
 //+kubebuilder:rbac:groups=policy.open-cluster-management.io,resources=policies,verbs=create;delete;get;list;patch;update;watch
@@ -173,6 +177,10 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
 		r.ManagedRecorder.Event(managedPlc, "Normal", "PolicySpecSync",
 			fmt.Sprintf("Policy %s was updated in cluster namespace %s", instance.GetName(),
 				r.TargetNamespace))
+	} else if !equality.Semantic.DeepEqual(instance.Status, managedPlc.Status) {
+		reqLogger.Info("Policy status does not match on the hub. Triggering the status-sync to update it.")
+
+		r.StatusSyncRequests <- event.GenericEvent{Object: managedPlc}
	}
 
 	reqLogger.V(2).Info("Reconciliation complete.")
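
A note on the comparison used above: equality.Semantic is apimachinery's semantic deep-equality helper. It behaves like reflect.DeepEqual except that registered Kubernetes types such as resource.Quantity and metav1.Time are compared by value rather than by raw struct contents. A standalone sketch of that behavior, not code from this repository:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/equality"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// "1" and "1000m" are different representations of the same quantity, so
	// the semantic comparison reports true where a plain struct comparison
	// would not.
	a := resource.MustParse("1")
	b := resource.MustParse("1000m")
	fmt.Println(equality.Semantic.DeepEqual(a, b)) // true
}
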
controllers/statussync/policy_status_sync.go (10 additions, 4 deletions)

@@ -41,6 +41,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/event"
 	"sigs.k8s.io/controller-runtime/pkg/handler"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+	"sigs.k8s.io/controller-runtime/pkg/source"
 
 	"open-cluster-management.io/governance-policy-framework-addon/controllers/uninstall"
 	"open-cluster-management.io/governance-policy-framework-addon/controllers/utils"
@@ -61,17 +62,22 @@ var (
 )
 
 // SetupWithManager sets up the controller with the Manager.
-func (r *PolicyReconciler) SetupWithManager(mgr ctrl.Manager) error {
-	return ctrl.NewControllerManagedBy(mgr).
+func (r *PolicyReconciler) SetupWithManager(mgr ctrl.Manager, additionalSource *source.Channel) error {
+	builder := ctrl.NewControllerManagedBy(mgr).
 		For(&policiesv1.Policy{}).
 		Watches(
 			&corev1.Event{},
 			handler.EnqueueRequestsFromMapFunc(eventMapper),
 			builder.WithPredicates(eventPredicateFuncs),
 		).
 		WithOptions(controller.Options{MaxConcurrentReconciles: r.ConcurrentReconciles}).
-		Named(ControllerName).
-		Complete(r)
+		Named(ControllerName)
+
+	if additionalSource != nil {
+		builder = builder.WatchesRawSource(additionalSource, &handler.EnqueueRequestForObject{})
+	}
+
+	return builder.Complete(r)
 }
 
 // blank assignment to verify that ReconcilePolicy implements reconcile.Reconciler
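
For clarity on how the extra source reaches Reconcile: handler.EnqueueRequestForObject takes the object carried by each GenericEvent and enqueues a request keyed by that object's namespace and name, so the event spec-sync sends for the managed cluster's copy of the policy lands on the status-sync workqueue like any other reconcile request. A small sketch of that mapping (illustrative only; the queue plumbing is handled by controller-runtime):

package example

import (
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/event"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// requestFor shows the request key that handler.EnqueueRequestForObject derives
// from a generic event: the object's own namespace and name.
func requestFor(obj client.Object) reconcile.Request {
	return reconcile.Request{NamespacedName: types.NamespacedName{
		Namespace: obj.GetNamespace(),
		Name:      obj.GetName(),
	}}
}

// trigger is how a producer such as spec-sync asks for a reconcile of one object.
func trigger(ch chan<- event.GenericEvent, obj client.Object) {
	ch <- event.GenericEvent{Object: obj}
}
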
main.go (10 additions, 1 deletion)

@@ -780,6 +780,8 @@ func addControllers(
 	var hubClient client.Client
 	var specSyncRequests chan event.GenericEvent
 	var specSyncRequestsSource *source.Channel
+	var statusSyncRequests chan event.GenericEvent
+	var statusSyncRequestsSource *source.Channel
 
 	if hubMgr == nil {
 		hubCache, err := cache.New(hubCfg,
@@ -830,6 +832,12 @@
 			DestBufferSize: bufferSize,
 		}
 
+		statusSyncRequests = make(chan event.GenericEvent, bufferSize)
+		statusSyncRequestsSource = &source.Channel{
+			Source:         statusSyncRequests,
+			DestBufferSize: bufferSize,
+		}
+
 		hubClient = hubMgr.GetClient()
 	}
 
@@ -852,7 +860,7 @@
 			ConcurrentReconciles: int(tool.Options.EvaluationConcurrency),
 			EventsQueue:          queue,
 			SpecSyncRequests:     specSyncRequests,
-		}).SetupWithManager(managedMgr); err != nil {
+		}).SetupWithManager(managedMgr, statusSyncRequestsSource); err != nil {
 			log.Error(err, "unable to create controller", "controller", "Policy")
 			os.Exit(1)
 		}
@@ -917,6 +925,7 @@ func addControllers(
 		Scheme:               hubMgr.GetScheme(),
 		TargetNamespace:      tool.Options.ClusterNamespace,
 		ConcurrentReconciles: int(tool.Options.EvaluationConcurrency),
+		StatusSyncRequests:   statusSyncRequests,
 	}).SetupWithManager(hubMgr, specSyncRequestsSource); err != nil {
 		log.Error(err, "Unable to create the controller", "controller", specsync.ControllerName)
 		os.Exit(1)
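
One detail worth noting in the wiring above: a single bidirectional channel is created in main and handed out in two shapes, a *source.Channel for the status-sync controller to consume and a send-only chan<- event.GenericEvent for the spec-sync controller's StatusSyncRequests field. A small sketch of why that works (buffer size is illustrative, names mirror the diff):

package example

import (
	"sigs.k8s.io/controller-runtime/pkg/event"
	"sigs.k8s.io/controller-runtime/pkg/source"
)

func wire() (chan<- event.GenericEvent, *source.Channel) {
	// The channel is bidirectional where it is created...
	statusSyncRequests := make(chan event.GenericEvent, 1024)

	// ...the consumer side wraps it as a watch source...
	statusSyncRequestsSource := &source.Channel{
		Source:         statusSyncRequests,
		DestBufferSize: 1024,
	}

	// ...and the producer side only ever sees the send-only view, since Go
	// implicitly converts chan T to chan<- T. This keeps spec-sync from
	// reading events intended for status-sync.
	var sendOnly chan<- event.GenericEvent = statusSyncRequests

	return sendOnly, statusSyncRequestsSource
}
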
test/e2e/case2_status_sync_test.go (27 additions, 3 deletions)

@@ -9,6 +9,7 @@ import (
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/runtime"
 	policiesv1 "open-cluster-management.io/governance-policy-propagator/api/v1"
 	"open-cluster-management.io/governance-policy-propagator/test/utils"
@@ -117,7 +118,7 @@ var _ = Describe("Test status sync", func() {
 				return hubPlc.Object["status"]
 			}, defaultTimeoutSeconds, 1).Should(Equal(managedPlc.Object["status"]))
 		})
-	It("Should set status to Compliant again", func() {
+	It("Should set status to Compliant again", func(ctx SpecContext) {
 		By("Generating an compliant event on the policy")
 		managedPlc := utils.GetWithTimeout(
 			clientManagedDynamic,
@@ -150,9 +151,12 @@
 		Expect((plc.Status.Details)).To(HaveLen(1))
 		Expect((plc.Status.Details[0].History)).To(HaveLen(1))
 		Expect(plc.Status.Details[0].TemplateMeta.GetName()).To(Equal("case2-test-policy-configurationpolicy"))
-		By("Checking if hub policy status is in sync")
+
+		By("Checking if the hub policy status is in sync")
+		var hubPlc *unstructured.Unstructured
+
 		Eventually(func() interface{} {
-			hubPlc := utils.GetWithTimeout(
+			hubPlc = utils.GetWithTimeout(
 				clientHubDynamic,
 				gvrPolicy,
 				case2PolicyName,
@@ -162,6 +166,26 @@
 
 			return hubPlc.Object["status"]
 		}, defaultTimeoutSeconds, 1).Should(Equal(managedPlc.Object["status"]))
+
+		By("Clearing the hub status to verify it gets recovered")
+		delete(hubPlc.Object, "status")
+		_, err = clientHubDynamic.Resource(gvrPolicy).Namespace(clusterNamespaceOnHub).UpdateStatus(
+			ctx, hubPlc, metav1.UpdateOptions{},
+		)
+		Expect(err).ShouldNot(HaveOccurred())
+
+		Eventually(func() interface{} {
+			hubPlc = utils.GetWithTimeout(
+				clientHubDynamic,
+				gvrPolicy,
+				case2PolicyName,
+				clusterNamespaceOnHub,
+				true,
+				defaultTimeoutSeconds)
+
+			return hubPlc.Object["status"]
+		}, defaultTimeoutSeconds, 1).Should(Equal(managedPlc.Object["status"]))
+
 		By("clean up all events")
 		_, err = kubectlManaged("delete", "events", "-n", clusterNamespace, "--all")
 		Expect(err).ShouldNot(HaveOccurred())
