diff --git a/incubator/virtualcluster/pkg/syncer/metrics/metrics.go b/incubator/virtualcluster/pkg/syncer/metrics/metrics.go index f2e83294a..c9695a60c 100644 --- a/incubator/virtualcluster/pkg/syncer/metrics/metrics.go +++ b/incubator/virtualcluster/pkg/syncer/metrics/metrics.go @@ -28,6 +28,8 @@ const ( PodOperationsKey = "pod_operations_total" PodOperationsDurationKey = "pod_operations_duration_seconds" PodOperationsErrorsKey = "pod_operations_errors_total" + CheckerMissMatchKey = "checker_missmatch_count" + CheckerRemedyKey = "checker_remedy_count" ) var ( @@ -56,6 +58,22 @@ var ( }, []string{"operation_type"}, ) + CheckerMissMatchStats = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Subsystem: ResourceSyncerSubsystem, + Name: CheckerMissMatchKey, + Help: "Last checker scan results for mismatched resources.", + }, + []string{"counter_name"}, + ) + CheckerRemedyStats = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: ResourceSyncerSubsystem, + Name: CheckerRemedyKey, + Help: "Cumulative number of checker remediation actions.", + }, + []string{"counter_name"}, + ) ) var registerMetrics sync.Once @@ -66,6 +84,8 @@ func Register() { prometheus.MustRegister(PodOperations) prometheus.MustRegister(PodOperationsDuration) prometheus.MustRegister(PodOperationsErrors) + prometheus.MustRegister(CheckerMissMatchStats) + prometheus.MustRegister(CheckerRemedyStats) }) } diff --git a/incubator/virtualcluster/pkg/syncer/resources/configmap/checker.go b/incubator/virtualcluster/pkg/syncer/resources/configmap/checker.go index a55862049..2c695887b 100644 --- a/incubator/virtualcluster/pkg/syncer/resources/configmap/checker.go +++ b/incubator/virtualcluster/pkg/syncer/resources/configmap/checker.go @@ -18,6 +18,7 @@ package configmap import ( "fmt" "sync" + "sync/atomic" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -29,9 +30,12 @@ import ( "k8s.io/klog" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/conversion" + "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/metrics" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/reconciler" ) +var numMissMatchedConfigMaps uint64 + // StartPeriodChecker starts the period checker for data consistency check. Checker is // blocking so should be called via a goroutine. func (c *controller) StartPeriodChecker(stopCh <-chan struct{}) error { @@ -58,6 +62,7 @@ func (c *controller) checkConfigMaps() { } wg := sync.WaitGroup{} + numMissMatchedConfigMaps = 0 for _, clusterName := range clusterNames { wg.Add(1) @@ -88,12 +93,16 @@ func (c *controller) checkConfigMaps() { deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(pConfigMap.UID)) if err = c.configMapClient.ConfigMaps(pConfigMap.Namespace).Delete(pConfigMap.Name, deleteOptions); err != nil { klog.Errorf("error deleting pConfigMap %v/%v in super master: %v", pConfigMap.Namespace, pConfigMap.Name, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numDeletedOrphanSuperMasterConfigMaps").Inc() } continue } klog.Errorf("error getting vConfigMap %s/%s from cluster %s cache: %v", vNamespace, pConfigMap.Name, clusterName, err) } } + + metrics.CheckerMissMatchStats.WithLabelValues("numMissMatchedConfigMaps").Set(float64(numMissMatchedConfigMaps)) } // checkConfigMapsOfTenantCluster checks to see if configmaps in specific cluster keeps consistency. @@ -112,6 +121,8 @@ func (c *controller) checkConfigMapsOfTenantCluster(clusterName string) { // pConfigMap not found and vConfigMap still exists, we need to create pConfigMap again if err := c.multiClusterConfigMapController.RequeueObject(clusterName, &configMapList.Items[i], reconciler.AddEvent); err != nil { klog.Errorf("error requeue vConfigMap %v/%v in cluster %s: %v", vConfigMap.Namespace, vConfigMap.Name, clusterName, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numRequeuedTenantConfigMaps").Inc() } continue } @@ -127,6 +138,7 @@ func (c *controller) checkConfigMapsOfTenantCluster(clusterName string) { } updated := conversion.Equality(spec).CheckConfigMapEquality(pConfigMap, &vConfigMap) if updated != nil { + atomic.AddUint64(&numMissMatchedConfigMaps, 1) klog.Warningf("ConfigMap %v/%v diff in super&tenant master", vConfigMap.Namespace, vConfigMap.Name) } } diff --git a/incubator/virtualcluster/pkg/syncer/resources/endpoints/checker.go b/incubator/virtualcluster/pkg/syncer/resources/endpoints/checker.go index 34998d26c..3cf975b39 100644 --- a/incubator/virtualcluster/pkg/syncer/resources/endpoints/checker.go +++ b/incubator/virtualcluster/pkg/syncer/resources/endpoints/checker.go @@ -18,6 +18,7 @@ package endpoints import ( "fmt" "sync" + "sync/atomic" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -27,8 +28,12 @@ import ( "k8s.io/klog" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/conversion" + "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/metrics" ) +var numMissingEndPoints uint64 +var numMissMatchedEndPoints uint64 + // StartPeriodChecker starts the period checker for data consistency check. Checker is // blocking so should be called via a goroutine. func (c *controller) StartPeriodChecker(stopCh <-chan struct{}) error { @@ -54,7 +59,8 @@ func (c *controller) checkEndPoints() { klog.Infof("tenant masters has no clusters, give up period checker") return } - + numMissingEndPoints = 0 + numMissMatchedEndPoints = 0 wg := sync.WaitGroup{} for _, clusterName := range clusterNames { @@ -65,6 +71,8 @@ func (c *controller) checkEndPoints() { }(clusterName) } wg.Wait() + metrics.CheckerMissMatchStats.WithLabelValues("numMissingEndPoints").Set(float64(numMissingEndPoints)) + metrics.CheckerMissMatchStats.WithLabelValues("numMissMatchedEndPoints").Set(float64(numMissMatchedEndPoints)) } // checkEndPointsOfTenantCluster checks to see if endpoints controller in tenant and super master working consistently. @@ -82,6 +90,7 @@ func (c *controller) checkEndPointsOfTenantCluster(clusterName string) { if errors.IsNotFound(err) { // pEp not found and vEp still exists, report the inconsistent ep controller behavior klog.Errorf("Cannot find pEp %v/%v in super master", targetNamespace, vEp.Name) + atomic.AddUint64(&numMissingEndPoints, 1) continue } if err != nil { @@ -89,6 +98,7 @@ func (c *controller) checkEndPointsOfTenantCluster(clusterName string) { } updated := conversion.Equality(nil).CheckEndpointsEquality(pEp, &vEp) if updated != nil { + atomic.AddUint64(&numMissMatchedEndPoints, 1) klog.Warningf("Endpoint %v/%v diff in super&tenant master", targetNamespace, vEp.Name) } } diff --git a/incubator/virtualcluster/pkg/syncer/resources/endpoints/dws.go b/incubator/virtualcluster/pkg/syncer/resources/endpoints/dws.go index a236999e6..413a7e31f 100644 --- a/incubator/virtualcluster/pkg/syncer/resources/endpoints/dws.go +++ b/incubator/virtualcluster/pkg/syncer/resources/endpoints/dws.go @@ -17,9 +17,12 @@ limitations under the License. package endpoints import ( + "fmt" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/cache" "k8s.io/klog" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/constants" @@ -28,13 +31,18 @@ import ( ) func (c *controller) StartDWS(stopCh <-chan struct{}) error { - // FIXME(zhuangqh): temorary bypass endpoints dws - // sign away the right of control to super master kcm. - return nil + if !cache.WaitForCacheSync(stopCh, c.endpointsSynced) { + return fmt.Errorf("failed to wait for caches to sync") + } + return c.multiClusterEndpointsController.Start(stopCh) } // The reconcile logic for tenant master endpoints informer func (c *controller) Reconcile(request reconciler.Request) (reconciler.Result, error) { + if request.Namespace != "default" || request.Name != "kubernetes" { + // For now, we bypass all ep events beside the default kubernetes ep. The tenant/master ep controllers handle ep lifecycle independently. + return reconciler.Result{}, nil + } klog.Infof("reconcile endpoints %s/%s %s event for cluster %s", request.Namespace, request.Name, request.Event, request.Cluster.Name) switch request.Event { diff --git a/incubator/virtualcluster/pkg/syncer/resources/namespace/checker.go b/incubator/virtualcluster/pkg/syncer/resources/namespace/checker.go index 8a19fda8b..83076cd10 100644 --- a/incubator/virtualcluster/pkg/syncer/resources/namespace/checker.go +++ b/incubator/virtualcluster/pkg/syncer/resources/namespace/checker.go @@ -17,7 +17,6 @@ package namespace import ( "fmt" - "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/constants" "sync" v1 "k8s.io/api/core/v1" @@ -29,7 +28,9 @@ import ( "k8s.io/client-go/tools/cache" "k8s.io/klog" + "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/constants" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/conversion" + "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/metrics" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/reconciler" ) @@ -90,6 +91,8 @@ func (c *controller) checkNamespaces() { } if err := c.namespaceClient.Namespaces().Delete(pNamespace.Name, opts); err != nil { klog.Errorf("error deleting pNamespace %s in super master: %v", pNamespace.Name, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numDeletedOrphanSuperMasterNamespaces").Inc() } continue } @@ -114,6 +117,8 @@ func (c *controller) checkNamespacesOfTenantCluster(clusterName string) { // pNamespace not found and vNamespace still exists, we need to create pNamespace again if err := c.multiClusterNamespaceController.RequeueObject(clusterName, &namespaceList.Items[i], reconciler.AddEvent); err != nil { klog.Errorf("error requeue vNamespace %s in cluster %s: %v", vNamespace.Name, clusterName, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numRequeuedTenantNamespaces").Inc() } continue } diff --git a/incubator/virtualcluster/pkg/syncer/resources/persistentvolume/checker.go b/incubator/virtualcluster/pkg/syncer/resources/persistentvolume/checker.go index d9e870912..f131a666c 100644 --- a/incubator/virtualcluster/pkg/syncer/resources/persistentvolume/checker.go +++ b/incubator/virtualcluster/pkg/syncer/resources/persistentvolume/checker.go @@ -19,6 +19,7 @@ package persistentvolume import ( "fmt" "sync" + "sync/atomic" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -30,8 +31,12 @@ import ( "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/constants" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/conversion" + "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/metrics" ) +var numClaimMissMatchedPVs uint64 +var numSpecMissMatchedPVs uint64 + func (c *controller) StartPeriodChecker(stopCh <-chan struct{}) error { if !cache.WaitForCacheSync(stopCh, c.pvSynced, c.pvcSynced) { return fmt.Errorf("failed to wait for caches to sync before starting Service checker") @@ -49,6 +54,8 @@ func (c *controller) checkPVs() { return } wg := sync.WaitGroup{} + numClaimMissMatchedPVs = 0 + numSpecMissMatchedPVs = 0 for _, clusterName := range clusterNames { wg.Add(1) @@ -84,6 +91,7 @@ func (c *controller) checkPVs() { vPVObj, err := c.multiClusterPersistentVolumeController.Get(clusterName, "", pPV.Name) if err != nil { if errors.IsNotFound(err) { + metrics.CheckerRemedyStats.WithLabelValues("numRequeuedSuperMasterPVs").Inc() c.queue.Add(pPV.Name) } klog.Errorf("fail to get pv %s from cluster %s: %v", pPV.Name, clusterName, err) @@ -92,9 +100,13 @@ func (c *controller) checkPVs() { vPV := vPVObj.(*v1.PersistentVolume) if vPV.Spec.ClaimRef == nil || vPV.Spec.ClaimRef.Name != pPVC.Name || vPV.Spec.ClaimRef.Namespace != vNamespace { klog.Errorf("vPV %v from cluster %s is not bound to the correct pvc", vPV, clusterName) + numClaimMissMatchedPVs++ } } } + + metrics.CheckerMissMatchStats.WithLabelValues("numClaimMissMatchedPVs").Set(float64(numClaimMissMatchedPVs)) + metrics.CheckerMissMatchStats.WithLabelValues("numSpecMissMatchedPVs").Set(float64(numSpecMissMatchedPVs)) } func (c *controller) checkPersistentVolumeOfTenantCluster(clusterName string) { @@ -124,6 +136,8 @@ func (c *controller) checkPersistentVolumeOfTenantCluster(clusterName string) { } if err := tenantClient.CoreV1().PersistentVolumes().Delete(vPV.Name, opts); err != nil { klog.Errorf("error deleting pv %v in cluster %s: %v", vPV.Name, clusterName, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numDeletedOrphanTenantPVs").Inc() } } else { klog.Errorf("failed to get pPV %s from super master cache: %v", vPV.Name, err) @@ -132,6 +146,7 @@ func (c *controller) checkPersistentVolumeOfTenantCluster(clusterName string) { } updatedPVSpec := conversion.Equality(nil).CheckPVSpecEquality(&pPV.Spec, &vPV.Spec) if updatedPVSpec != nil { + atomic.AddUint64(&numSpecMissMatchedPVs, 1) klog.Warningf("spec of pv %v diff in super&tenant master %s", vPV.Name, clusterName) } } diff --git a/incubator/virtualcluster/pkg/syncer/resources/persistentvolumeclaim/checker.go b/incubator/virtualcluster/pkg/syncer/resources/persistentvolumeclaim/checker.go index 7c79954a6..27618866c 100644 --- a/incubator/virtualcluster/pkg/syncer/resources/persistentvolumeclaim/checker.go +++ b/incubator/virtualcluster/pkg/syncer/resources/persistentvolumeclaim/checker.go @@ -19,6 +19,7 @@ package persistentvolumeclaim import ( "fmt" "sync" + "sync/atomic" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -29,9 +30,12 @@ import ( "k8s.io/klog" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/conversion" + "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/metrics" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/reconciler" ) +var numMissMatchedPVCs uint64 + func (c *controller) StartPeriodChecker(stopCh <-chan struct{}) error { if !cache.WaitForCacheSync(stopCh, c.pvcSynced) { return fmt.Errorf("failed to wait for caches to sync before starting Service checker") @@ -51,6 +55,7 @@ func (c *controller) checkPVCs() { } wg := sync.WaitGroup{} + numMissMatchedPVCs = 0 for _, clusterName := range clusterNames { wg.Add(1) @@ -78,10 +83,14 @@ func (c *controller) checkPVCs() { deleteOptions := metav1.NewPreconditionDeleteOptions(string(pPVC.UID)) if err = c.pvcClient.PersistentVolumeClaims(pPVC.Namespace).Delete(pPVC.Name, deleteOptions); err != nil { klog.Errorf("error deleting pPVC %s/%s in super master: %v", pPVC.Namespace, pPVC.Name, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numDeletedOrphanSuperMasterPVCs").Inc() } continue } } + + metrics.CheckerMissMatchStats.WithLabelValues("numMissMatchedPVCs").Set(float64(numMissMatchedPVCs)) } func (c *controller) checkPVCOfTenantCluster(clusterName string) { @@ -98,6 +107,8 @@ func (c *controller) checkPVCOfTenantCluster(clusterName string) { if errors.IsNotFound(err) { if err := c.multiClusterPersistentVolumeClaimController.RequeueObject(clusterName, &pvcList.Items[i], reconciler.AddEvent); err != nil { klog.Errorf("error requeue vPVC %v/%v in cluster %s: %v", vPVC.Namespace, vPVC.Name, clusterName, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numRequeuedTenantPVCs").Inc() } continue } @@ -114,6 +125,7 @@ func (c *controller) checkPVCOfTenantCluster(clusterName string) { } updatedPVC := conversion.Equality(spec).CheckPVCEquality(pPVC, &vPVC) if updatedPVC != nil { + atomic.AddUint64(&numMissMatchedPVCs, 1) klog.Warningf("spec of pvc %v/%v diff in super&tenant master", vPVC.Namespace, vPVC.Name) } } diff --git a/incubator/virtualcluster/pkg/syncer/resources/pod/checker.go b/incubator/virtualcluster/pkg/syncer/resources/pod/checker.go index 0b912bd09..4856934a4 100644 --- a/incubator/virtualcluster/pkg/syncer/resources/pod/checker.go +++ b/incubator/virtualcluster/pkg/syncer/resources/pod/checker.go @@ -19,6 +19,7 @@ package pod import ( "fmt" "sync" + "sync/atomic" "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" @@ -32,6 +33,7 @@ import ( "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/constants" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/conversion" + "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/metrics" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/reconciler" ) @@ -40,6 +42,10 @@ const ( minimumGracePeriodInSeconds = 30 ) +var numStatusMissMatchedPods uint64 +var numSpecMissMatchedPods uint64 +var numUWMetaMissMatchedPods uint64 + // StartPeriodChecker starts the period checker for data consistency check. Checker is // blocking so should be called via a goroutine. func (c *controller) StartPeriodChecker(stopCh <-chan struct{}) error { @@ -141,6 +147,10 @@ func (c *controller) checkPods() { wg := sync.WaitGroup{} + numStatusMissMatchedPods = 0 + numSpecMissMatchedPods = 0 + numUWMetaMissMatchedPods = 0 + for _, clusterName := range clusterNames { wg.Add(1) go func(clusterName string) { @@ -179,6 +189,8 @@ func (c *controller) checkPods() { deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(pPod.UID)) if err = c.client.Pods(pPod.Namespace).Delete(pPod.Name, deleteOptions); err != nil { klog.Errorf("error deleting pPod %v/%v in super master: %v", pPod.Namespace, pPod.Name, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numDeletedOrphanSuperMasterPods").Inc() } } continue @@ -187,9 +199,14 @@ func (c *controller) checkPods() { // pod has been updated by super master if !equality.Semantic.DeepEqual(vPod.Status, pPod.Status) { + numStatusMissMatchedPods++ klog.Warningf("status of pod %v/%v diff in super&tenant master", pPod.Namespace, pPod.Name) } } + + metrics.CheckerMissMatchStats.WithLabelValues("numStatusMissMatchedPods").Set(float64(numStatusMissMatchedPods)) + metrics.CheckerMissMatchStats.WithLabelValues("numSpecMissMatchedPods").Set(float64(numSpecMissMatchedPods)) + metrics.CheckerMissMatchStats.WithLabelValues("numUWMetaMissMatchedPods").Set(float64(numUWMetaMissMatchedPods)) } // checkPodsOfTenantCluster checks to see if pods in specific cluster keeps consistency. @@ -226,6 +243,8 @@ func (c *controller) checkPodsOfTenantCluster(clusterName string) { // pPod not found and vPod still exists, we need to create pPod again if err := c.multiClusterPodController.RequeueObject(clusterName, &podList.Items[i], reconciler.AddEvent); err != nil { klog.Errorf("error requeue vpod %v/%v in cluster %s: %v", vPod.Namespace, vPod.Name, clusterName, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numRequeuedTenantPods").Inc() } } continue @@ -242,11 +261,13 @@ func (c *controller) checkPodsOfTenantCluster(clusterName string) { } updatedPod := conversion.Equality(spec).CheckPodEquality(pPod, &podList.Items[i]) if updatedPod != nil { + atomic.AddUint64(&numSpecMissMatchedPods, 1) klog.Warningf("spec of pod %v/%v diff in super&tenant master", vPod.Namespace, vPod.Name) } updatedMeta := conversion.Equality(spec).CheckUWObjectMetaEquality(&pPod.ObjectMeta, &podList.Items[i].ObjectMeta) if updatedMeta != nil { + atomic.AddUint64(&numUWMetaMissMatchedPods, 1) klog.Warningf("UWObjectMeta of pod %v/%v diff in super&tenant master", vPod.Namespace, vPod.Name) } } diff --git a/incubator/virtualcluster/pkg/syncer/resources/secret/checker.go b/incubator/virtualcluster/pkg/syncer/resources/secret/checker.go index 41cf2a132..a4205c495 100644 --- a/incubator/virtualcluster/pkg/syncer/resources/secret/checker.go +++ b/incubator/virtualcluster/pkg/syncer/resources/secret/checker.go @@ -19,6 +19,7 @@ package secret import ( "fmt" "sync" + "sync/atomic" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -30,9 +31,13 @@ import ( "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/constants" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/conversion" + "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/metrics" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/reconciler" ) +var numMissMatchedOpaqueSecrets uint64 +var numMissMatchedSASecrets uint64 + func (c *controller) StartPeriodChecker(stopCh <-chan struct{}) error { if !cache.WaitForCacheSync(stopCh, c.secretSynced) { return fmt.Errorf("failed to wait for caches to sync before starting secret checker") @@ -50,11 +55,14 @@ func (c *controller) checkSecrets() { } var wg sync.WaitGroup + numMissMatchedOpaqueSecrets = 0 + numMissMatchedSASecrets = 0 + for _, clusterName := range clusterNames { wg.Add(1) go func(clusterName string) { defer wg.Done() - c.checkNormalSecretOfTenantCluster(clusterName) + c.checkSecretOfTenantCluster(clusterName) }(clusterName) } wg.Wait() @@ -104,12 +112,17 @@ func (c *controller) checkSecrets() { deleteOptions := metav1.NewPreconditionDeleteOptions(string(pSecret.UID)) if err := c.secretClient.Secrets(pSecret.Namespace).Delete(pSecret.Name, deleteOptions); err != nil { klog.Errorf("error deleting pSecret %s/%s in super master: %v", pSecret.Namespace, pSecret.Name, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numDeletedOrphanSuperMasterSecrets").Inc() } } } + + metrics.CheckerMissMatchStats.WithLabelValues("numMissMatchedOpaqueSecrets").Set(float64(numMissMatchedOpaqueSecrets)) + metrics.CheckerMissMatchStats.WithLabelValues("numMissMatchedSASecrets").Set(float64(numMissMatchedSASecrets)) } -func (c *controller) checkNormalSecretOfTenantCluster(clusterName string) { +func (c *controller) checkSecretOfTenantCluster(clusterName string) { listObj, err := c.multiClusterSecretController.List(clusterName) if err != nil { klog.Errorf("error listing secrets from cluster %s informer cache: %v", clusterName, err) @@ -129,6 +142,8 @@ func (c *controller) checkNormalSecretOfTenantCluster(clusterName string) { if errors.IsNotFound(err) { if err := c.multiClusterSecretController.RequeueObject(clusterName, &secretList.Items[i], reconciler.AddEvent); err != nil { klog.Errorf("error requeue vSecret %v/%v in cluster %s: %v", vSecret.Namespace, vSecret.Name, clusterName, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numRequeuedTenantOpaqueSecrets").Inc() } continue } @@ -146,6 +161,7 @@ func (c *controller) checkNormalSecretOfTenantCluster(clusterName string) { updatedSecret := conversion.Equality(spec).CheckSecretEquality(pSecret, &secretList.Items[i]) if updatedSecret != nil { + atomic.AddUint64(&numMissMatchedOpaqueSecrets, 1) klog.Warningf("spec of secret %v/%v diff in super&tenant master", vSecret.Namespace, vSecret.Name) } } @@ -158,6 +174,8 @@ func (c *controller) checkServiceAccountTokenTypeSecretOfTenantCluster(clusterNa if errors.IsNotFound(err) || len(secretList) == 0 { if err := c.multiClusterSecretController.RequeueObject(clusterName, vSecret, reconciler.AddEvent); err != nil { klog.Errorf("error requeue service account type vSecret %v/%v in cluster %s: %v", vSecret.Namespace, vSecret.Name, clusterName, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numRequeuedTenantSASecrets").Inc() } return } @@ -180,6 +198,7 @@ func (c *controller) checkServiceAccountTokenTypeSecretOfTenantCluster(clusterNa updatedSecret := conversion.Equality(spec).CheckSecretEquality(secretList[0], vSecret) if updatedSecret != nil { + atomic.AddUint64(&numMissMatchedSASecrets, 1) klog.Warningf("spec of service account token type secret %v/%v diff in super&tenant master", vSecret.Namespace, vSecret.Name) } } diff --git a/incubator/virtualcluster/pkg/syncer/resources/service/checker.go b/incubator/virtualcluster/pkg/syncer/resources/service/checker.go index 42c79da07..327f4f505 100644 --- a/incubator/virtualcluster/pkg/syncer/resources/service/checker.go +++ b/incubator/virtualcluster/pkg/syncer/resources/service/checker.go @@ -19,6 +19,7 @@ package service import ( "fmt" "sync" + "sync/atomic" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -29,9 +30,12 @@ import ( "k8s.io/klog" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/conversion" + "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/metrics" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/reconciler" ) +var numMissMatchedServices uint64 + func (c *controller) StartPeriodChecker(stopCh <-chan struct{}) error { if !cache.WaitForCacheSync(stopCh, c.serviceSynced) { return fmt.Errorf("failed to wait for caches to sync before starting Service checker") @@ -51,6 +55,7 @@ func (c *controller) checkServices() { } wg := sync.WaitGroup{} + numMissMatchedServices = 0 for _, clusterName := range clusterNames { wg.Add(1) @@ -78,10 +83,14 @@ func (c *controller) checkServices() { deleteOptions := metav1.NewPreconditionDeleteOptions(string(pService.UID)) if err = c.serviceClient.Services(pService.Namespace).Delete(pService.Name, deleteOptions); err != nil { klog.Errorf("error deleting pService %s/%s in super master: %v", pService.Namespace, pService.Name, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numDeletedOrphanSuperMasterServices").Inc() } continue } } + + metrics.CheckerMissMatchStats.WithLabelValues("numMissMatchedServices").Set(float64(numMissMatchedServices)) } func (c *controller) checkServicesOfTenantCluster(clusterName string) { @@ -98,6 +107,8 @@ func (c *controller) checkServicesOfTenantCluster(clusterName string) { if errors.IsNotFound(err) { if err := c.multiClusterServiceController.RequeueObject(clusterName, &svcList.Items[i], reconciler.AddEvent); err != nil { klog.Errorf("error requeue vservice %v/%v in cluster %s: %v", vService.Namespace, vService.Name, clusterName, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numRequeuedTenantServices").Inc() } continue } @@ -113,6 +124,7 @@ func (c *controller) checkServicesOfTenantCluster(clusterName string) { } updatedService := conversion.Equality(spec).CheckServiceEquality(pService, &svcList.Items[i]) if updatedService != nil { + atomic.AddUint64(&numMissMatchedServices, 1) klog.Warningf("spec of service %v/%v diff in super&tenant master", vService.Namespace, vService.Name) } } diff --git a/incubator/virtualcluster/pkg/syncer/resources/serviceaccount/checker.go b/incubator/virtualcluster/pkg/syncer/resources/serviceaccount/checker.go index 73c4f7de9..9edb539ed 100644 --- a/incubator/virtualcluster/pkg/syncer/resources/serviceaccount/checker.go +++ b/incubator/virtualcluster/pkg/syncer/resources/serviceaccount/checker.go @@ -29,6 +29,7 @@ import ( "k8s.io/klog" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/conversion" + "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/metrics" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/reconciler" ) @@ -88,6 +89,8 @@ func (c *controller) checkServiceAccounts() { deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(pSa.UID)) if err = c.saClient.ServiceAccounts(pSa.Namespace).Delete(pSa.Name, deleteOptions); err != nil { klog.Errorf("error deleting pServiceAccount %v/%v in super master: %v", pSa.Namespace, pSa.Name, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numDeletedOrphanSuperMasterServiceAccounts").Inc() } continue } @@ -112,6 +115,8 @@ func (c *controller) checkServiceAccountsOfTenantCluster(clusterName string) { // pSa not found and vSa still exists, we need to create pSa again if err := c.multiClusterServiceAccountController.RequeueObject(clusterName, &saList.Items[i], reconciler.AddEvent); err != nil { klog.Errorf("error requeue vServiceAccount %v/%v in cluster %s: %v", vSa.Namespace, vSa.Name, clusterName, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numRequeuedTenantServiceAccounts").Inc() } continue } diff --git a/incubator/virtualcluster/pkg/syncer/resources/storageclass/checker.go b/incubator/virtualcluster/pkg/syncer/resources/storageclass/checker.go index d853e9add..d30106de6 100644 --- a/incubator/virtualcluster/pkg/syncer/resources/storageclass/checker.go +++ b/incubator/virtualcluster/pkg/syncer/resources/storageclass/checker.go @@ -19,6 +19,7 @@ package storageclass import ( "fmt" "sync" + "sync/atomic" v1 "k8s.io/api/storage/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -30,9 +31,12 @@ import ( "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/constants" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/conversion" + "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/metrics" "github.com/kubernetes-sigs/multi-tenancy/incubator/virtualcluster/pkg/syncer/reconciler" ) +var numMissMatchedStorageClasses uint64 + func (c *controller) StartPeriodChecker(stopCh <-chan struct{}) error { if !cache.WaitForCacheSync(stopCh, c.storageclassSynced) { return fmt.Errorf("failed to wait for caches to sync before starting Service checker") @@ -51,6 +55,7 @@ func (c *controller) checkStorageClass() { } wg := sync.WaitGroup{} + numMissMatchedStorageClasses = 0 for _, clusterName := range clusterNames { wg.Add(1) @@ -75,12 +80,15 @@ func (c *controller) checkStorageClass() { _, err := c.multiClusterStorageClassController.Get(clusterName, "", pStorageClass.Name) if err != nil { if errors.IsNotFound(err) { + metrics.CheckerRemedyStats.WithLabelValues("numRequeuedSuperMasterStorageClasses").Inc() c.queue.Add(reconciler.UwsRequest{Key: pStorageClass.Name, ClusterName: clusterName}) } klog.Errorf("fail to get storageclass from cluster %s: %v", clusterName, err) } } } + + metrics.CheckerMissMatchStats.WithLabelValues("numMissMatchedStorageClasses").Set(float64(numMissMatchedStorageClasses)) } func (c *controller) checkStorageClassOfTenantCluster(clusterName string) { @@ -105,6 +113,8 @@ func (c *controller) checkStorageClassOfTenantCluster(clusterName string) { } if err := tenantClient.StorageV1().StorageClasses().Delete(vStorageClass.Name, opts); err != nil { klog.Errorf("error deleting storageclass %v in cluster %s: %v", vStorageClass.Name, clusterName, err) + } else { + metrics.CheckerRemedyStats.WithLabelValues("numDeletedOrphanTenantStorageClasses").Inc() } continue } @@ -116,6 +126,7 @@ func (c *controller) checkStorageClassOfTenantCluster(clusterName string) { updatedStorageClass := conversion.Equality(nil).CheckStorageClassEquality(pStorageClass, &scList.Items[i]) if updatedStorageClass != nil { + atomic.AddUint64(&numMissMatchedStorageClasses, 1) klog.Warningf("spec of storageClass %v diff in super&tenant master", vStorageClass.Name) } }