From 758448f94d05aca4c3ff66cff8a6fa0eb0d50d37 Mon Sep 17 00:00:00 2001
From: mittachaitu
Date: Sat, 28 Mar 2020 13:26:23 +0530
Subject: [PATCH 1/4] feat(pendingsnapshots): add pending snapshots on CVR by
 talking to peer replicas

Signed-off-by: mittachaitu
---
 .../controller/replica-controller/handler.go |   6 +-
 .../volumereplica/volumereplica.go           | 130 ++++++++++++++++--
 2 files changed, 119 insertions(+), 17 deletions(-)

diff --git a/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go b/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
index 6b09336987..9ff84944f9 100644
--- a/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
+++ b/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
@@ -629,27 +629,28 @@ func (c *CStorVolumeReplicaController) syncCvr(cvr *apis.CStorVolumeReplica) {
     // Get the zfs volume name corresponding to this cvr.
     volumeName, err := volumereplica.GetVolumeName(cvr)
     if err != nil {
-        klog.Errorf("Unable to sync CVR capacity: %v", err)
         c.recorder.Event(
             cvr,
             corev1.EventTypeWarning,
             string(common.FailureCapacitySync),
             string(common.MessageResourceFailCapacitySync),
         )
+        return
     }

     // Get capacity of the volume.
     capacity, err := volumereplica.Capacity(volumeName)
     if err != nil {
-        klog.Errorf("Unable to sync CVR capacity: %v", err)
         c.recorder.Event(
             cvr,
             corev1.EventTypeWarning,
             string(common.FailureCapacitySync),
             string(common.MessageResourceFailCapacitySync),
         )
+        return
     } else {
         cvr.Status.Capacity = *capacity
     }
+
     if os.Getenv(string(common.RebuildEstimates)) == "true" {
         err = volumereplica.GetAndUpdateSnapshotInfo(c.clientset, cvr)
         if err != nil {
             c.recorder.Event(
                 cvr,
                 corev1.EventTypeWarning,
                 "SnapshotList",
                 fmt.Sprintf("Unable to update snapshot list details in cvr status err: %v", err),
             )
         }
+
     }
 }
diff --git a/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go b/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
index 1576c76ec8..b51dd10466 100644
--- a/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
+++ b/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
@@ -34,6 +34,7 @@ import (
     "github.com/openebs/maya/pkg/util"
     zfs "github.com/openebs/maya/pkg/zfs/cmd/v1alpha1"
     "github.com/pkg/errors"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/klog"
 )

@@ -699,24 +700,123 @@ func GetAndUpdateSnapshotInfo(
     }

     // If CVR is in rebuilding go and get snapshot information
-    // from other replicas
-    //if cvr.Status.Phase == apis.CVRStatusRebuilding {
-    //    err = getAndUpdatePendingSnapshotsList(clientset, cvr)
-    //}
+    // from other replicas and add them under the pending snapshot list
+    if cvr.Status.Phase == apis.CVRStatusRebuilding ||
+        cvr.Status.Phase == apis.CVRStatusReconstructingNewReplica {
+        err = getAndAddPendingSnapshotList(clientset, cvr)
+        if err != nil {
+            return errors.Wrapf(err, "failed to update pending snapshots")
+        }
+    }
+
+    // This looks like a hack, but it is required for the following reason:
+    // - In the nth reconciliation the CVR might be Degraded, and in the
+    //   (n+1)th reconciliation it might be Healthy (after completing
+    //   rebuilding), because the CVR status is fetched from zrepl.
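+    // Illustrative example (hypothetical timeline): in reconciliation n this
+    // CVR reports Degraded, so pending snapshots are collected from its peers;
+    // by reconciliation n+1 zrepl may already report it Healthy, so any
+    // entries left in PendingSnapshots must be cleared by the check below.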
+    if cvr.Status.Phase == apis.CVRStatusOnline &&
+        len(cvr.Status.PendingSnapshots) != 0 {
+        cvr.Status.PendingSnapshots = nil
+    }
     return nil
 }

-//func getAndUpdatePendingSnapshotsList(
-//    clientset clientset.Interface, cvr *apis.CStorVolumeReplica) error {
-//    peerCVRList, err := getPeerReplicas(clientset, cvr)
-//    if err != nil {
-//        return errors.Wrapf(err, "failed to get peer CVRs info of volume %s", volName)
-//    }
-//    TODO: Perform following steps
-//    1. Get the snapshot info from peer replicas
-//    2. If snapshot info doesn't exist under CVR.Status.Snapshots then
-//       add it under CVR.Status.PendingSnapshots
-//}
+// getAndAddPendingSnapshotList gets the snapshot information from peer replicas and
+// adds it under the pending snapshot list
+// NOTE: This function will delete a snapshot from pending snapshots if it doesn't exist
+// on other replicas
+func getAndAddPendingSnapshotList(
+    clientset clientset.Interface, cvr *apis.CStorVolumeReplica) error {
+    peerCVRList, err := getPeerReplicas(clientset, cvr)
+    if err != nil {
+        return errors.Wrapf(err, "failed to get peer CVRs of volume replica %s", cvr.Name)
+    }
+
+    peerSnapshotList := getPeerSnapshotInfoList(peerCVRList)
+    if cvr.Status.PendingSnapshots == nil {
+        cvr.Status.PendingSnapshots = map[string]apis.CStorSnapshotInfo{}
+    }
+
+    // Delete pending snapshots that don't exist in the peer snapshot list
+    for snapName := range cvr.Status.PendingSnapshots {
+        if _, ok := peerSnapshotList[snapName]; !ok {
+            delete(cvr.Status.PendingSnapshots, snapName)
+        }
+    }
+
+    // Add peer snapshots if they exist neither in Snapshots nor in
+    // PendingSnapshots of the current CVR
+    for snapName, snapInfo := range peerSnapshotList {
+        if _, ok := cvr.Status.Snapshots[snapName]; !ok {
+            if _, ok := cvr.Status.PendingSnapshots[snapName]; !ok {
+                cvr.Status.PendingSnapshots[snapName] = snapInfo
+            }
+        }
+    }
+
+    return nil
+}
+
+// getPeerReplicas returns the list of peer replicas of the volume
+func getPeerReplicas(
+    clientset clientset.Interface,
+    cvr *apis.CStorVolumeReplica) (*apis.CStorVolumeReplicaList, error) {
+    volName := cvr.GetLabels()[string(apis.PersistentVolumeCPK)]
+    peerCVRList := &apis.CStorVolumeReplicaList{}
+    cvrList, err := clientset.OpenebsV1alpha1().
+        CStorVolumeReplicas(cvr.Namespace).
+        List(metav1.ListOptions{LabelSelector: string(apis.PersistentVolumeCPK) + "=" + volName})
+    if err != nil {
+        return nil, err
+    }
+    for _, obj := range cvrList.Items {
+        if obj.Name != cvr.Name {
+            peerCVRList.Items = append(peerCVRList.Items, obj)
+        }
+    }
+    return peerCVRList, nil
+}
+
+// getPeerSnapshotInfoList returns a map of snapshot name to snapshot info.
+// If any healthy replica exists among the peer replicas it returns its Status.Snapshots,
+// else it iterates over all the degraded replicas and gets their snapshot lists
+func getPeerSnapshotInfoList(
+    peerCVRList *apis.CStorVolumeReplicaList) map[string]apis.CStorSnapshotInfo {
+
+    // NOTE: There are possibilities of having a stale phase
+    healthyReplica := getHealthyReplicaFromPeerReplicas(peerCVRList)
+    if healthyReplica != nil {
+        // Since updating the replica status and snapshot list is an atomic
+        // update, it is safe to return status.snapshots
+        return healthyReplica.Status.Snapshots
+    }
+
+    snapshotInfoList := map[string]apis.CStorSnapshotInfo{}
+    for _, cvrObj := range peerCVRList.Items {
+        // No need to get snapshot information from Offline,
+        // NewReplicaDegraded, Recreate because they might
+        // contain stale information
+        if cvrObj.Status.Phase == apis.CVRStatusDegraded {
+            for snapName, snapInfo := range cvrObj.Status.Snapshots {
+                if _, ok := snapshotInfoList[snapName]; !ok {
+                    snapshotInfoList[snapName] = snapInfo
+                }
+            }
+        }
+    }
+    return snapshotInfoList
+}
+
+// getHealthyReplicaFromPeerReplicas returns a healthy replica from the
+// peer replica list if one exists, else it returns nil
+func getHealthyReplicaFromPeerReplicas(
+    peerCVRList *apis.CStorVolumeReplicaList) *apis.CStorVolumeReplica {
+    for _, cvrObj := range peerCVRList.Items {
+        if cvrObj.Status.Phase == apis.CVRStatusOnline {
+            return &cvrObj
+        }
+    }
+    return nil
+}

 // addOrDeleteSnapshotListInfo adds/deletes the snapshots in CVR
 // It performs below steps:

From 1578634290a09a79823f22652c1e9e952bb63bcb Mon Sep 17 00:00:00 2001
From: mittachaitu
Date: Tue, 31 Mar 2020 18:17:34 +0530
Subject: [PATCH 2/4] This commit handles the error message

Signed-off-by: mittachaitu
---
 .../controller/replica-controller/handler.go | 38 ++++++++-----------
 .../volumereplica/volumereplica.go           | 14 ++++---
 2 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go b/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
index 9ff84944f9..cf1215977f 100644
--- a/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
+++ b/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
@@ -168,7 +168,16 @@ func (c *CStorVolumeReplicaController) syncHandler(
     // Synchronize cstor volume total allocated and
     // used capacity fields on CVR object.
     // Any kind of sync activity should be done from here.
-    c.syncCvr(cvrGot)
+    err = c.syncCvr(cvrGot)
+    if err != nil {
+        c.recorder.Event(
+            cvrGot,
+            corev1.EventTypeWarning,
+            "SyncFailed",
+            fmt.Sprintf("failed to sync CVR, error: %s", err.Error()),
+        )
+        return nil
+    }

     _, err = c.clientset.
         OpenebsV1alpha1().
@@ -625,28 +634,16 @@ func (c *CStorVolumeReplicaController) getCVRStatus(
 }

 // syncCvr updates fields on the CVR object after fetching the values from the zfs utility.
-func (c *CStorVolumeReplicaController) syncCvr(cvr *apis.CStorVolumeReplica) {
+func (c *CStorVolumeReplicaController) syncCvr(cvr *apis.CStorVolumeReplica) error {
     // Get the zfs volume name corresponding to this cvr.
     volumeName, err := volumereplica.GetVolumeName(cvr)
     if err != nil {
-        c.recorder.Event(
-            cvr,
-            corev1.EventTypeWarning,
-            string(common.FailureCapacitySync),
-            string(common.MessageResourceFailCapacitySync),
-        )
-        return
+        return err
     }

     // Get capacity of the volume.
     capacity, err := volumereplica.Capacity(volumeName)
     if err != nil {
-        c.recorder.Event(
-            cvr,
-            corev1.EventTypeWarning,
-            string(common.FailureCapacitySync),
-            string(common.MessageResourceFailCapacitySync),
-        )
-        return
+        return errors.Wrapf(err, "failed to get volume replica capacity")
     } else {
         cvr.Status.Capacity = *capacity
     }
@@ -654,15 +651,10 @@ func (c *CStorVolumeReplicaController) syncCvr(cvr *apis.CStorVolumeReplica) err
     if os.Getenv(string(common.RebuildEstimates)) == "true" {
         err = volumereplica.GetAndUpdateSnapshotInfo(c.clientset, cvr)
         if err != nil {
-            c.recorder.Event(
-                cvr,
-                corev1.EventTypeWarning,
-                "SnapshotList",
-                fmt.Sprintf("Unable to update snapshot list details in cvr status err: %v", err),
-            )
+            return errors.Wrapf(err, "Unable to update snapshot list details in CVR status err: %v", err)
         }
-
     }
+    return nil
 }

 func (c *CStorVolumeReplicaController) reconcileVersion(cvr *apis.CStorVolumeReplica) (
diff --git a/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go b/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
index b51dd10466..d4593d49a4 100644
--- a/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
+++ b/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
@@ -710,9 +710,12 @@
     }

     // This looks like a hack, but it is required for the following reason:
-    // - In the nth reconciliation the CVR might be Degraded, and in the
-    //   (n+1)th reconciliation it might be Healthy (after completing
-    //   rebuilding), because the CVR status is fetched from zrepl.
+    // - In the nth reconciliation the CVR might be in Rebuilding and pending
+    //   snapshots get added under CVR.Status.PendingSnapshots. Assume the pool
+    //   then goes down, and meanwhile a snapshot deletion request comes in and
+    //   deletes those snapshots on the peer replicas. In the next
+    //   reconciliation, if the CVR is Healthy, the pending snapshots might
+    //   remain as is; the check below is required to cover this corner case.
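+    // Hypothetical timeline (illustrative snapshot name): reconciliation n
+    // sees the CVR Rebuilding and records snap-s1 in PendingSnapshots; the
+    // pool then goes down; snap-s1 is deleted on the peer replicas;
+    // reconciliation n+1 sees the CVR Healthy, so without the check below
+    // snap-s1 would stay pending forever.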
     if cvr.Status.Phase == apis.CVRStatusOnline &&
         len(cvr.Status.PendingSnapshots) != 0 {
         cvr.Status.PendingSnapshots = nil
@@ -723,7 +726,7 @@
 // getAndAddPendingSnapshotList gets the snapshot information from peer replicas and
 // adds it under the pending snapshot list
 // NOTE: This function will delete a snapshot from pending snapshots if it doesn't exist
-// on other replicas
+// on peer replicas
 func getAndAddPendingSnapshotList(
     clientset clientset.Interface, cvr *apis.CStorVolumeReplica) error {
     peerCVRList, err := getPeerReplicas(clientset, cvr)
@@ -782,6 +785,7 @@ func getPeerReplicas(
 func getPeerSnapshotInfoList(
     peerCVRList *apis.CStorVolumeReplicaList) map[string]apis.CStorSnapshotInfo {

+    // TODO: Get Opinion From Review Comments
     // NOTE: There are possibilities of having a stale phase
     healthyReplica := getHealthyReplicaFromPeerReplicas(peerCVRList)
     if healthyReplica != nil {
@@ -793,7 +797,7 @@ func getPeerSnapshotInfoList(
     snapshotInfoList := map[string]apis.CStorSnapshotInfo{}
     for _, cvrObj := range peerCVRList.Items {
         // No need to get snapshot information from Offline,
-        // NewReplicaDegraded, Recreate because they might
+        // NewReplicaDegraded and Recreate CVR because they might
         // contain stale information
         if cvrObj.Status.Phase == apis.CVRStatusDegraded {
             for snapName, snapInfo := range cvrObj.Status.Snapshots {

From 33cd4411c9fe80d0d3756aa91dd3e30c7ab80d62 Mon Sep 17 00:00:00 2001
From: mittachaitu
Date: Tue, 31 Mar 2020 23:28:50 +0530
Subject: [PATCH 3/4] This commit addresses the review comments

Signed-off-by: mittachaitu
---
 .../controller/replica-controller/handler.go |  2 +-
 .../volumereplica/volumereplica.go           | 32 ++-----------------
 2 files changed, 4 insertions(+), 30 deletions(-)

diff --git a/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go b/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
index cf1215977f..0fa005a97a 100644
--- a/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
+++ b/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
@@ -651,7 +651,7 @@ func (c *CStorVolumeReplicaController) syncCvr(cvr *apis.CStorVolumeReplica) err
     if os.Getenv(string(common.RebuildEstimates)) == "true" {
         err = volumereplica.GetAndUpdateSnapshotInfo(c.clientset, cvr)
         if err != nil {
-            return errors.Wrapf(err, "Unable to update snapshot list details in CVR status err: %v", err)
+            return errors.Wrapf(err, "Unable to update snapshot list details in CVR")
         }
     }
     return nil
diff --git a/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go b/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
index d4593d49a4..e7e6fd448a 100644
--- a/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
+++ b/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
@@ -785,43 +785,17 @@ func getPeerReplicas(
 func getPeerSnapshotInfoList(
     peerCVRList *apis.CStorVolumeReplicaList) map[string]apis.CStorSnapshotInfo {

-    // TODO: Get Opinion From Review Comments
-    // NOTE: There are possibilities of having a stale phase
-    healthyReplica := getHealthyReplicaFromPeerReplicas(peerCVRList)
-    if healthyReplica != nil {
-        // Since updating the replica status and snapshot list is an atomic
-        // update, it is safe to return status.snapshots
-        return healthyReplica.Status.Snapshots
-    }
-
     snapshotInfoList := map[string]apis.CStorSnapshotInfo{}
     for _, cvrObj := range peerCVRList.Items {
-        // No need to get snapshot information from Offline,
-        // NewReplicaDegraded and Recreate CVR because they might
-        // contain stale information
-        if cvrObj.Status.Phase == apis.CVRStatusDegraded {
-            for snapName, snapInfo := range cvrObj.Status.Snapshots {
-                if _, ok := snapshotInfoList[snapName]; !ok {
-                    snapshotInfoList[snapName] = snapInfo
+        for snapName, snapInfo := range cvrObj.Status.Snapshots {
+            if _, ok := snapshotInfoList[snapName]; !ok {
+                snapshotInfoList[snapName] = snapInfo
             }
         }
     }
     return snapshotInfoList
 }

-// getHealthyReplicaFromPeerReplicas returns a healthy replica from the
-// peer replica list if one exists, else it returns nil
-func getHealthyReplicaFromPeerReplicas(
-    peerCVRList *apis.CStorVolumeReplicaList) *apis.CStorVolumeReplica {
-    for _, cvrObj := range peerCVRList.Items {
-        if cvrObj.Status.Phase == apis.CVRStatusOnline {
-            return &cvrObj
-        }
-    }
-    return nil
-}
-
 // addOrDeleteSnapshotListInfo adds/deletes the snapshots in CVR
 // It performs below steps:

From 37f5427cd8cf2845a657a7fc74b0334f4731c278 Mon Sep 17 00:00:00 2001
From: mittachaitu
Date: Wed, 1 Apr 2020 11:44:47 +0530
Subject: [PATCH 4/4] This commit renames the function and adds required logs

Signed-off-by: mittachaitu
---
 .../controller/replica-controller/handler.go |  6 ++---
 .../volumereplica/volumereplica.go           | 24 +++++++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go b/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
index 0fa005a97a..0b802c224d 100644
--- a/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
+++ b/cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
@@ -168,7 +168,7 @@ func (c *CStorVolumeReplicaController) syncHandler(
     // Synchronize cstor volume total allocated and
     // used capacity fields on CVR object.
     // Any kind of sync activity should be done from here.
-    err = c.syncCvr(cvrGot)
+    err = c.syncCVRStatus(cvrGot)
     if err != nil {
         c.recorder.Event(
             cvrGot,
@@ -633,8 +633,8 @@ func (c *CStorVolumeReplicaController) getCVRStatus(
     return replicaStatus, nil
 }

-// syncCvr updates fields on the CVR object after fetching the values from the zfs utility.
-func (c *CStorVolumeReplicaController) syncCvr(cvr *apis.CStorVolumeReplica) error {
+// syncCVRStatus updates fields on the CVR status after fetching the values from the zfs utility.
+func (c *CStorVolumeReplicaController) syncCVRStatus(cvr *apis.CStorVolumeReplica) error {
     // Get the zfs volume name corresponding to this cvr.
     volumeName, err := volumereplica.GetVolumeName(cvr)
     if err != nil {
diff --git a/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go b/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
index e7e6fd448a..5203ed760d 100644
--- a/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
+++ b/cmd/cstor-pool-mgmt/volumereplica/volumereplica.go
@@ -718,6 +718,11 @@ func GetAndUpdateSnapshotInfo(
     //   remain as is; the check below is required to cover this corner case.
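+    // When the pending list is about to be cleared, log the snapshots being
+    // dropped so the transition back to Healthy can be traced from the
+    // pool-manager logs.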
     if cvr.Status.Phase == apis.CVRStatusOnline &&
         len(cvr.Status.PendingSnapshots) != 0 {
+        klog.Infof("CVR: %s is marked as %s, hence removing pending snapshots %v",
+            cvr.Name,
+            cvr.Status.Phase,
+            getSnapshotNames(cvr.Status.PendingSnapshots),
+        )
         cvr.Status.PendingSnapshots = nil
     }
     return nil
@@ -729,6 +734,9 @@
 func getAndAddPendingSnapshotList(
     clientset clientset.Interface, cvr *apis.CStorVolumeReplica) error {
+    newSnapshots := []string{}
+    removedSnapshots := []string{}
+
     peerCVRList, err := getPeerReplicas(clientset, cvr)
     if err != nil {
         return errors.Wrapf(err, "failed to get peer CVRs of volume replica %s", cvr.Name)
     }
@@ -743,6 +751,7 @@ func getAndAddPendingSnapshotList(
     for snapName := range cvr.Status.PendingSnapshots {
         if _, ok := peerSnapshotList[snapName]; !ok {
             delete(cvr.Status.PendingSnapshots, snapName)
+            removedSnapshots = append(removedSnapshots, snapName)
         }
     }
@@ -752,10 +761,16 @@ func getAndAddPendingSnapshotList(
     for snapName, snapInfo := range peerSnapshotList {
         if _, ok := cvr.Status.Snapshots[snapName]; !ok {
             if _, ok := cvr.Status.PendingSnapshots[snapName]; !ok {
                 cvr.Status.PendingSnapshots[snapName] = snapInfo
+                newSnapshots = append(newSnapshots, snapName)
             }
         }
     }

+    klog.Infof(
+        "Adding %v pending snapshots and deleting %v pending snapshots on CVR %s",
+        newSnapshots,
+        removedSnapshots,
+        cvr.Name)
     return nil
 }
@@ -796,6 +811,15 @@
     return snapshotInfoList
 }

+// getSnapshotNames returns the snapshot names present in the given map of
+// snapshot name to snapshot info
+func getSnapshotNames(snapMap map[string]apis.CStorSnapshotInfo) []string {
+    snapNameList := make([]string, 0, len(snapMap))
+    for snapName := range snapMap {
+        snapNameList = append(snapNameList, snapName)
+    }
+    return snapNameList
+}
+
 // addOrDeleteSnapshotListInfo adds/deletes the snapshots in CVR
 // It performs below steps:
 // 1. Add snapshot if it doesn't exist in CVR but exists on ZFS.
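
A minimal, self-contained sketch of the pending-snapshot bookkeeping this series introduces, with plain maps standing in for the real CVR types (snapInfo, reconcilePending, and the sample snapshot names are illustrative stand-ins, not the OpenEBS API):

package main

import "fmt"

// snapInfo stands in for apis.CStorSnapshotInfo (illustrative only).
type snapInfo struct{}

// reconcilePending mirrors getAndAddPendingSnapshotList: drop pending
// snapshots that no peer knows about, then record peer snapshots that are
// neither already rebuilt locally (snaps) nor already pending.
func reconcilePending(snaps, pending, peer map[string]snapInfo) (added, removed []string) {
    for name := range pending {
        if _, ok := peer[name]; !ok {
            delete(pending, name)
            removed = append(removed, name)
        }
    }
    for name, info := range peer {
        if _, ok := snaps[name]; ok {
            continue
        }
        if _, ok := pending[name]; !ok {
            pending[name] = info
            added = append(added, name)
        }
    }
    return added, removed
}

func main() {
    snaps := map[string]snapInfo{"snap-1": {}}       // already rebuilt locally
    pending := map[string]snapInfo{"snap-stale": {}} // no peer has this anymore
    peer := map[string]snapInfo{"snap-1": {}, "snap-2": {}}
    added, removed := reconcilePending(snaps, pending, peer)
    fmt.Println(added, removed) // [snap-2] [snap-stale]
}

Clearing PendingSnapshots entirely once the CVR reports Healthy (the corner case handled in PATCH 2 and logged in PATCH 4) sits on top of this per-reconciliation bookkeeping.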