
Commit 0f62093
fixes as a result of e2e testing and addressing technical debt
- add 'create/watch' role access to the VRG reconciler
- add 'observedGeneration' to drpc status conditions
- allow 'omitempty' for drpc phase and drpc conditions
- fix relocation not waiting for secondaries before restoring PVs
- fix iterating through the s3ProfileName list, failing only if there is no s3Profile to restore from
BenamarMk authored and ShyamsundarR committed Aug 16, 2021
1 parent 733ed24 commit 0f62093
Showing 6 changed files with 65 additions and 36 deletions.
4 changes: 2 additions & 2 deletions api/v1alpha1/drplacementcontrol_types.go
@@ -143,9 +143,9 @@ type VRGConditions struct {
 
 // DRPlacementControlStatus defines the observed state of DRPlacementControl
 type DRPlacementControlStatus struct {
-	Phase              DRState                 `json:"phase"`
+	Phase              DRState                 `json:"phase,omitempty"`
 	PreferredDecision  plrv1.PlacementDecision `json:"preferredDecision,omitempty"`
-	Conditions         []metav1.Condition      `json:"conditions"`
+	Conditions         []metav1.Condition      `json:"conditions,omitempty"`
 	ResourceConditions VRGConditions           `json:"resourceConditions,omitempty"`
 }
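With 'omitempty' on both fields, an all-empty DRPC status serializes as an empty object instead of forcing "phase":"" and "conditions":null into every object, which is also why the regenerated CRD below drops the two fields from its required list. A small self-contained sketch of the marshaling difference; the structs here are simplified stand-ins, not the real DRPlacementControlStatus:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Stand-in for the new tags: empty values are dropped from the output.
type statusOmitEmpty struct {
	Phase      string   `json:"phase,omitempty"`
	Conditions []string `json:"conditions,omitempty"`
}

// Stand-in for the old tags: empty values are always serialized.
type statusAlways struct {
	Phase      string   `json:"phase"`
	Conditions []string `json:"conditions"`
}

func main() {
	a, _ := json.Marshal(statusOmitEmpty{})
	b, _ := json.Marshal(statusAlways{})

	fmt.Println(string(a)) // {}
	fmt.Println(string(b)) // {"phase":"","conditions":null}
}
```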

3 changes: 0 additions & 3 deletions config/crd/bases/ramendr.openshift.io_drplacementcontrols.yaml
@@ -359,9 +359,6 @@ spec:
               - namespace
               type: object
             type: object
-          required:
-          - conditions
-          - phase
           type: object
   type: object
   served: true
3 changes: 2 additions & 1 deletion config/dr_cluster/rbac/role.yaml
@@ -1,4 +1,3 @@
-
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -33,6 +32,7 @@ rules:
   resources:
   - persistentvolumes
   verbs:
+  - create
   - get
   - list
   - patch
@@ -93,3 +93,4 @@ rules:
   verbs:
   - get
   - list
+  - watch
2 changes: 2 additions & 0 deletions config/rbac/role.yaml
@@ -65,6 +65,7 @@ rules:
   resources:
   - persistentvolumes
   verbs:
+  - create
   - get
   - list
   - patch
@@ -163,6 +164,7 @@
   verbs:
   - get
   - list
+  - watch
 - apiGroups:
   - view.open-cluster-management.io
   resources:
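Both cluster roles gain 'create' on persistentvolumes (and 'watch' where it was missing) because restoring from an S3 profile means recreating PV objects on the managed cluster, as the restorePVs changes below show. A hedged sketch of the kind of call that needs the new verb, using controller-runtime's client; this restorePV is illustrative, not the controller's implementation:

```go
package controllers

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	k8serrors "k8s.io/apimachinery/pkg/api/errors"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// restorePV recreates a PV object (e.g. one fetched from an S3 backup) on
// the cluster; the Create call is what requires the 'create' verb granted
// above. Metadata sanitization beyond ResourceVersion is elided.
func restorePV(ctx context.Context, c client.Client, pv *corev1.PersistentVolume) error {
	pv.ResourceVersion = "" // backed-up objects must be created fresh

	if err := c.Create(ctx, pv); err != nil && !k8serrors.IsAlreadyExists(err) {
		return err
	}

	return nil
}
```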
54 changes: 35 additions & 19 deletions controllers/drplacementcontrol_controller.go
@@ -467,7 +467,9 @@ func (r *DRPlacementControlReconciler) finalizeDRPC(ctx context.Context, drpc *r
 	if preferredCluster == "" {
 		clonedPlRule, err := r.getClonedPlacementRule(ctx, clonedPlRuleName, drpc.Namespace)
 		if err != nil {
-			return fmt.Errorf("couldn't determine the preferred cluster name (%w)", err)
+			r.Log.Info("Cloned placement rule not found")
+
+			return nil
 		}
 
 		if len(clonedPlRule.Status.Decisions) != 0 {
@@ -488,8 +490,12 @@
 		}
 	}
 
-	// delete cloned placementrule
-	return r.deleteClonedPlacementRule(ctx, clonedPlRuleName, drpc.Namespace)
+	// delete cloned placementrule, if created
+	if drpc.Spec.PreferredCluster == "" {
+		return r.deleteClonedPlacementRule(ctx, clonedPlRuleName, drpc.Namespace)
+	}
+
+	return nil
 }

 func (r *DRPlacementControlReconciler) getPlacementRules(ctx context.Context,
@@ -1404,19 +1410,24 @@ func (d *DRPCInstance) ensureCleanup(clusterToSkip string) error {
d.log.Info("ensure cleanup on secondaries")

idx, condition := GetDRPCCondition(&d.instance.Status, rmn.ConditionReconciling)
d.log.Info(fmt.Sprintf("Condition %v", condition))

if idx == -1 {
return fmt.Errorf("condition not found. %v", d.instance.Status.Conditions)
d.log.Info("Generating new condition")
condition = d.newCondition(rmn.ConditionReconciling)
d.needStatusUpdate = true
}

if condition.Reason == rmn.ReasonSuccess && condition.Status == metav1.ConditionTrue {
d.log.Info(fmt.Sprintf("Condition %v", condition))

if condition.Reason == rmn.ReasonSuccess &&
condition.Status == metav1.ConditionTrue &&
condition.ObservedGeneration == d.instance.Generation {
d.log.Info("Condition values tallied, cleaup is considered complete")

return nil
}

if condition.Reason == rmn.ReasonProgressing {
d.updateCondition(condition, idx, rmn.ReasonCleaning, metav1.ConditionFalse)
}
d.updateCondition(condition, idx, rmn.ReasonCleaning, metav1.ConditionFalse)

clean, err := d.cleanup(clusterToSkip)
if err != nil {
@@ -1436,6 +1447,7 @@ func (d *DRPCInstance) updateCondition(condition *metav1.Condition, idx int, rea
 	status metav1.ConditionStatus) {
 	condition.Reason = reason
 	condition.Status = status
+	condition.ObservedGeneration = d.instance.Generation
 	d.instance.Status.Conditions[idx] = *condition
 	d.needStatusUpdate = true
 }
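Stamping each condition with the object's generation is what lets ensureCleanup above, and reconciling() further below, ignore a ReasonSuccess condition that was recorded against an older spec. A self-contained sketch of that staleness guard using only the upstream metav1.Condition type (the controller's additional Reason check is dropped for brevity):

```go
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// conditionCurrent reports whether cond succeeded and was recorded against
// the generation being reconciled now; a condition stamped with an older
// generation is stale and must not short-circuit cleanup.
func conditionCurrent(cond *metav1.Condition, generation int64) bool {
	return cond.Status == metav1.ConditionTrue &&
		cond.ObservedGeneration == generation
}

func main() {
	cond := metav1.Condition{
		Type:               "Reconciling",
		Status:             metav1.ConditionTrue,
		Reason:             "Success",
		ObservedGeneration: 3,
	}

	fmt.Println(conditionCurrent(&cond, 3)) // true: condition matches this spec
	fmt.Println(conditionCurrent(&cond, 4)) // false: spec changed, redo cleanup
}
```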
@@ -1892,17 +1904,10 @@ func (d *DRPCInstance) newCondition(condType string) *metav1.Condition {
 		LastTransitionTime: metav1.Now(),
 		Reason:             d.getConditionReason(condType),
 		Message:            d.getConditionMessage(condType),
+		ObservedGeneration: d.instance.Generation,
 	}
 }
 
-// func (d *DRPCInstance) getConditionType() string {
-// 	if d.instance.Status.Phase == "" {
-// 		return rmn.ConditionNotAvailable
-// 	}
-
-// 	return rmn.ConditionAvailable
-// }
-
 func (d *DRPCInstance) getConditionStatus(condType string) metav1.ConditionStatus {
 	if condType == rmn.ConditionAvailable {
 		return d.getConditionStatusForTypeAvailable()
@@ -1942,14 +1947,25 @@ func (d *DRPCInstance) getConditionStatusForTypeReconciling() metav1.ConditionSt
 }
 
 func (d *DRPCInstance) reconciling() bool {
+	// The initial Deployed phase requires no cleanup, and hence no further
+	// reconciliation for cleanup
+	if d.instance.Status.Phase == rmn.Deployed {
+		d.log.Info("Initial deployed phase detected")
+
+		return false
+	}
+
 	idx, condition := GetDRPCCondition(&d.instance.Status, rmn.ConditionReconciling)
+	d.log.Info(fmt.Sprintf("Condition %v", condition))
+
 	if idx == -1 {
-		return false
+		d.log.Info("Found missing reconciling condition")
+
+		return true
 	}
 
-	if condition.Reason == rmn.ReasonSuccess && condition.Status == metav1.ConditionTrue {
+	if condition.Reason == rmn.ReasonSuccess &&
+		condition.Status == metav1.ConditionTrue &&
+		condition.ObservedGeneration == d.instance.Generation {
 		return false
 	}

35 changes: 24 additions & 11 deletions controllers/volumereplicationgroup_controller.go
@@ -240,9 +240,9 @@ func filterPVC(mgr manager.Manager, pvc *corev1.PersistentVolumeClaim, log logr.
 // +kubebuilder:rbac:groups=ramendr.openshift.io,resources=volumereplicationgroups/finalizers,verbs=update
 // +kubebuilder:rbac:groups=replication.storage.openshift.io,resources=volumereplications,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=replication.storage.openshift.io,resources=volumereplicationclasses,verbs=get;list;watch;update;patch
-// +kubebuilder:rbac:groups=storage.k8s.io,resources=storageclasses,verbs=get;list
+// +kubebuilder:rbac:groups=storage.k8s.io,resources=storageclasses,verbs=get;list;watch
 // +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;update;patch
-// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;update;patch
+// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;update;patch;create

// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
@@ -417,31 +417,44 @@ func (v *VRGInstance) restorePVs() error {
 	setPVMetadataProgressingCondition(&v.instance.Status.Conditions, v.instance.Generation, msg)
 
 	v.log.Info(fmt.Sprintf("Restoring PVs to this managed cluster %v", v.instance.Spec.S3ProfileList))
-	// TODO: this is added in the last minute while merging. More thought will be put in this
+
+	success := true
+
 	for _, s3ProfileName := range v.instance.Spec.S3ProfileList {
 		pvList, err := v.listPVsFromS3Store(s3ProfileName)
 		if err != nil {
-			return fmt.Errorf("failed to retrieve PVs from S3 store (%w)", err)
+			success = false
+
+			v.log.Error(err, "failed to retrieve PVs from S3 store", "ProfileName", s3ProfileName)
 		}
 
 		v.log.Info(fmt.Sprintf("Found %d PVs", len(pvList)))
 
-		if len(pvList) == 0 {
-			return nil
-		}
-
 		for idx := range pvList {
 			err := v.restorePV(&pvList[idx])
 			if err != nil {
-				v.log.Error(err, "Stopping restore")
-
-				return err
+				success = false
+
+				v.log.Error(err, "Restore PVs failed", "ProfileName", s3ProfileName)
 			}
 		}
 
+		if !success {
+			// go to the next profile
+			continue
+		}
+
+		setPVMetadataAvailableCondition(&v.instance.Status.Conditions, v.instance.Generation, msg)
+
+		v.log.Info(fmt.Sprintf("Restored %d PVs using profile %s", len(pvList), s3ProfileName))
+
+		success = true
+
+		break
 	}
 
+	if !success {
+		return fmt.Errorf("failed to restorePVs using profile list (%v)", v.instance.Spec.S3ProfileList)
+	}
+
 	return nil
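One subtlety in the loop above: success is initialized once, outside the profile loop, and is only set back to true on the success path, so an earlier profile's failure can mask a later profile's clean restore. Resetting the flag at the top of each iteration avoids that. A sketch of the same first-usable-profile pattern with the per-profile reset; the function and its helpers are hypothetical stand-ins, not the VRG reconciler's API:

```go
package main

import (
	"errors"
	"fmt"
)

// restoreFromProfiles tries each S3 profile in order and stops at the first
// profile from which every PV restores cleanly. listPVs and restorePV are
// hypothetical stand-ins for the controller's S3-store helpers.
func restoreFromProfiles(profiles []string,
	listPVs func(string) ([]string, error),
	restorePV func(string) error) error {
	for _, profile := range profiles {
		// Reset per profile so an earlier failure cannot mask this one.
		success := true

		pvs, err := listPVs(profile)
		if err != nil {
			success = false
		}

		for _, pv := range pvs {
			if err := restorePV(pv); err != nil {
				success = false
			}
		}

		if success {
			return nil // everything restored from this profile
		}
	}

	return fmt.Errorf("failed to restore PVs using profile list (%v)", profiles)
}

func main() {
	profiles := []string{"s3-east", "s3-west"}
	listPVs := func(p string) ([]string, error) {
		if p == "s3-east" {
			return nil, errors.New("connection refused")
		}

		return []string{"pv-1", "pv-2"}, nil
	}
	restorePV := func(pv string) error { return nil }

	fmt.Println(restoreFromProfiles(profiles, listPVs, restorePV)) // <nil>
}
```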
@@ -684,7 +697,7 @@ func (v *VRGInstance) processAsPrimary() (ctrl.Result, error) {
 	if err := v.restorePVs(); err != nil {
 		v.log.Info("Restoring PVs failed", "errorValue", err)
 
-		msg := "Failed to restore PVs"
+		msg := fmt.Sprintf("Failed to restore PVs (%v)", err.Error())
 		setPVMetadataErrorCondition(&v.instance.Status.Conditions, v.instance.Generation, msg)
 
 		if err = v.updateVRGStatus(false); err != nil {
