From d5c3fe0304e16cc259924a0d0c4e204d3bc6a066 Mon Sep 17 00:00:00 2001 From: Adrian Ludwin Date: Fri, 25 Sep 2020 13:02:14 -0400 Subject: [PATCH] Improve anchor deletion This change fixes todos in the cherrypick for #1150 (see issue #1149). It simplifies and restructures a lot of the logic to make it easier to follow while looking at less data (e.g. a lot more focus on anchor.Status.State). It also adds a lots more documentation. Tested: all e2e tests pass on GKE 1.18 when combined with fixes to the e2e tests (PRs #1160, #1162, #1163 and #1164). --- incubator/hnc/internal/reconcilers/anchor.go | 204 ++++++++++++------- 1 file changed, 126 insertions(+), 78 deletions(-) diff --git a/incubator/hnc/internal/reconcilers/anchor.go b/incubator/hnc/internal/reconcilers/anchor.go index d510d1540..8b02fc29d 100644 --- a/incubator/hnc/internal/reconcilers/anchor.go +++ b/incubator/hnc/internal/reconcilers/anchor.go @@ -55,7 +55,7 @@ type AnchorReconciler struct { func (r *AnchorReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { ctx := context.Background() log := r.Log.WithValues("trigger", req.NamespacedName) - log.Info("Reconciling anchor") + log.V(1).Info("Reconciling anchor") // Get names of the hierarchical namespace and the current namespace. nm := req.Name @@ -67,6 +67,7 @@ func (r *AnchorReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { inst, err := r.getInstance(ctx, pnm, nm) if err != nil { if apierrors.IsNotFound(err) { + log.Info("Anchor has been deleted") return ctrl.Result{}, nil } return ctrl.Result{}, err @@ -88,13 +89,15 @@ func (r *AnchorReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { return ctrl.Result{}, err } + // Update the state so we know the relationship between the anchor and its namespace. + r.updateState(log, inst, snsInst) + + // Handle the case where the anchor is being deleted. if deleting, err := r.onDeleting(ctx, log, inst, snsInst); deleting { return ctrl.Result{}, err } - // Update the state. If the subnamespace doesn't exist, create it. - // (This is how a subnamespace is created through a CR) - r.updateState(log, inst, snsInst) + // If the subnamespace doesn't exist, create it. if inst.Status.State == api.Missing { if err := r.writeNamespace(ctx, log, nm, pnm); err != nil { return ctrl.Result{}, err @@ -107,123 +110,168 @@ func (r *AnchorReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { return ctrl.Result{}, r.writeInstance(ctx, log, inst) } -func (r *AnchorReconciler) onDeleting(ctx context.Context, log logr.Logger, inst *api.SubnamespaceAnchor, snsInst *corev1.Namespace) (deleting bool, err error) { +// onDeleting returns true if the anchor is in the process of being deleted, and handles all +// necessary steps to allow it to be deleted. This typically amounts to two steps: +// +// 1. Delete the subnamespace controlled by this anchor. +// 2. Remove the finalizers on this anchor, allowing it to be deleted. +// +// There are several conditions where we skip step 1 - for example, if we're uninstalling HNC, or +// if allowCascadingDeletion is disabled but the subnamespace has descendants (see +// shouldDeleteSubns for details). In such cases, we move straight to step 2. +func (r *AnchorReconciler) onDeleting(ctx context.Context, log logr.Logger, inst *api.SubnamespaceAnchor, snsInst *corev1.Namespace) (bool, error) { // Early exit and continue reconciliation if the instance is not being deleted. if inst.DeletionTimestamp.IsZero() { return false, nil } + // We handle deletions differently depending on whether _one_ anchor is being deleted (i.e., the + // user wants to delete the namespace) or whether the Anchor CRD is being deleted, which usually + // means HNC is being uninstalled and we shouldn't delete _any_ namespaces. deletingCRD, err := isDeletingCRD(ctx, r, api.Anchors) if err != nil { log.Info("Couldn't determine if CRD is being deleted") return false, err } + log.Info("Anchor is being deleted", "deletingCRD", deletingCRD) - // Update the state of the anchor - // - // TODO: call this in the parent function after v0.5 so there's no special code path for the - // onDeleting case. - r.updateState(log, inst, snsInst) - - log.Info("The anchor is being deleted", "deletingCRD", deletingCRD) + // Check if we need to perform step 1 (delete subns), step 2 (allow finalization) or just wait for + // something to happen. See method-level comments for details. switch { - case len(inst.ObjectMeta.Finalizers) == 0: - // We've finished processing this, nothing to do. - log.Info("Do nothing since the finalizers are already gone.") - return true, nil case r.shouldDeleteSubns(log, inst, snsInst, deletingCRD): - // The subnamespace is not already being deleted but it allows cascadingDelete or it's a leaf. - // Delete the subnamespace, unless the CRD is being deleted, in which case, we want to leave the - // namespaces alone. - log.Info("The subnamespace is not being deleted but it allows cascading deletion or it's a leaf.") + log.Info("Deleting subnamespace") return true, r.deleteNamespace(ctx, log, snsInst) - case r.removeFinalizers(log, inst, snsInst): - // We've determined that this anchor is ready to be deleted. + case r.shouldFinalizeAnchor(log, inst, snsInst): + log.Info("Removing finalizer") + inst.ObjectMeta.Finalizers = nil return true, r.writeInstance(ctx, log, inst) default: - // The subnamespace is already being deleted. Do nothing in this Reconcile(). - // Wait until it's purged to remove the finalizers in another Reconcile(). - log.Info("Do nothing since the subnamespace is still being deleted (not purged yet).") + // There's nothing to do; we're just waiting for something to happen. Print out a log message + // indicating what we're waiting for. + if len(inst.ObjectMeta.Finalizers) > 0 { + log.Info("Waiting for subnamespace to be fully purged") + } else { + // I doubt we'll ever get here but I suppose it's possible + log.Info("Waiting for K8s to delete this anchor (all finalizers are removed)") + } return true, nil } } -// shouldDeleteSubns returns true if the namespace still exists and it is a leaf -// subnamespace or it allows cascading delete unless the CRD is being deleted. +// shouldDeleteSubns returns true if the namespace still exists and should be deleted as a result of +// the anchor being deleted. There are a variety of reasons why subnamespaces *shouldn't* be deleted +// with their anchors (see the body of this method for details); this function also returns false if +// the subns is already being deleted and we just need to keep waiting for it to disappear. // -// TODO: fix comment post-v0.5 +// This is the only part of the deletion process that needs to access the forest, in order to check +// the recursive AllowCascadingDeletion setting. func (r *AnchorReconciler) shouldDeleteSubns(log logr.Logger, inst *api.SubnamespaceAnchor, nsInst *corev1.Namespace, deletingCRD bool) bool { r.forest.Lock() defer r.forest.Unlock() // If the CRD is being deleted, we want to leave the subnamespace alone. if deletingCRD { + log.Info("Will not delete subnamespace since the anchor CRD is being deleted") return false } - // If the anchor isn't in the "ok" state, don't delete the namespace. If it's "conflict," we - // *definitely* don't want to delete it; if it's missing (and possibly being created), then there - // isn't really a good option but we'll eventually put a condition on the namespace so it's not a - // big deal. - // - // TODO: much of the remaining portion of this function can probably be replaced by this switch - // statement. In v0.5, we're playing it safe so I won't modify the rest of the function, but in - // v0.6 we should restructure. + // The state of the anchor is the most important factor in whether the namespace should be + // deleted. switch inst.Status.State { - case api.Missing: - return false + case api.Ok: + // The namespace and anchor are bound together, so it's valid to decide the fate of the + // namespace here. Firstly, if the namespace is already being deleted, or the anchor has already + // been finalized, it means we've already passed step #1 in the deletion process (see onDeleting + // for details) so we shouldn't try to perform it again. + if !nsInst.DeletionTimestamp.IsZero() { + log.V(1).Info("The subnamespace is already being deleted; no need to delete again") + return false + } + if len(inst.ObjectMeta.Finalizers) == 0 { + log.V(1).Info("The anchor has already been finalized; do not reconsider deleting the namespace") + return false + } + + // The subns is ready to be deleted. It's only safe to delete subnamespaces if they're leaves or + // if ACD=true on one of their ancestors; the webhooks *should* ensure that this is always true + // before we get here, but if something slips by, we'll just leave the old subnamespace alone + // with a missing-anchor condition. + cns := r.forest.Get(inst.Name) + if cns.ChildNames() != nil && !cns.AllowsCascadingDeletion() { + log.Info("This subnamespace has descendants and allowCascadingDeletion is disabled; will not delete") + return false + } + + // Everything looks normal and safe. Delete the namespace. + return true + case api.Conflict: + // The anchor is in a bad state; a namespace exists but isn't tied to this anchor. We certainly + // shouldn't delete the namespace. + log.Info("Anchor is conflicted; will not delete subnamespace") return false - case api.Ok: - // keep going... + + case api.Missing: + // This most likely means that the namespace has been deleted. There's a small chance that the + // user created this anchor and then deleted it before the subns could be created; if so, + // there's nothing to delete. + log.V(1).Info("The subnamespace is deleted; no need to delete again") + return false + default: log.Error(errors.New("Bad anchor state"), "state", inst.Status.State) // Stay on the safe side - don't delete return false } - cnm := inst.Name - pnm := inst.Namespace - cns := r.forest.Get(cnm) +} - // If the declared subnamespace is not created by this anchor, don't delete it. - if cns.Parent().Name() != pnm { +// shouldFinalizeAnchor determines whether the anchor is safe to delete. It should only be called once +// we know that we don't need to delete the subnamespace itself (e.g. it's already gone, it can't be +// deleted, it's in the process of being deleted, etc). +func (r *AnchorReconciler) shouldFinalizeAnchor(log logr.Logger, inst *api.SubnamespaceAnchor, snsInst *corev1.Namespace) bool { + // If the anchor is already finalized, there's no need to do it again. + if len(inst.ObjectMeta.Finalizers) == 0 { return false } - // If the subnamespace is created by this anchor but is already being deleted, - // or has already been deleted, then there's no need to delete it again. - if !nsInst.DeletionTimestamp.IsZero() || !cns.Exists() { + switch inst.Status.State { + case api.Ok: + // The subnamespace exists and is bound to this anchor. Since we called shouldDeleteSubns before + // this function, we can rely on the subns' deletion timestamp being nonzero if the subns should + // be deleted with this namespace. If the subns *isn't* being deleted at this point, we can + // infer that it's not going to be, and the anchor is safe to finalize now. + if snsInst.DeletionTimestamp.IsZero() { + log.Info("Subnamespace will not be deleted; allowing anchor to be finalized") + return true + } + log.V(1).Info("Subnamespace is being deleted; cannot finalize anchor yet") return false - } - // The subnamespace exists and isn't being deleted. We should delete it if it - // doesn't have any children itself, or if cascading deletion is enabled. - return cns.ChildNames() == nil || cns.AllowsCascadingDeletion() -} + case api.Missing: + // Most likely, the namespace has been deleted. There's a small chance the anchor was created + // and then deleted before we were able to create the namespace, in which case, it's also fine + // to allow the anchor to be deleted. + // + // Is it possible that the namespace actually was created, but that HNC just hasn't noticed it + // yet? I don't _think_ so - we'd create the namespace via the controller-runtime client, and + // then we'd try to read it via the same client, so I'm hoping there's no weird distributed + // concurrency problem. But even if this isn't true, the worst that could happen (in this corner + // case of a corner case) is that the namespace gets created with a Missing Anchor condition, + // which really isn't the end of the world. So I don't think it's worth worrying about too much. + log.Info("Subnamespace does not exist; allowing anchor to be finalized") + return true -func (r *AnchorReconciler) removeFinalizers(log logr.Logger, inst *api.SubnamespaceAnchor, snsInst *corev1.Namespace) bool { - pnm := inst.Namespace - sOf := snsInst.GetAnnotations()[api.SubnamespaceOf] + case api.Conflict: + // Bad anchors can always be removed. + log.Info("Anchor is in the conflicted state; allowing it to be finalized") + return true - // This switch statement has an explicit case for all conditions when we *can* delete the - // finalizers. The default behaviour is that we *cannot* delete the finalizers yet. - switch { - case snsInst.Name == "": - log.Info("The subnamespace is already purged; allowing the anchor to be deleted") - case sOf != pnm: - log.Info("The subnamespace believes it is the subnamespace of another namespace; allowing the anchor to be deleted", "annotation", sOf) - case snsInst.DeletionTimestamp.IsZero(): - // onDeleting has decided not to try to delete this namespace. Either cascading deletion isn't - // allowed (and the user has bypassed the webhook), or the CRD has been deleted and we don't - // want to invoke cascading deletion. Just allow the anchor to be deleted. - log.Info("We've decided not to delete the subnamespace; allowing the anchor to be deleted") default: - return false + // Should never happen, so log an error and let it be deleted. + log.Error(errors.New("Illegal state"), "Unknown state", "state", inst.Status.State) + return true } - - inst.ObjectMeta.Finalizers = nil - return true } func (r *AnchorReconciler) updateState(log logr.Logger, inst *api.SubnamespaceAnchor, snsInst *corev1.Namespace) { @@ -232,13 +280,16 @@ func (r *AnchorReconciler) updateState(log logr.Logger, inst *api.SubnamespaceAn sOf := snsInst.Annotations[api.SubnamespaceOf] switch { case snsInst.Name == "": - log.Info("The subnamespace does not exist", "subnamespace", nm) + log.Info("The subnamespace does not exist; anchor state is Missing", "subnamespace", nm) inst.Status.State = api.Missing case sOf != pnm: - log.Info("The subnamespaceOf annotation of this namespace doesn't match its parent", "annotation", sOf) + log.Info("The subnamespaceOf annotation does not match parent; anchor state is Conflict", "annotation", sOf) inst.Status.State = api.Conflict default: - log.Info("The subnamespace has the correct subnamespaceOf annotation", "annotation", sOf) + if inst.Status.State != api.Ok { + // Only print log if something's changed + log.Info("The subnamespace and its anchor are correctly synchronized", "prevState", inst.Status.State) + } inst.Status.State = api.Ok } } @@ -267,13 +318,11 @@ func (r *AnchorReconciler) getInstance(ctx context.Context, pnm, nm string) (*ap func (r *AnchorReconciler) writeInstance(ctx context.Context, log logr.Logger, inst *api.SubnamespaceAnchor) error { if inst.CreationTimestamp.IsZero() { - log.Info("Creating instance on apiserver") if err := r.Create(ctx, inst); err != nil { log.Error(err, "while creating on apiserver") return err } } else { - log.Info("Updating instance on apiserver") if err := r.Update(ctx, inst); err != nil { log.Error(err, "while updating on apiserver") return err @@ -315,7 +364,6 @@ func (r *AnchorReconciler) writeNamespace(ctx context.Context, log logr.Logger, } func (r *AnchorReconciler) deleteNamespace(ctx context.Context, log logr.Logger, inst *corev1.Namespace) error { - log.Info("Deleting namespace on apiserver") if err := r.Delete(ctx, inst); err != nil { log.Error(err, "while deleting on apiserver") return err