From fad97e339c7b8a50e361e1b4188ae1349b700b42 Mon Sep 17 00:00:00 2001 From: Adrian Ludwin Date: Fri, 25 Sep 2020 13:02:14 -0400 Subject: [PATCH] Improve anchor deletion This change fixes todos in the cherrypick for #1150 (see issue #1149). It simplifies and restructures a lot of the logic to make it easier to follow while looking at less data (e.g. a lot more focus on anchor.Status.State). Tested: all e2e tests pass on GKE 1.17. --- incubator/hnc/internal/reconcilers/anchor.go | 123 ++++++++++--------- incubator/hnc/test/e2e/issues_test.go | 2 +- 2 files changed, 63 insertions(+), 62 deletions(-) diff --git a/incubator/hnc/internal/reconcilers/anchor.go b/incubator/hnc/internal/reconcilers/anchor.go index 266505dd4..73c16d363 100644 --- a/incubator/hnc/internal/reconcilers/anchor.go +++ b/incubator/hnc/internal/reconcilers/anchor.go @@ -88,13 +88,15 @@ func (r *AnchorReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { return ctrl.Result{}, err } + // Update the state so we know the relationship between the anchor and its namespace. + r.updateState(log, inst, snsInst) + + // Handle the case where the anchor is being deleted. if deleting, err := r.onDeleting(ctx, log, inst, snsInst); deleting { return ctrl.Result{}, err } - // Update the state. If the subnamespace doesn't exist, create it. - // (This is how a subnamespace is created through a CR) - r.updateState(log, inst, snsInst) + // If the subnamespace doesn't exist, create it. if inst.Status.State == api.Missing { if err := r.writeNamespace(ctx, log, nm, pnm); err != nil { return ctrl.Result{}, err @@ -113,45 +115,45 @@ func (r *AnchorReconciler) onDeleting(ctx context.Context, log logr.Logger, inst return false, nil } + // We handle deletions differently depending on whether _one_ anchor is being deleted (i.e., the + // user wants to delete the namespace) or whether the Anchor CRD is being deleted, which usually + // means HNC is being uninstalled and we shouldn't delete _any_ namespaces. deletingCRD, err := isDeletingCRD(ctx, r, api.Anchors) if err != nil { log.Info("Couldn't determine if CRD is being deleted") return false, err } - // Update the state of the anchor - // - // TODO: call this in the parent function after v0.5 so there's no special code path for the - // onDeleting case. - r.updateState(log, inst, snsInst) - - log.Info("The anchor is being deleted", "deletingCRD", deletingCRD) + log.Info("Deleting", "deletingCRD", deletingCRD) switch { case len(inst.ObjectMeta.Finalizers) == 0: // We've finished processing this, nothing to do. - log.Info("Do nothing since the finalizers are already gone.") + log.Info("Finalizers have already been removed; nothing to do") return true, nil case r.shouldDeleteSubns(log, inst, snsInst, deletingCRD): // The subnamespace is not already being deleted but it allows cascadingDelete or it's a leaf. // Delete the subnamespace, unless the CRD is being deleted, in which case, we want to leave the // namespaces alone. - log.Info("The subnamespace is not being deleted but it allows cascading deletion or it's a leaf.") + log.Info("The subnamespace exists and should be deleted") return true, r.deleteNamespace(ctx, log, snsInst) - case r.removeFinalizers(log, inst, snsInst): + case r.shouldFinalizerAnchor(log, inst, snsInst): + inst.ObjectMeta.Finalizers = nil + log.Info("Anchor can be deleted; removing its finalizers") // We've determined that this anchor is ready to be deleted. return true, r.writeInstance(ctx, log, inst) default: // The subnamespace is already being deleted. Do nothing in this Reconcile(). // Wait until it's purged to remove the finalizers in another Reconcile(). - log.Info("Do nothing since the subnamespace is still being deleted (not purged yet).") + log.Info("Waiting for subnamespace to be fully purged") return true, nil } } -// shouldDeleteSubns returns true if the namespace still exists and it is a leaf -// subnamespace or it allows cascading delete unless the CRD is being deleted. +// shouldDeleteSubns returns true if the namespace still exists and should be deleted as a result of +// the anchor being deleted as well. // -// TODO: fix comment post-v0.5 +// This is the only part of the deletion process that needs to access the forest, in order to check +// the recursive AllowCascadingDeletion setting. func (r *AnchorReconciler) shouldDeleteSubns(log logr.Logger, inst *api.SubnamespaceAnchor, nsInst *corev1.Namespace, deletingCRD bool) bool { r.forest.Lock() defer r.forest.Unlock() @@ -161,69 +163,70 @@ func (r *AnchorReconciler) shouldDeleteSubns(log logr.Logger, inst *api.Subnames return false } - // If the anchor isn't in the "ok" state, don't delete the namespace. If it's "conflict," we - // *definitely* don't want to delete it; if it's missing (and possibly being created), then there - // isn't really a good option but we'll eventually put a condition on the namespace so it's not a - // big deal. - // - // TODO: much of the remaining portion of this function can probably be replaced by this switch - // statement. In v0.5, we're playing it safe so I won't modify the rest of the function, but in - // v0.6 we should restructure. switch inst.Status.State { case api.Missing: + // Either the namespace hasn't been created yet, or it's just been deleted. Either way, we don't + // need to delete it! return false case api.Conflict: + // The anchor is in a bad state; a namespace exists but isn't tied to this anchor. We certainly + // shouldn't delete the namespace. return false case api.Ok: - // keep going... + // The anchor and namespace are linked. We might still need to delete it. default: log.Error(errors.New("Bad anchor state"), "state", inst.Status.State) // Stay on the safe side - don't delete return false } - cnm := inst.Name - pnm := inst.Namespace - cns := r.forest.Get(cnm) - - // If the declared subnamespace is not created by this anchor, don't delete it. - if cns.Parent().Name() != pnm { + // If the namespace doesn't exist or is currently being deleted, we don't need to delete it again. + // Note that the state might be `Ok` (and not `Missing`) if the deleted namespace hasn't been + // reconciled yet, but there's no need to wait. + if nsInst.CreationTimestamp.IsZero() || !nsInst.DeletionTimestamp.IsZero() { return false } - // If the subnamespace is created by this anchor but is already being deleted, - // or has already been deleted, then there's no need to delete it again. - if !nsInst.DeletionTimestamp.IsZero() || !cns.Exists() { - return false - } - - // The subnamespace exists and isn't being deleted. We should delete it if it - // doesn't have any children itself, or if cascading deletion is enabled. + // The namespace is in a good state and is linked to this anchor, so it can be deleted. For + // safety's sake, we only actually delete subnamespaces if they're leaves or if ACD=true on one of + // their ancestors; the webhooks *should* ensure that this is always true before we get here, but + // if something slips by, we'll just leave the old subnamespace alone with a missing-anchor + // condition. + cns := r.forest.Get(inst.Name) return cns.ChildNames() == nil || cns.AllowsCascadingDelete() } -func (r *AnchorReconciler) removeFinalizers(log logr.Logger, inst *api.SubnamespaceAnchor, snsInst *corev1.Namespace) bool { - pnm := inst.Namespace - sOf := snsInst.GetAnnotations()[api.SubnamespaceOf] +// shouldFinalizerAnchor determines whether the anchor is safe to delete. It should only be called once +// we know that we don't need to delete the subnamespace itself (e.g. it's already gone, it can't be +// deleted, it's in the process of being deleted, etc). +func (r *AnchorReconciler) shouldFinalizerAnchor(log logr.Logger, inst *api.SubnamespaceAnchor, snsInst *corev1.Namespace) bool { + // If the namespace is gone, we can finish deleting the anchor. If the namespace is present but + // isn't already being deleted, we assume that shouldDeleteSubns has explicitly decided *not* to + // delete it, which means that there's nothing stopping the anchor from being deleted. + if snsInst.Name == "" || snsInst.DeletionTimestamp.IsZero() { + return true + } - // This switch statement has an explicit case for all conditions when we *can* delete the - // finalizers. The default behaviour is that we *cannot* delete the finalizers yet. - switch { - case snsInst.Name == "": - log.Info("The subnamespace is already purged; allowing the anchor to be deleted") - case sOf != pnm: - log.Info("The subnamespace believes it is the subnamespace of another namespace; allowing the anchor to be deleted", "annotation", sOf) - case snsInst.DeletionTimestamp.IsZero(): - // onDeleting has decided not to try to delete this namespace. Either cascading deletion isn't - // allowed (and the user has bypassed the webhook), or the CRD has been deleted and we don't - // want to invoke cascading deletion. Just allow the anchor to be deleted. - log.Info("We've decided not to delete the subnamespace; allowing the anchor to be deleted") - default: + switch inst.Status.State { + case api.Ok: + // The anchor is in a good state, and the namespace exists and is being deleted. Keep waiting. return false + case api.Missing: + // This should never happen; we know that the namespace exists since it's name is non-empty, and + // we *just* updated the state. Log an error but allow the object to be deleted anyway - a + // missing anchor isn't the end of the world. + log.Error(errors.New("Illegal state"), "Anchor in missing state but namespace exists (should be ok or conflict") + return true + case api.Conflict: + // Technically, even if this anchor's in a conflicted state, someone else could be deleting the + // namespace at the same time as this function gets called, but there's no need to wait for the + // (conflicting) namespace to be removed as well. + return true + default: + // As for the `missing` case, log an error but allow the anchor to be deleted. + log.Error(errors.New("Illegal state"), "Unknown state", "state", inst.Status.State) + return true } - - inst.ObjectMeta.Finalizers = nil - return true } func (r *AnchorReconciler) updateState(log logr.Logger, inst *api.SubnamespaceAnchor, snsInst *corev1.Namespace) { @@ -267,13 +270,11 @@ func (r *AnchorReconciler) getInstance(ctx context.Context, pnm, nm string) (*ap func (r *AnchorReconciler) writeInstance(ctx context.Context, log logr.Logger, inst *api.SubnamespaceAnchor) error { if inst.CreationTimestamp.IsZero() { - log.Info("Creating instance on apiserver") if err := r.Create(ctx, inst); err != nil { log.Error(err, "while creating on apiserver") return err } } else { - log.Info("Updating instance on apiserver") if err := r.Update(ctx, inst); err != nil { log.Error(err, "while updating on apiserver") return err diff --git a/incubator/hnc/test/e2e/issues_test.go b/incubator/hnc/test/e2e/issues_test.go index 5acc8cf41..d4ba38b47 100644 --- a/incubator/hnc/test/e2e/issues_test.go +++ b/incubator/hnc/test/e2e/issues_test.go @@ -358,7 +358,7 @@ var _ = Describe("Issues that require repairing HNC", func() { MustRun("kubectl get secret -n", nsChild) }) - It("Should delete a non-leaf child namespace when deleting a parent namespace with good anchor under race condition - issue #797", func() { + FIt("Should delete a non-leaf child namespace when deleting a parent namespace with good anchor under race condition - issue #797", func() { MustRun("kubectl hns create", nsSubChild, "-n", nsChild) MustRun("kubectl hns set", nsChild, "-a") // Test: Remove subns (anchor) in the good parent 'parent'