Skip to content

Commit

Permalink
Merge pull request #2351 from killianmuldoon/pr-fix-vpsheredeployment…
Browse files Browse the repository at this point in the history
…zone-deletion

🐛 Handle missing VSphereFailureDomain on VSphereDeploymentZone deletion
  • Loading branch information
k8s-ci-robot authored Sep 28, 2023
2 parents c34fe65 + 448c4c4 commit 1b3caae
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 114 deletions.
55 changes: 30 additions & 25 deletions controllers/vspheredeploymentzone_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,16 +112,6 @@ func (r vsphereDeploymentZoneReconciler) Reconcile(ctx context.Context, request
return reconcile.Result{}, err
}

failureDomain := &infrav1.VSphereFailureDomain{}
failureDomainKey := client.ObjectKey{Name: vsphereDeploymentZone.Spec.FailureDomain}
if err := r.Client.Get(ctx, failureDomainKey, failureDomain); err != nil {
if apierrors.IsNotFound(err) {
log.V(4).Info("Failure Domain not found, won't reconcile", "key", failureDomainKey)
return reconcile.Result{}, nil
}
return reconcile.Result{}, err
}

patchHelper, err := patch.NewHelper(vsphereDeploymentZone, r.Client)
if err != nil {
return reconcile.Result{}, errors.Wrapf(
Expand All @@ -135,7 +125,6 @@ func (r vsphereDeploymentZoneReconciler) Reconcile(ctx context.Context, request
vsphereDeploymentZoneContext := &capvcontext.VSphereDeploymentZoneContext{
ControllerContext: r.ControllerContext,
VSphereDeploymentZone: vsphereDeploymentZone,
VSphereFailureDomain: failureDomain,
Logger: log,
PatchHelper: patchHelper,
}
Expand All @@ -160,7 +149,13 @@ func (r vsphereDeploymentZoneReconciler) Reconcile(ctx context.Context, request
}

func (r vsphereDeploymentZoneReconciler) reconcileNormal(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext) error {
authSession, err := r.getVCenterSession(ctx, deploymentZoneCtx)
failureDomain := &infrav1.VSphereFailureDomain{}
failureDomainKey := client.ObjectKey{Name: deploymentZoneCtx.VSphereDeploymentZone.Spec.FailureDomain}
// Return an error if the FailureDomain can not be retrieved.
if err := r.Client.Get(ctx, failureDomainKey, failureDomain); err != nil {
return errors.Wrap(err, "VSphereFailureDomain could not be retrieved")
}
authSession, err := r.getVCenterSession(ctx, deploymentZoneCtx, failureDomain.Spec.Topology.Datacenter)
if err != nil {
deploymentZoneCtx.Logger.V(4).Error(err, "unable to create session")
conditions.MarkFalse(deploymentZoneCtx.VSphereDeploymentZone, infrav1.VCenterAvailableCondition, infrav1.VCenterUnreachableReason, clusterv1.ConditionSeverityError, err.Error())
Expand All @@ -177,7 +172,7 @@ func (r vsphereDeploymentZoneReconciler) reconcileNormal(ctx context.Context, de
conditions.MarkTrue(deploymentZoneCtx.VSphereDeploymentZone, infrav1.PlacementConstraintMetCondition)

// reconcile the failure domain
if err := r.reconcileFailureDomain(ctx, deploymentZoneCtx); err != nil {
if err := r.reconcileFailureDomain(ctx, deploymentZoneCtx, failureDomain); err != nil {
deploymentZoneCtx.Logger.V(4).Error(err, "failed to reconcile failure domain", "failureDomain", deploymentZoneCtx.VSphereDeploymentZone.Spec.FailureDomain)
deploymentZoneCtx.VSphereDeploymentZone.Status.Ready = pointer.Bool(false)
return errors.Wrapf(err, "failed to reconcile failure domain")
Expand Down Expand Up @@ -209,10 +204,10 @@ func (r vsphereDeploymentZoneReconciler) reconcilePlacementConstraint(ctx contex
return nil
}

func (r vsphereDeploymentZoneReconciler) getVCenterSession(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext) (*session.Session, error) {
func (r vsphereDeploymentZoneReconciler) getVCenterSession(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext, datacenter string) (*session.Session, error) {
params := session.NewParams().
WithServer(deploymentZoneCtx.VSphereDeploymentZone.Spec.Server).
WithDatacenter(deploymentZoneCtx.VSphereFailureDomain.Spec.Topology.Datacenter).
WithDatacenter(datacenter).
WithUserInfo(r.ControllerContext.Username, r.ControllerContext.Password).
WithFeatures(session.Feature{
EnableKeepAlive: r.EnableKeepAlive,
Expand Down Expand Up @@ -252,7 +247,6 @@ func (r vsphereDeploymentZoneReconciler) reconcileDelete(ctx context.Context, de

machines := &clusterv1.MachineList{}
if err := r.Client.List(ctx, machines); err != nil {
r.Logger.Error(err, "unable to list machines")
return errors.Wrapf(err, "unable to list machines")
}

Expand All @@ -264,13 +258,25 @@ func (r vsphereDeploymentZoneReconciler) reconcileDelete(ctx context.Context, de
})
if len(machinesUsingDeploymentZone) > 0 {
machineNamesStr := util.MachinesAsString(machinesUsingDeploymentZone.SortedByCreationTimestamp())
err := errors.Errorf("%s is currently in use by machines: %s", deploymentZoneCtx.VSphereDeploymentZone.Name, machineNamesStr)
r.Logger.Error(err, "Error deleting VSphereDeploymentZone", "name", deploymentZoneCtx.VSphereDeploymentZone.Name)
return errors.Errorf("error deleting VSphereDeploymentZone: %s is currently in use by machines: %s", deploymentZoneCtx.VSphereDeploymentZone.Name, machineNamesStr)
}

failureDomain := &infrav1.VSphereFailureDomain{}
failureDomainKey := client.ObjectKey{Name: deploymentZoneCtx.VSphereDeploymentZone.Spec.FailureDomain}
// Return an error if the FailureDomain can not be retrieved.
if err := r.Client.Get(ctx, failureDomainKey, failureDomain); err != nil {
// If the VSphereFailureDomain is not found return early and remove the finalizer.
// This prevents early deletion of the VSphereFailureDomain from blocking VSphereDeploymentZone deletion.
if apierrors.IsNotFound(err) {
ctrlutil.RemoveFinalizer(deploymentZoneCtx.VSphereDeploymentZone, infrav1.DeploymentZoneFinalizer)
return nil
}
return err
}

if err := updateOwnerReferences(ctx, deploymentZoneCtx.VSphereFailureDomain, r.Client, func() []metav1.OwnerReference {
return clusterutilv1.RemoveOwnerRef(deploymentZoneCtx.VSphereFailureDomain.OwnerReferences, metav1.OwnerReference{
// Reconcile the deletion of the VSphereFailureDomain by removing ownerReferences and deleting if necessary.
if err := updateOwnerReferences(ctx, failureDomain, r.Client, func() []metav1.OwnerReference {
return clusterutilv1.RemoveOwnerRef(failureDomain.OwnerReferences, metav1.OwnerReference{
APIVersion: infrav1.GroupVersion.String(),
Kind: deploymentZoneCtx.VSphereDeploymentZone.Kind,
Name: deploymentZoneCtx.VSphereDeploymentZone.Name,
Expand All @@ -279,15 +285,14 @@ func (r vsphereDeploymentZoneReconciler) reconcileDelete(ctx context.Context, de
return err
}

if len(deploymentZoneCtx.VSphereFailureDomain.OwnerReferences) == 0 {
deploymentZoneCtx.Logger.Info("deleting vsphereFailureDomain", "name", deploymentZoneCtx.VSphereFailureDomain.Name)
if err := r.Client.Delete(ctx, deploymentZoneCtx.VSphereFailureDomain); err != nil && !apierrors.IsNotFound(err) {
deploymentZoneCtx.Logger.Error(err, "failed to delete related %s %s", deploymentZoneCtx.VSphereFailureDomain.GroupVersionKind(), deploymentZoneCtx.VSphereFailureDomain.Name)
if len(failureDomain.OwnerReferences) == 0 {
deploymentZoneCtx.Logger.Info("deleting VSphereFailureDomain", "name", failureDomain.Name)
if err := r.Client.Delete(ctx, failureDomain); err != nil && !apierrors.IsNotFound(err) {
return errors.Wrapf(err, "failed to delete VSphereFailureDomain %s", failureDomain.Name)
}
}

ctrlutil.RemoveFinalizer(deploymentZoneCtx.VSphereDeploymentZone, infrav1.DeploymentZoneFinalizer)

return nil
}

Expand Down
40 changes: 20 additions & 20 deletions controllers/vspheredeploymentzone_controller_domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,40 +35,40 @@ import (
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/taggable"
)

func (r vsphereDeploymentZoneReconciler) reconcileFailureDomain(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext) error {
logger := ctrl.LoggerFrom(ctx).WithValues("VSphereFailureDomain", klog.KObj(deploymentZoneCtx.VSphereFailureDomain))
func (r vsphereDeploymentZoneReconciler) reconcileFailureDomain(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext, vsphereFailureDomain *infrav1.VSphereFailureDomain) error {
logger := ctrl.LoggerFrom(ctx).WithValues("VSphereFailureDomain", klog.KObj(vsphereFailureDomain))

// verify the failure domain for the region
if err := r.reconcileInfraFailureDomain(ctx, deploymentZoneCtx, deploymentZoneCtx.VSphereFailureDomain.Spec.Region); err != nil {
if err := r.reconcileInfraFailureDomain(ctx, deploymentZoneCtx, vsphereFailureDomain, vsphereFailureDomain.Spec.Region); err != nil {
conditions.MarkFalse(deploymentZoneCtx.VSphereDeploymentZone, infrav1.VSphereFailureDomainValidatedCondition, infrav1.RegionMisconfiguredReason, clusterv1.ConditionSeverityError, err.Error())
logger.Error(err, "region is not configured correctly")
return errors.Wrapf(err, "region is not configured correctly")
}

// verify the failure domain for the zone
if err := r.reconcileInfraFailureDomain(ctx, deploymentZoneCtx, deploymentZoneCtx.VSphereFailureDomain.Spec.Zone); err != nil {
if err := r.reconcileInfraFailureDomain(ctx, deploymentZoneCtx, vsphereFailureDomain, vsphereFailureDomain.Spec.Zone); err != nil {
conditions.MarkFalse(deploymentZoneCtx.VSphereDeploymentZone, infrav1.VSphereFailureDomainValidatedCondition, infrav1.ZoneMisconfiguredReason, clusterv1.ConditionSeverityError, err.Error())
logger.Error(err, "zone is not configured correctly")
return errors.Wrapf(err, "zone is not configured correctly")
}

if computeCluster := deploymentZoneCtx.VSphereFailureDomain.Spec.Topology.ComputeCluster; computeCluster != nil {
if err := r.reconcileComputeCluster(ctx, deploymentZoneCtx); err != nil {
if computeCluster := vsphereFailureDomain.Spec.Topology.ComputeCluster; computeCluster != nil {
if err := r.reconcileComputeCluster(ctx, deploymentZoneCtx, vsphereFailureDomain); err != nil {
logger.Error(err, "compute cluster is not configured correctly", "name", *computeCluster)
return errors.Wrap(err, "compute cluster is not configured correctly")
}
}

if err := r.reconcileTopology(ctx, deploymentZoneCtx); err != nil {
if err := r.reconcileTopology(ctx, deploymentZoneCtx, vsphereFailureDomain); err != nil {
logger.Error(err, "topology is not configured correctly")
return errors.Wrap(err, "topology is not configured correctly")
}

// Ensure the VSphereDeploymentZone is marked as an owner of the VSphereFailureDomain.
if err := updateOwnerReferences(ctx, deploymentZoneCtx.VSphereFailureDomain, r.Client,
if err := updateOwnerReferences(ctx, vsphereFailureDomain, r.Client,
func() []metav1.OwnerReference {
return clusterutilv1.EnsureOwnerRef(
deploymentZoneCtx.VSphereFailureDomain.OwnerReferences,
vsphereFailureDomain.OwnerReferences,
metav1.OwnerReference{
APIVersion: infrav1.GroupVersion.String(),
Kind: deploymentZoneCtx.VSphereDeploymentZone.Kind,
Expand All @@ -84,15 +84,15 @@ func (r vsphereDeploymentZoneReconciler) reconcileFailureDomain(ctx context.Cont
return nil
}

func (r vsphereDeploymentZoneReconciler) reconcileInfraFailureDomain(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext, failureDomain infrav1.FailureDomain) error {
func (r vsphereDeploymentZoneReconciler) reconcileInfraFailureDomain(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext, vsphereFailureDomain *infrav1.VSphereFailureDomain, failureDomain infrav1.FailureDomain) error {
if *failureDomain.AutoConfigure { //nolint:staticcheck
return r.createAndAttachMetadata(ctx, deploymentZoneCtx, failureDomain)
return r.createAndAttachMetadata(ctx, deploymentZoneCtx, vsphereFailureDomain, failureDomain)
}
return r.verifyFailureDomain(ctx, deploymentZoneCtx, failureDomain)
return r.verifyFailureDomain(ctx, deploymentZoneCtx, vsphereFailureDomain, failureDomain)
}

func (r vsphereDeploymentZoneReconciler) reconcileTopology(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext) error {
topology := deploymentZoneCtx.VSphereFailureDomain.Spec.Topology
func (r vsphereDeploymentZoneReconciler) reconcileTopology(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext, vsphereFailureDomain *infrav1.VSphereFailureDomain) error {
topology := vsphereFailureDomain.Spec.Topology
if datastore := topology.Datastore; datastore != "" {
if _, err := deploymentZoneCtx.AuthSession.Finder.Datastore(ctx, datastore); err != nil {
conditions.MarkFalse(deploymentZoneCtx.VSphereDeploymentZone, infrav1.VSphereFailureDomainValidatedCondition, infrav1.DatastoreNotFoundReason, clusterv1.ConditionSeverityError, "datastore %s is misconfigured", datastore)
Expand Down Expand Up @@ -123,8 +123,8 @@ func (r vsphereDeploymentZoneReconciler) reconcileTopology(ctx context.Context,
return nil
}

func (r vsphereDeploymentZoneReconciler) reconcileComputeCluster(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext) error {
computeCluster := deploymentZoneCtx.VSphereFailureDomain.Spec.Topology.ComputeCluster
func (r vsphereDeploymentZoneReconciler) reconcileComputeCluster(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext, vsphereFailureDomain *infrav1.VSphereFailureDomain) error {
computeCluster := vsphereFailureDomain.Spec.Topology.ComputeCluster
if computeCluster == nil {
return nil
}
Expand Down Expand Up @@ -156,12 +156,12 @@ func (r vsphereDeploymentZoneReconciler) reconcileComputeCluster(ctx context.Con

// verifyFailureDomain verifies the Failure Domain. It verifies the existence of tag and category specified and
// checks whether the specified tags exist on the DataCenter or Compute Cluster or Hosts (in a HostGroup).
func (r vsphereDeploymentZoneReconciler) verifyFailureDomain(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext, failureDomain infrav1.FailureDomain) error {
func (r vsphereDeploymentZoneReconciler) verifyFailureDomain(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext, vsphereFailureDomain *infrav1.VSphereFailureDomain, failureDomain infrav1.FailureDomain) error {
if _, err := deploymentZoneCtx.AuthSession.TagManager.GetTagForCategory(ctx, failureDomain.Name, failureDomain.TagCategory); err != nil {
return errors.Wrapf(err, "failed to verify tag %s and category %s", failureDomain.Name, failureDomain.TagCategory)
}

objects, err := taggable.GetObjects(ctx, deploymentZoneCtx, failureDomain.Type)
objects, err := taggable.GetObjects(ctx, deploymentZoneCtx, vsphereFailureDomain, failureDomain.Type)
if err != nil {
return errors.Wrapf(err, "failed to find object")
}
Expand All @@ -179,7 +179,7 @@ func (r vsphereDeploymentZoneReconciler) verifyFailureDomain(ctx context.Context
return nil
}

func (r vsphereDeploymentZoneReconciler) createAndAttachMetadata(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext, failureDomain infrav1.FailureDomain) error {
func (r vsphereDeploymentZoneReconciler) createAndAttachMetadata(ctx context.Context, deploymentZoneCtx *capvcontext.VSphereDeploymentZoneContext, vsphereFailureDomain *infrav1.VSphereFailureDomain, failureDomain infrav1.FailureDomain) error {
logger := ctrl.LoggerFrom(ctx, "tag", failureDomain.Name, "category", failureDomain.TagCategory)
categoryID, err := metadata.CreateCategory(ctx, deploymentZoneCtx, failureDomain.TagCategory, failureDomain.Type)
if err != nil {
Expand All @@ -193,7 +193,7 @@ func (r vsphereDeploymentZoneReconciler) createAndAttachMetadata(ctx context.Con
}

logger = logger.WithValues("type", failureDomain.Type)
objects, err := taggable.GetObjects(ctx, deploymentZoneCtx, failureDomain.Type)
objects, err := taggable.GetObjects(ctx, deploymentZoneCtx, vsphereFailureDomain, failureDomain.Type)
if err != nil {
logger.V(4).Error(err, "failed to find object")
return err
Expand Down
Loading

0 comments on commit 1b3caae

Please sign in to comment.