Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race with ActivePlan between Instance and PlanExecution controllers #722

Merged
merged 21 commits into from
Aug 28, 2019
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 33 additions & 43 deletions pkg/controller/instance/instance_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
instance.Status.ActivePlan.Name == "" {

log.Printf("InstanceController: Creating a deploy execution plan for the instance %v", instance.Name)
err = createPlanOld(mgr, "deploy", &instance)
err = createPlan(mgr.GetClient(), mgr.GetEventRecorderFor("instance-controller"), mgr.GetScheme(), "deploy", &instance)
if err != nil {
log.Printf("InstanceController: Error creating \"%v\" object for \"deploy\": %v", instance.Name, err)
}
Expand Down Expand Up @@ -264,7 +264,7 @@ func instanceEventFilter(mgr manager.Manager) predicate.Funcs {
}
}

if err = createPlanOld(mgr, planName, new); err != nil {
if err = createPlan(mgr.GetClient(), mgr.GetEventRecorderFor("instance-controller"), mgr.GetScheme(), planName, new); err != nil {
log.Printf("InstanceController: Error creating PlanExecution \"%v\" for instance \"%v\": %v", planName, new.Name, err)
}
}
Expand Down Expand Up @@ -305,57 +305,42 @@ func instanceEventFilter(mgr manager.Manager) predicate.Funcs {
}
}

func createPlan(r *ReconcileInstance, planName string, instance *kudov1alpha1.Instance) error {
func createPlan(c client.Client, r record.EventRecorder, scheme *runtime.Scheme, planName string, instance *kudov1alpha1.Instance) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method should be called createPlanAndUpdateInstance or even createPlanAndUpdateInstanceActivePlanReerence :P

ctx := context.TODO()

planExecution := newPlanExecution(instance, planName, r.scheme)
return createPlanExecution(ctx, instance, planExecution, r, planName)
}

// createPlanExecution takes all the k8s objects needed to create the actual plan
func createPlanExecution(ctx context.Context, instance *kudov1alpha1.Instance, plan kudov1alpha1.PlanExecution, r *ReconcileInstance, planName string) error {
log.Printf("Creating PlanExecution of plan %s for instance %s", planName, instance.Name)
r.recorder.Event(instance, "Normal", "CreatePlanExecution", fmt.Sprintf("Creating \"%v\" plan execution", planName))
planExecution := newPlanExecution(instance, planName, scheme)
log.Printf("Creating PlanExecution of planExecution %s for instance %s", planName, instance.Name)
r.Event(instance, "Normal", "CreatePlanExecution", fmt.Sprintf("Creating \"%v\" planExecution execution", planName))

// Make this instance the owner of the PlanExecution
if err := controllerutil.SetControllerReference(instance, &plan, r.scheme); err != nil {
if err := controllerutil.SetControllerReference(instance, planExecution, scheme); err != nil {
log.Printf("InstanceController: Error setting ControllerReference")
return err
}
if err := r.Create(ctx, &plan); err != nil {
log.Printf("InstanceController: Error creating planexecution \"%v\": %v", plan.Name, err)
r.recorder.Event(instance, "Warning", "CreatePlanExecution", fmt.Sprintf("Error creating planexecution \"%v\": %v", plan.Name, err))
if err := c.Create(ctx, planExecution); err != nil {
log.Printf("InstanceController: Error creating planexecution \"%v\": %v", planExecution.Name, err)
r.Event(instance, "Warning", "CreatePlanExecution", fmt.Sprintf("Error creating planexecution \"%v\": %v", planExecution.Name, err))
return err
}
log.Printf("Created PlanExecution of plan %s for instance %s", planName, instance.Name)
r.recorder.Event(instance, "Normal", "PlanCreated", fmt.Sprintf("PlanExecution \"%v\" created", plan.Name))
return nil
}
log.Printf("Created PlanExecution of planExecution %s for instance %s", planName, instance.Name)
r.Event(instance, "Normal", "PlanCreated", fmt.Sprintf("PlanExecution \"%v\" created", planExecution.Name))

//todo: REMOVE this... we do not want to create a plan outside of reconcile
// most likely... any use of this function is wrong... all plans should be created in reconcile
func createPlanOld(mgr manager.Manager, planName string, instance *kudov1alpha1.Instance) error {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it was real pain maintaining two, while the only difference was how they accepted the dependencies which was pretty easy to unite

ctx := context.TODO()
recorder := mgr.GetEventRecorderFor("instance-controller")
log.Printf("Creating PlanExecution of plan %s for instance %s", planName, instance.Name)
recorder.Event(instance, "Normal", "CreatePlanExecution", fmt.Sprintf("Creating \"%v\" plan execution", planName))

planExecution := newPlanExecution(instance, planName, mgr.GetScheme())
return addActivePlanReference(c, r, planExecution, instance)
}

// Make this instance the owner of the PlanExecution
if err := controllerutil.SetControllerReference(instance, &planExecution, mgr.GetScheme()); err != nil {
log.Printf("InstanceController: Error setting ControllerReference")
return err
func addActivePlanReference(c client.Client, r record.EventRecorder, planExecution *kudov1alpha1.PlanExecution, instance *kudov1alpha1.Instance) error {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

main change

instance.Status.ActivePlan = corev1.ObjectReference{
Name: planExecution.Name,
Kind: "PlanExecution",
Namespace: planExecution.Namespace,
APIVersion: "kudo.dev/v1alpha1",
UID: planExecution.UID,
}

if err := mgr.GetClient().Create(ctx, &planExecution); err != nil {
log.Printf("InstanceController: Error creating planexecution \"%v\": %v", planExecution.Name, err)
recorder.Event(instance, "Warning", "CreatePlanExecution", fmt.Sprintf("Error creating planexecution \"%v\": %v", planExecution.Name, err))
return err
err := c.Update(context.TODO(), instance)
if err != nil {
r.Event(instance, "Warning", "UpdateError", fmt.Sprintf("Could not update the ActivePlan for (%v): %v", planExecution.Spec.Instance.Name, err))
}
log.Printf("Created PlanExecution of plan %s for instance %s", planName, instance.Name)
recorder.Event(instance, "Normal", "PlanCreated", fmt.Sprintf("PlanExecution \"%v\" created", planExecution.Name))
return nil
return err
}

var _ reconcile.Reconciler = &ReconcileInstance{}
Expand Down Expand Up @@ -392,7 +377,7 @@ func (r *ReconcileInstance) Reconcile(request reconcile.Request) (reconcile.Resu

// if this is new create and create and assign a planexecution and return
if isNewInstance(instance) {
err = createPlan(r, "deploy", instance)
err = createPlan(r.Client, r.recorder, r.scheme, "deploy", instance)
// err or not we return. If err == nil the controller is done, else requeue
return reconcile.Result{}, err
}
Expand Down Expand Up @@ -477,14 +462,18 @@ func parameterDifference(old, new map[string]string) map[string]string {
}

// newPlanExecution creates a PlanExecution based on the kind and instance for a given planName
func newPlanExecution(instance *kudov1alpha1.Instance, planName string, scheme *runtime.Scheme) kudov1alpha1.PlanExecution {
func newPlanExecution(instance *kudov1alpha1.Instance, planName string, scheme *runtime.Scheme) *kudov1alpha1.PlanExecution {
gvk, _ := apiutil.GVKForObject(instance, scheme)
ref := corev1.ObjectReference{
Kind: gvk.Kind,
Name: instance.Name,
Namespace: instance.Namespace,
}
planExecution := kudov1alpha1.PlanExecution{
TypeMeta: metav1.TypeMeta{
Kind: "PlanExecution",
APIVersion: "kudo.dev/v1alpha1",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have this dotted in a lot of places, so I looked for some "standardization" point that already exists in our codebase generated by kubebuilder, maybe we start moving to something more like this? Or at least turn it into a constant we can reference elsewhere. Maybe this is the constant?

SchemeGroupVersion = schema.GroupVersion{Group: "kudo.dev", Version: "v1alpha1"}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's try to unify this in another PR... I'll prepare one. Btw. how do you convert the groupversion to the string?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, let's unify in a separate PR

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 we'll have to look that up. hopefully it's something we can just set as a var and not have as a function call everywhere.

},
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%v-%v-%v", instance.Name, planName, time.Now().Nanosecond()),
Namespace: instance.GetNamespace(),
Expand All @@ -500,5 +489,6 @@ func newPlanExecution(instance *kudov1alpha1.Instance, planName string, scheme *
Template: instance.Spec,
},
}
return planExecution

return &planExecution
}
56 changes: 36 additions & 20 deletions pkg/controller/planexecution/planexecution_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
"github.com/kudobuilder/kudo/pkg/util/health"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -76,7 +75,7 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
// Define a mapping from the object in the event to one or more objects to
// Reconcile. Specifically this calls for a reconciliation of any owned
// objects.
mapFn := handler.ToRequestsFunc(
mapToOwningInstanceActivePlan := handler.ToRequestsFunc(
func(a handler.MapObject) []reconcile.Request {
owners := a.Meta.GetOwnerReferences()
requests := make([]reconcile.Request, 0)
Expand Down Expand Up @@ -134,7 +133,7 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
err = c.Watch(
&source.Kind{Type: &appsv1.StatefulSet{}},
&handler.EnqueueRequestsFromMapFunc{
ToRequests: mapFn,
ToRequests: mapToOwningInstanceActivePlan,
},
p)
if err != nil {
Expand All @@ -143,7 +142,7 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
err = c.Watch(
&source.Kind{Type: &appsv1.Deployment{}},
&handler.EnqueueRequestsFromMapFunc{
ToRequests: mapFn,
ToRequests: mapToOwningInstanceActivePlan,
},
p)
if err != nil {
Expand All @@ -152,17 +151,42 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
err = c.Watch(
&source.Kind{Type: &batchv1.Job{}},
&handler.EnqueueRequestsFromMapFunc{
ToRequests: mapFn,
ToRequests: mapToOwningInstanceActivePlan,
},
p)
if err != nil {
return err
}

// for instances we're interested in updates of instances owned by some planexecution (instance was created as part of PE)
// but also root instances of an operator that might have been updated with new activeplan
err = c.Watch(
&source.Kind{Type: &kudov1alpha1.Instance{}},
&handler.EnqueueRequestsFromMapFunc{
ToRequests: mapFn,
ToRequests: handler.ToRequestsFunc(
Copy link
Contributor

@zen-dog zen-dog Aug 27, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's meta...

func(a handler.MapObject) []reconcile.Request {
requests := mapToOwningInstanceActivePlan(a)
if len(requests) == 0 {
inst := &kudov1alpha1.Instance{}
err = mgr.GetClient().Get(context.TODO(), client.ObjectKey{
Name: a.Meta.GetName(),
Namespace: a.Meta.GetNamespace(),
}, inst)

if err != nil {
// for every updated/added instance also trigger reconcile for its active plan
requests = append(requests, reconcile.Request{
NamespacedName: types.NamespacedName{
Name: inst.Status.ActivePlan.Name,
Namespace: inst.Status.ActivePlan.Namespace,
},
})
} else {
log.Printf("PlanExecutionController: received event from Instance %s/%s but instance of that name does not exist", a.Meta.GetNamespace(), a.Meta.GetName())
}
}
return requests
}),
},
p)
if err != nil {
Expand Down Expand Up @@ -228,6 +252,11 @@ func (r *ReconcilePlanExecution) Reconcile(request reconcile.Request) (reconcile
return reconcile.Result{}, err
}

if instance.Status.ActivePlan.Name != planExecution.Name || instance.Status.ActivePlan.Namespace != planExecution.Namespace {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is needed because otherwise the plan runs until the end but then cannot update instance because that one was modified in between

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in general I feel like this safeguard is correct. If we're reconciling PE that is not current active plan, we probably should not execute anything

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't this be executed right after the PE is created, but before the instance controller calls addActivePlanReference?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm good point, I guess it could. To overcome even that case I guess adding reconcile trigger after linking activeplan could be a solution? But I still think this safeguard is correct, because we should really execute only active plan, not every plan that changed for example...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed

Copy link
Contributor Author

@alenkacz alenkacz Aug 26, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh wait, yeah, that should be fine, I think the PE controller should also watch for instance update event and trigger reconciliation as it does for deployments etc. Would you agree? Looking at the implementation right now I am not 100% that is happening and that it was intentional but if we agree on that I can push in that direction

EDIT: Oh yeah, right now it watches only for instances that were created as part of that plan not all of them... I would probably watch and reconcile for all updates. I mean controller-runtime will deal with de-duplication and the reconcile should be idempotent anyway

// this can happen for newly created PlanExecution where ActivePlan was not yet set to point to this instance
return reconcile.Result{}, fmt.Errorf("instance %s does not have ActivePlan pointing to PlanExecution %s, %s. Instead %s, %s. Retrying", instance.Name, planExecution.Name, planExecution.Namespace, instance.Status.ActivePlan.Name, instance.Status.ActivePlan.Namespace)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're already watching instance updates, so there is no need to re-trigger this reconciliation (it might create a loop)

}

// Check for Suspend set.
if planExecution.Spec.Suspend != nil && *planExecution.Spec.Suspend {
planExecution.Status.State = kudov1alpha1.PhaseStateSuspend
Expand All @@ -245,20 +274,6 @@ func (r *ReconcilePlanExecution) Reconcile(request reconcile.Request) (reconcile
// Before returning from this function, update the status
defer r.Update(context.Background(), planExecution)

// Need to add ownerReference as the Instance.
instance.Status.ActivePlan = corev1.ObjectReference{
Name: planExecution.Name,
Kind: planExecution.Kind,
Namespace: planExecution.Namespace,
APIVersion: planExecution.APIVersion,
UID: planExecution.UID,
}
err = r.Update(context.TODO(), instance)
if err != nil {
r.recorder.Event(planExecution, "Warning", "UpdateError", fmt.Sprintf("Could not update the ActivePlan for (%v): %v", planExecution.Spec.Instance.Name, err))
log.Printf("PlanExecutionController: Upate of instance with ActivePlan errored: %v", err)
}

// Get associated OperatorVersion
operatorVersion := &kudov1alpha1.OperatorVersion{}
err = r.Get(context.TODO(),
Expand Down Expand Up @@ -430,6 +445,7 @@ func (r *ReconcilePlanExecution) Reconcile(request reconcile.Request) (reconcile
err = r.Client.Update(context.TODO(), instance)
if err != nil {
log.Printf("Error updating instance status to %v: %v\n", instance.Status.Status, err)
return reconcile.Result{}, err
}

return reconcile.Result{}, nil
Expand Down