-
Notifications
You must be signed in to change notification settings - Fork 102
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix race with ActivePlan between Instance and PlanExecution controllers #722
Changes from 18 commits
5718c4e
e70cf09
55853c2
3cb71c7
c6d6c70
219bf80
31216b3
7e83752
b93f869
d801821
3a4ce80
0185fe0
d171bf7
58f4ae8
004b3cd
96efe48
e209df7
063ca40
7e9c866
57645d4
905e61f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -101,7 +101,7 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error { | |||
instance.Status.ActivePlan.Name == "" { | ||||
|
||||
log.Printf("InstanceController: Creating a deploy execution plan for the instance %v", instance.Name) | ||||
err = createPlanOld(mgr, "deploy", &instance) | ||||
err = createPlan(mgr.GetClient(), mgr.GetEventRecorderFor("instance-controller"), mgr.GetScheme(), "deploy", &instance) | ||||
if err != nil { | ||||
log.Printf("InstanceController: Error creating \"%v\" object for \"deploy\": %v", instance.Name, err) | ||||
} | ||||
|
@@ -264,7 +264,7 @@ func instanceEventFilter(mgr manager.Manager) predicate.Funcs { | |||
} | ||||
} | ||||
|
||||
if err = createPlanOld(mgr, planName, new); err != nil { | ||||
if err = createPlan(mgr.GetClient(), mgr.GetEventRecorderFor("instance-controller"), mgr.GetScheme(), planName, new); err != nil { | ||||
log.Printf("InstanceController: Error creating PlanExecution \"%v\" for instance \"%v\": %v", planName, new.Name, err) | ||||
} | ||||
} | ||||
|
@@ -305,57 +305,42 @@ func instanceEventFilter(mgr manager.Manager) predicate.Funcs { | |||
} | ||||
} | ||||
|
||||
func createPlan(r *ReconcileInstance, planName string, instance *kudov1alpha1.Instance) error { | ||||
func createPlan(c client.Client, r record.EventRecorder, scheme *runtime.Scheme, planName string, instance *kudov1alpha1.Instance) error { | ||||
ctx := context.TODO() | ||||
|
||||
planExecution := newPlanExecution(instance, planName, r.scheme) | ||||
return createPlanExecution(ctx, instance, planExecution, r, planName) | ||||
} | ||||
|
||||
// createPlanExecution takes all the k8s objects needed to create the actual plan | ||||
func createPlanExecution(ctx context.Context, instance *kudov1alpha1.Instance, plan kudov1alpha1.PlanExecution, r *ReconcileInstance, planName string) error { | ||||
log.Printf("Creating PlanExecution of plan %s for instance %s", planName, instance.Name) | ||||
r.recorder.Event(instance, "Normal", "CreatePlanExecution", fmt.Sprintf("Creating \"%v\" plan execution", planName)) | ||||
planExecution := newPlanExecution(instance, planName, scheme) | ||||
log.Printf("Creating PlanExecution of planExecution %s for instance %s", planName, instance.Name) | ||||
r.Event(instance, "Normal", "CreatePlanExecution", fmt.Sprintf("Creating \"%v\" planExecution execution", planName)) | ||||
|
||||
// Make this instance the owner of the PlanExecution | ||||
if err := controllerutil.SetControllerReference(instance, &plan, r.scheme); err != nil { | ||||
if err := controllerutil.SetControllerReference(instance, planExecution, scheme); err != nil { | ||||
log.Printf("InstanceController: Error setting ControllerReference") | ||||
return err | ||||
} | ||||
if err := r.Create(ctx, &plan); err != nil { | ||||
log.Printf("InstanceController: Error creating planexecution \"%v\": %v", plan.Name, err) | ||||
r.recorder.Event(instance, "Warning", "CreatePlanExecution", fmt.Sprintf("Error creating planexecution \"%v\": %v", plan.Name, err)) | ||||
if err := c.Create(ctx, planExecution); err != nil { | ||||
log.Printf("InstanceController: Error creating planexecution \"%v\": %v", planExecution.Name, err) | ||||
r.Event(instance, "Warning", "CreatePlanExecution", fmt.Sprintf("Error creating planexecution \"%v\": %v", planExecution.Name, err)) | ||||
return err | ||||
} | ||||
log.Printf("Created PlanExecution of plan %s for instance %s", planName, instance.Name) | ||||
r.recorder.Event(instance, "Normal", "PlanCreated", fmt.Sprintf("PlanExecution \"%v\" created", plan.Name)) | ||||
return nil | ||||
} | ||||
log.Printf("Created PlanExecution of planExecution %s for instance %s", planName, instance.Name) | ||||
r.Event(instance, "Normal", "PlanCreated", fmt.Sprintf("PlanExecution \"%v\" created", planExecution.Name)) | ||||
|
||||
//todo: REMOVE this... we do not want to create a plan outside of reconcile | ||||
// most likely... any use of this function is wrong... all plans should be created in reconcile | ||||
func createPlanOld(mgr manager.Manager, planName string, instance *kudov1alpha1.Instance) error { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it was real pain maintaining two, while the only difference was how they accepted the dependencies which was pretty easy to unite |
||||
ctx := context.TODO() | ||||
recorder := mgr.GetEventRecorderFor("instance-controller") | ||||
log.Printf("Creating PlanExecution of plan %s for instance %s", planName, instance.Name) | ||||
recorder.Event(instance, "Normal", "CreatePlanExecution", fmt.Sprintf("Creating \"%v\" plan execution", planName)) | ||||
|
||||
planExecution := newPlanExecution(instance, planName, mgr.GetScheme()) | ||||
return addActivePlanReference(c, r, planExecution, instance) | ||||
} | ||||
|
||||
// Make this instance the owner of the PlanExecution | ||||
if err := controllerutil.SetControllerReference(instance, &planExecution, mgr.GetScheme()); err != nil { | ||||
log.Printf("InstanceController: Error setting ControllerReference") | ||||
return err | ||||
func addActivePlanReference(c client.Client, r record.EventRecorder, planExecution *kudov1alpha1.PlanExecution, instance *kudov1alpha1.Instance) error { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. main change |
||||
instance.Status.ActivePlan = corev1.ObjectReference{ | ||||
Name: planExecution.Name, | ||||
Kind: "PlanExecution", | ||||
Namespace: planExecution.Namespace, | ||||
APIVersion: "kudo.dev/v1alpha1", | ||||
UID: planExecution.UID, | ||||
} | ||||
|
||||
if err := mgr.GetClient().Create(ctx, &planExecution); err != nil { | ||||
log.Printf("InstanceController: Error creating planexecution \"%v\": %v", planExecution.Name, err) | ||||
recorder.Event(instance, "Warning", "CreatePlanExecution", fmt.Sprintf("Error creating planexecution \"%v\": %v", planExecution.Name, err)) | ||||
return err | ||||
err := c.Update(context.TODO(), instance) | ||||
if err != nil { | ||||
r.Event(instance, "Warning", "UpdateError", fmt.Sprintf("Could not update the ActivePlan for (%v): %v", planExecution.Spec.Instance.Name, err)) | ||||
} | ||||
log.Printf("Created PlanExecution of plan %s for instance %s", planName, instance.Name) | ||||
recorder.Event(instance, "Normal", "PlanCreated", fmt.Sprintf("PlanExecution \"%v\" created", planExecution.Name)) | ||||
return nil | ||||
return err | ||||
} | ||||
|
||||
var _ reconcile.Reconciler = &ReconcileInstance{} | ||||
|
@@ -392,7 +377,7 @@ func (r *ReconcileInstance) Reconcile(request reconcile.Request) (reconcile.Resu | |||
|
||||
// if this is new create and create and assign a planexecution and return | ||||
if isNewInstance(instance) { | ||||
err = createPlan(r, "deploy", instance) | ||||
err = createPlan(r.Client, r.recorder, r.scheme, "deploy", instance) | ||||
// err or not we return. If err == nil the controller is done, else requeue | ||||
return reconcile.Result{}, err | ||||
} | ||||
|
@@ -477,14 +462,18 @@ func parameterDifference(old, new map[string]string) map[string]string { | |||
} | ||||
|
||||
// newPlanExecution creates a PlanExecution based on the kind and instance for a given planName | ||||
func newPlanExecution(instance *kudov1alpha1.Instance, planName string, scheme *runtime.Scheme) kudov1alpha1.PlanExecution { | ||||
func newPlanExecution(instance *kudov1alpha1.Instance, planName string, scheme *runtime.Scheme) *kudov1alpha1.PlanExecution { | ||||
gvk, _ := apiutil.GVKForObject(instance, scheme) | ||||
ref := corev1.ObjectReference{ | ||||
Kind: gvk.Kind, | ||||
Name: instance.Name, | ||||
Namespace: instance.Namespace, | ||||
} | ||||
planExecution := kudov1alpha1.PlanExecution{ | ||||
TypeMeta: metav1.TypeMeta{ | ||||
Kind: "PlanExecution", | ||||
APIVersion: "kudo.dev/v1alpha1", | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have this dotted in a lot of places, so I looked for some "standardization" point that already exists in our codebase generated by kubebuilder, maybe we start moving to something more like this? Or at least turn it into a constant we can reference elsewhere. Maybe this is the constant? kudo/pkg/apis/kudo/v1alpha1/register.go Line 33 in 4def9dc
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's try to unify this in another PR... I'll prepare one. Btw. how do you convert the groupversion to the string? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree, let's unify in a separate PR There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 we'll have to look that up. hopefully it's something we can just set as a var and not have as a function call everywhere. |
||||
}, | ||||
ObjectMeta: metav1.ObjectMeta{ | ||||
Name: fmt.Sprintf("%v-%v-%v", instance.Name, planName, time.Now().Nanosecond()), | ||||
Namespace: instance.GetNamespace(), | ||||
|
@@ -500,5 +489,6 @@ func newPlanExecution(instance *kudov1alpha1.Instance, planName string, scheme * | |||
Template: instance.Spec, | ||||
}, | ||||
} | ||||
return planExecution | ||||
|
||||
return &planExecution | ||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,7 +33,6 @@ import ( | |
"github.com/kudobuilder/kudo/pkg/util/health" | ||
appsv1 "k8s.io/api/apps/v1" | ||
batchv1 "k8s.io/api/batch/v1" | ||
corev1 "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/api/errors" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
|
@@ -76,7 +75,7 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error { | |
// Define a mapping from the object in the event to one or more objects to | ||
// Reconcile. Specifically this calls for a reconciliation of any owned | ||
// objects. | ||
mapFn := handler.ToRequestsFunc( | ||
mapToOwningInstanceActivePlan := handler.ToRequestsFunc( | ||
func(a handler.MapObject) []reconcile.Request { | ||
owners := a.Meta.GetOwnerReferences() | ||
requests := make([]reconcile.Request, 0) | ||
|
@@ -134,7 +133,7 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error { | |
err = c.Watch( | ||
&source.Kind{Type: &appsv1.StatefulSet{}}, | ||
&handler.EnqueueRequestsFromMapFunc{ | ||
ToRequests: mapFn, | ||
ToRequests: mapToOwningInstanceActivePlan, | ||
}, | ||
p) | ||
if err != nil { | ||
|
@@ -143,7 +142,7 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error { | |
err = c.Watch( | ||
&source.Kind{Type: &appsv1.Deployment{}}, | ||
&handler.EnqueueRequestsFromMapFunc{ | ||
ToRequests: mapFn, | ||
ToRequests: mapToOwningInstanceActivePlan, | ||
}, | ||
p) | ||
if err != nil { | ||
|
@@ -152,17 +151,42 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error { | |
err = c.Watch( | ||
&source.Kind{Type: &batchv1.Job{}}, | ||
&handler.EnqueueRequestsFromMapFunc{ | ||
ToRequests: mapFn, | ||
ToRequests: mapToOwningInstanceActivePlan, | ||
}, | ||
p) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// for instances we're interested in updates of instances owned by some planexecution (instance was created as part of PE) | ||
// but also root instances of an operator that might have been updated with new activeplan | ||
err = c.Watch( | ||
&source.Kind{Type: &kudov1alpha1.Instance{}}, | ||
&handler.EnqueueRequestsFromMapFunc{ | ||
ToRequests: mapFn, | ||
ToRequests: handler.ToRequestsFunc( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's meta... |
||
func(a handler.MapObject) []reconcile.Request { | ||
requests := mapToOwningInstanceActivePlan(a) | ||
if len(requests) == 0 { | ||
inst := &kudov1alpha1.Instance{} | ||
err = mgr.GetClient().Get(context.TODO(), client.ObjectKey{ | ||
Name: a.Meta.GetName(), | ||
Namespace: a.Meta.GetNamespace(), | ||
}, inst) | ||
|
||
if err != nil { | ||
// for every updated/added instance also trigger reconcile for its active plan | ||
requests = append(requests, reconcile.Request{ | ||
NamespacedName: types.NamespacedName{ | ||
Name: inst.Status.ActivePlan.Name, | ||
Namespace: inst.Status.ActivePlan.Namespace, | ||
}, | ||
}) | ||
} else { | ||
log.Printf("PlanExecutionController: received event from Instance %s/%s but instance of that name does not exist", a.Meta.GetNamespace(), a.Meta.GetName()) | ||
} | ||
} | ||
return requests | ||
}), | ||
}, | ||
p) | ||
if err != nil { | ||
|
@@ -228,6 +252,11 @@ func (r *ReconcilePlanExecution) Reconcile(request reconcile.Request) (reconcile | |
return reconcile.Result{}, err | ||
} | ||
|
||
if instance.Status.ActivePlan.Name != planExecution.Name || instance.Status.ActivePlan.Namespace != planExecution.Namespace { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is needed because otherwise the plan runs until the end but then cannot update instance because that one was modified in between There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in general I feel like this safeguard is correct. If we're reconciling PE that is not current active plan, we probably should not execute anything There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Couldn't this be executed right after the PE is created, but before the instance controller calls There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm good point, I guess it could. To overcome even that case I guess adding reconcile trigger after linking activeplan could be a solution? But I still think this safeguard is correct, because we should really execute only active plan, not every plan that changed for example... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh wait, yeah, that should be fine, I think the PE controller should also watch for instance update event and trigger reconciliation as it does for deployments etc. Would you agree? Looking at the implementation right now I am not 100% that is happening and that it was intentional but if we agree on that I can push in that direction EDIT: Oh yeah, right now it watches only for instances that were created as part of that plan not all of them... I would probably watch and reconcile for all updates. I mean controller-runtime will deal with de-duplication and the reconcile should be idempotent anyway |
||
// this can happen for newly created PlanExecution where ActivePlan was not yet set to point to this instance | ||
return reconcile.Result{}, fmt.Errorf("instance %s does not have ActivePlan pointing to PlanExecution %s, %s. Instead %s, %s. Retrying", instance.Name, planExecution.Name, planExecution.Namespace, instance.Status.ActivePlan.Name, instance.Status.ActivePlan.Namespace) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're already watching instance updates, so there is no need to re-trigger this reconciliation (it might create a loop) |
||
} | ||
|
||
// Check for Suspend set. | ||
if planExecution.Spec.Suspend != nil && *planExecution.Spec.Suspend { | ||
planExecution.Status.State = kudov1alpha1.PhaseStateSuspend | ||
|
@@ -245,20 +274,6 @@ func (r *ReconcilePlanExecution) Reconcile(request reconcile.Request) (reconcile | |
// Before returning from this function, update the status | ||
defer r.Update(context.Background(), planExecution) | ||
|
||
// Need to add ownerReference as the Instance. | ||
instance.Status.ActivePlan = corev1.ObjectReference{ | ||
Name: planExecution.Name, | ||
Kind: planExecution.Kind, | ||
Namespace: planExecution.Namespace, | ||
APIVersion: planExecution.APIVersion, | ||
UID: planExecution.UID, | ||
} | ||
err = r.Update(context.TODO(), instance) | ||
if err != nil { | ||
r.recorder.Event(planExecution, "Warning", "UpdateError", fmt.Sprintf("Could not update the ActivePlan for (%v): %v", planExecution.Spec.Instance.Name, err)) | ||
log.Printf("PlanExecutionController: Upate of instance with ActivePlan errored: %v", err) | ||
} | ||
|
||
// Get associated OperatorVersion | ||
operatorVersion := &kudov1alpha1.OperatorVersion{} | ||
err = r.Get(context.TODO(), | ||
|
@@ -430,6 +445,7 @@ func (r *ReconcilePlanExecution) Reconcile(request reconcile.Request) (reconcile | |
err = r.Client.Update(context.TODO(), instance) | ||
if err != nil { | ||
log.Printf("Error updating instance status to %v: %v\n", instance.Status.Status, err) | ||
return reconcile.Result{}, err | ||
} | ||
|
||
return reconcile.Result{}, nil | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This method should be called
createPlanAndUpdateInstance
or evencreatePlanAndUpdateInstanceActivePlanReerence
:P