From f5f86b369dad3e99bec0229743f19b0185fc18cd Mon Sep 17 00:00:00 2001 From: Abu Kashem Date: Thu, 22 Aug 2019 14:38:15 -0400 Subject: [PATCH] Bug 1740332: OLM should resume operator install OLM should automatically resume operator install when a user grants proper permission(s). Currently, a user has to manually delete the subscription and recreate it in order to trigger a reinstall of the operator. Do the following to trigger reinstall: - Add a new field 'AttenuatedServiceAccountRef' to status of InstallPlan. We use this to refer to the ServiceAccount that will be used to do attenuated scoped install of the operator. - Watch on Role(Binding), ServiceAccount resources. When these RBAC resources are added/updated find the target InstallPlan object. - Update the status.phase of the InstallPlan object to Installing. This will trigger a sync of the InstallPlan. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1740332 Jira: https://jira.coreos.com/browse/OLM-1244 --- pkg/api/apis/operators/installplan_types.go | 9 +- .../operators/v1alpha1/installplan_types.go | 21 ++-- .../v1alpha1/zz_generated.conversion.go | 2 + .../v1alpha1/zz_generated.deepcopy.go | 5 + .../apis/operators/zz_generated.deepcopy.go | 5 + .../operators/catalog/installplan_sync.go | 100 ++++++++++++++++++ pkg/controller/operators/catalog/operator.go | 29 ++++- pkg/lib/scoped/attenuator.go | 33 +++--- test/e2e/user_defined_sa_test.go | 54 ++++++++++ 9 files changed, 230 insertions(+), 28 deletions(-) create mode 100644 pkg/controller/operators/catalog/installplan_sync.go diff --git a/pkg/api/apis/operators/installplan_types.go b/pkg/api/apis/operators/installplan_types.go index 045f48d3717..447fd90a0f9 100644 --- a/pkg/api/apis/operators/installplan_types.go +++ b/pkg/api/apis/operators/installplan_types.go @@ -78,10 +78,11 @@ var ErrInvalidInstallPlan = errors.New("the InstallPlan contains invalid data") // // Status may trail the actual state of a system. type InstallPlanStatus struct { - Phase InstallPlanPhase - Conditions []InstallPlanCondition - CatalogSources []string - Plan []*Step + Phase InstallPlanPhase + Conditions []InstallPlanCondition + CatalogSources []string + Plan []*Step + AttenuatedServiceAccountRef *corev1.ObjectReference } // InstallPlanCondition represents the overall status of the execution of diff --git a/pkg/api/apis/operators/v1alpha1/installplan_types.go b/pkg/api/apis/operators/v1alpha1/installplan_types.go index 8bbe7eb3d86..d7a0d516178 100644 --- a/pkg/api/apis/operators/v1alpha1/installplan_types.go +++ b/pkg/api/apis/operators/v1alpha1/installplan_types.go @@ -84,6 +84,10 @@ type InstallPlanStatus struct { Conditions []InstallPlanCondition `json:"conditions,omitempty"` CatalogSources []string `json:"catalogSources"` Plan []*Step `json:"plan,omitempty"` + + // AttenuatedServiceAccountRef references the service account that is used + // to do scoped operator install. + AttenuatedServiceAccountRef *corev1.ObjectReference `json:"attenuatedServiceAccountRef,omitempty"` } // InstallPlanCondition represents the overall status of the execution of @@ -131,23 +135,22 @@ func (s *InstallPlanStatus) SetCondition(cond InstallPlanCondition) InstallPlanC return cond } - func ConditionFailed(cond InstallPlanConditionType, reason InstallPlanConditionReason, message string, now *metav1.Time) InstallPlanCondition { return InstallPlanCondition{ - Type: cond, - Status: corev1.ConditionFalse, - Reason: reason, - Message: message, - LastUpdateTime: *now, + Type: cond, + Status: corev1.ConditionFalse, + Reason: reason, + Message: message, + LastUpdateTime: *now, LastTransitionTime: *now, } } func ConditionMet(cond InstallPlanConditionType, now *metav1.Time) InstallPlanCondition { return InstallPlanCondition{ - Type: cond, - Status: corev1.ConditionTrue, - LastUpdateTime: *now, + Type: cond, + Status: corev1.ConditionTrue, + LastUpdateTime: *now, LastTransitionTime: *now, } } diff --git a/pkg/api/apis/operators/v1alpha1/zz_generated.conversion.go b/pkg/api/apis/operators/v1alpha1/zz_generated.conversion.go index 180d6df5fcd..6302d9b5d8c 100644 --- a/pkg/api/apis/operators/v1alpha1/zz_generated.conversion.go +++ b/pkg/api/apis/operators/v1alpha1/zz_generated.conversion.go @@ -1239,6 +1239,7 @@ func autoConvert_v1alpha1_InstallPlanStatus_To_operators_InstallPlanStatus(in *I out.Conditions = *(*[]operators.InstallPlanCondition)(unsafe.Pointer(&in.Conditions)) out.CatalogSources = *(*[]string)(unsafe.Pointer(&in.CatalogSources)) out.Plan = *(*[]*operators.Step)(unsafe.Pointer(&in.Plan)) + out.AttenuatedServiceAccountRef = (*corev1.ObjectReference)(unsafe.Pointer(in.AttenuatedServiceAccountRef)) return nil } @@ -1252,6 +1253,7 @@ func autoConvert_operators_InstallPlanStatus_To_v1alpha1_InstallPlanStatus(in *o out.Conditions = *(*[]InstallPlanCondition)(unsafe.Pointer(&in.Conditions)) out.CatalogSources = *(*[]string)(unsafe.Pointer(&in.CatalogSources)) out.Plan = *(*[]*Step)(unsafe.Pointer(&in.Plan)) + out.AttenuatedServiceAccountRef = (*corev1.ObjectReference)(unsafe.Pointer(in.AttenuatedServiceAccountRef)) return nil } diff --git a/pkg/api/apis/operators/v1alpha1/zz_generated.deepcopy.go b/pkg/api/apis/operators/v1alpha1/zz_generated.deepcopy.go index bb42b9be22d..345ea27431b 100644 --- a/pkg/api/apis/operators/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/apis/operators/v1alpha1/zz_generated.deepcopy.go @@ -777,6 +777,11 @@ func (in *InstallPlanStatus) DeepCopyInto(out *InstallPlanStatus) { } } } + if in.AttenuatedServiceAccountRef != nil { + in, out := &in.AttenuatedServiceAccountRef, &out.AttenuatedServiceAccountRef + *out = new(corev1.ObjectReference) + **out = **in + } return } diff --git a/pkg/api/apis/operators/zz_generated.deepcopy.go b/pkg/api/apis/operators/zz_generated.deepcopy.go index 13870d337d8..14ff8bf2b9c 100644 --- a/pkg/api/apis/operators/zz_generated.deepcopy.go +++ b/pkg/api/apis/operators/zz_generated.deepcopy.go @@ -777,6 +777,11 @@ func (in *InstallPlanStatus) DeepCopyInto(out *InstallPlanStatus) { } } } + if in.AttenuatedServiceAccountRef != nil { + in, out := &in.AttenuatedServiceAccountRef, &out.AttenuatedServiceAccountRef + *out = new(corev1.ObjectReference) + **out = **in + } return } diff --git a/pkg/controller/operators/catalog/installplan_sync.go b/pkg/controller/operators/catalog/installplan_sync.go new file mode 100644 index 00000000000..6a035f0b91a --- /dev/null +++ b/pkg/controller/operators/catalog/installplan_sync.go @@ -0,0 +1,100 @@ +package catalog + +import ( + "errors" + + "github.com/sirupsen/logrus" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + + "github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1" + "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/ownerutil" +) + +// When a user adds permission to a ServiceAccount by creating or updating +// Role/RoleBinding then we expect the InstallPlan that refers to the +// ServiceAccount to be retried if it has failed to install before due to +// permission issue(s). +func (o *Operator) triggerInstallPlanRetry(obj interface{}) (syncError error) { + metaObj, ok := obj.(metav1.Object) + if !ok { + syncError = errors.New("casting to metav1 object failed") + o.logger.Warn(syncError.Error()) + return + } + + related, _ := isObjectRBACRelated(obj) + if !related { + return + } + + ips, err := o.lister.OperatorsV1alpha1().InstallPlanLister().InstallPlans(metaObj.GetNamespace()).List(labels.Everything()) + if err != nil { + syncError = err + return + } + + isTarget := func(ip *v1alpha1.InstallPlan) bool { + // Only an InstallPlan that has failed to install before and only if it + // has a reference to a ServiceAccount then + return ip.Status.Phase == v1alpha1.InstallPlanPhaseFailed && ip.Status.AttenuatedServiceAccountRef != nil + } + + update := func(ip *v1alpha1.InstallPlan) error { + out := ip.DeepCopy() + out.Status.Phase = v1alpha1.InstallPlanPhaseInstalling + _, err := o.client.OperatorsV1alpha1().InstallPlans(ip.GetNamespace()).UpdateStatus(out) + + return err + } + + var errs []error + for _, ip := range ips { + if !isTarget(ip) { + continue + } + + logger := o.logger.WithFields(logrus.Fields{ + "ip": ip.GetName(), + "namespace": ip.GetNamespace(), + "phase": ip.Status.Phase, + }) + + if updateErr := update(ip); updateErr != nil { + errs = append(errs, updateErr) + logger.Warnf("failed to kick off InstallPlan retry - %v", updateErr) + continue + } + + logger.Info("InstallPlan status set to 'Installing' for retry") + } + + syncError = utilerrors.NewAggregate(errs) + return +} + +func isObjectRBACRelated(obj interface{}) (related bool, object runtime.Object) { + object, ok := obj.(runtime.Object) + if !ok { + return + } + + if err := ownerutil.InferGroupVersionKind(object); err != nil { + return + } + + kind := object.GetObjectKind().GroupVersionKind().Kind + switch kind { + case roleKind: + fallthrough + case roleBindingKind: + fallthrough + case serviceAccountKind: + related = true + } + + return +} diff --git a/pkg/controller/operators/catalog/operator.go b/pkg/controller/operators/catalog/operator.go index 47075e354f2..59e30b27271 100644 --- a/pkg/controller/operators/catalog/operator.go +++ b/pkg/controller/operators/catalog/operator.go @@ -388,7 +388,7 @@ func (o *Operator) syncObject(obj interface{}) (syncError error) { o.requeueOwners(metaObj) - return + return o.triggerInstallPlanRetry(obj) } func (o *Operator) handleDeletion(obj interface{}) { @@ -1027,6 +1027,28 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) { return } + querier := o.serviceAccountQuerier.NamespaceQuerier(plan.GetNamespace()) + reference, err := querier() + if err != nil { + syncError = fmt.Errorf("attenuated service account query failed - %v", err) + return + } + + if reference != nil { + out := plan.DeepCopy() + out.Status.AttenuatedServiceAccountRef = reference + + if !reflect.DeepEqual(plan, out) { + if _, updateErr := o.client.OperatorsV1alpha1().InstallPlans(out.GetNamespace()).UpdateStatus(out); err != nil { + syncError = fmt.Errorf("failed to attach attenuated ServiceAccount to status - %v", updateErr) + return + } + + logger.Infof("successfully attached attenuated ServiceAccount to status - %s", reference.Name) + return + } + } + outInstallPlan, syncError := transitionInstallPlanState(logger.Logger, o, *plan, o.now()) if syncError != nil { @@ -1075,6 +1097,8 @@ var _ installPlanTransitioner = &Operator{} func transitionInstallPlanState(log *logrus.Logger, transitioner installPlanTransitioner, in v1alpha1.InstallPlan, now metav1.Time) (*v1alpha1.InstallPlan, error) { out := in.DeepCopy() + + switch in.Status.Phase { case v1alpha1.InstallPlanPhaseRequiresApproval: if out.Spec.Approved { @@ -1190,8 +1214,7 @@ func (o *Operator) ExecutePlan(plan *v1alpha1.InstallPlan) error { // Does the namespace have an operator group that specifies a user defined // service account? If so, then we should use a scoped client for plan // execution. - getter := o.serviceAccountQuerier.NamespaceQuerier(namespace) - kubeclient, crclient, err := o.clientAttenuator.AttenuateClient(getter) + kubeclient, crclient, err := o.clientAttenuator.AttenuateClientWithServiceAccount(plan.Status.AttenuatedServiceAccountRef) if err != nil { o.logger.Errorf("failed to get a client for plan execution- %v", err) return err diff --git a/pkg/lib/scoped/attenuator.go b/pkg/lib/scoped/attenuator.go index 2224b66d900..fe4ea19e8d6 100644 --- a/pkg/lib/scoped/attenuator.go +++ b/pkg/lib/scoped/attenuator.go @@ -50,22 +50,12 @@ type ClientAttenuator struct { logger *logrus.Logger } -// AttenuateClient returns appropriately scoped client(s) to the caller. +// AttenuateClientWithReference returns appropriately scoped client(s) to the caller. // // client(s) that are bound to OLM cluster-admin role are returned if the querier // returns no error and reference to a service account is nil. // Otherwise an attempt is made to return attenuated client instance(s). -func (s *ClientAttenuator) AttenuateClient(querier ServiceAccountQuerierFunc) (kubeclient operatorclient.ClientInterface, crclient versioned.Interface, err error) { - if querier == nil { - err = errQuerierNotSpecified - return - } - - reference, err := querier() - if err != nil { - return - } - +func (s *ClientAttenuator) AttenuateClientWithServiceAccount(reference *corev1.ObjectReference) (kubeclient operatorclient.ClientInterface, crclient versioned.Interface, err error) { if reference == nil { // No service account/token has been provided. Return the default client(s). kubeclient = s.kubeclient @@ -92,6 +82,25 @@ func (s *ClientAttenuator) AttenuateClient(querier ServiceAccountQuerierFunc) (k return } +// AttenuateClient returns appropriately scoped client(s) to the caller. +// +// client(s) that are bound to OLM cluster-admin role are returned if the querier +// returns no error and reference to a service account is nil. +// Otherwise an attempt is made to return attenuated client instance(s). +func (s *ClientAttenuator) AttenuateClient(querier ServiceAccountQuerierFunc) (kubeclient operatorclient.ClientInterface, crclient versioned.Interface, err error) { + if querier == nil { + err = errQuerierNotSpecified + return + } + + reference, err := querier() + if err != nil { + return + } + + return s.AttenuateClientWithServiceAccount(reference) +} + // AttenuateOperatorClient returns a scoped operator client instance based on the // service account returned by the querier specified. func (s *ClientAttenuator) AttenuateOperatorClient(querier ServiceAccountQuerierFunc) (kubeclient operatorclient.ClientInterface, err error) { diff --git a/test/e2e/user_defined_sa_test.go b/test/e2e/user_defined_sa_test.go index 5a4251b6fd4..4fd8c297b62 100644 --- a/test/e2e/user_defined_sa_test.go +++ b/test/e2e/user_defined_sa_test.go @@ -131,6 +131,60 @@ func TestUserDefinedServiceAccountWithPermission(t *testing.T) { } } +func TestUserDefinedServiceAccountWithRetry(t *testing.T) { + defer cleaner.NotifyTestComplete(t, false) + + kubeclient := newKubeClient(t) + crclient := newCRClient(t) + + namespace := genName("scoped-ns-") + _, cleanupNS := newNamespace(t, kubeclient, namespace) + defer cleanupNS() + + // Create a service account, but add no permission to it. + saName := genName("scoped-sa-") + _, cleanupSA := newServiceAccount(t, kubeclient, namespace, saName) + defer cleanupSA() + + // Add an OperatorGroup and specify the service account. + ogName := genName("scoped-og-") + _, cleanupOG := newOperatorGroupWithServiceAccount(t, crclient, namespace, ogName, saName) + defer cleanupOG() + + permissions := deploymentPermissions(t) + catsrc, subSpec, catsrcCleanup := newCatalogSource(t, kubeclient, crclient, "scoped", namespace, permissions) + defer catsrcCleanup() + + // Ensure that the catalog source is resolved before we create a subscription. + _, err := fetchCatalogSource(t, crclient, catsrc.GetName(), namespace, catalogSourceRegistryPodSynced) + require.NoError(t, err) + + subscriptionName := genName("scoped-sub-") + cleanupSubscription := createSubscriptionForCatalog(t, crclient, namespace, subscriptionName, catsrc.GetName(), subSpec.Package, subSpec.Channel, subSpec.StartingCSV, subSpec.InstallPlanApproval) + defer cleanupSubscription() + + // Wait until an install plan is created. + subscription, err := fetchSubscription(t, crclient, namespace, subscriptionName, subscriptionHasInstallPlanChecker) + require.NoError(t, err) + require.NotNil(t, subscription) + + // We expect the InstallPlan to be in status: Failed. + ipNameOld := subscription.Status.Install.Name + ipPhaseCheckerFunc := buildInstallPlanPhaseCheckFunc(v1alpha1.InstallPlanPhaseFailed) + ipGotOld, err := fetchInstallPlanWithNamespace(t, crclient, ipNameOld, namespace, ipPhaseCheckerFunc) + require.NoError(t, err) + require.Equal(t, v1alpha1.InstallPlanPhaseFailed, ipGotOld.Status.Phase) + + // Grant permission now and this should trigger an retry of InstallPlan. + cleanupPerm := grantPermission(t, kubeclient, namespace, saName) + defer cleanupPerm() + + ipPhaseCheckerFunc = buildInstallPlanPhaseCheckFunc(v1alpha1.InstallPlanPhaseComplete) + ipGotNew, err := fetchInstallPlanWithNamespace(t, crclient, ipNameOld, namespace, ipPhaseCheckerFunc) + require.NoError(t, err) + require.Equal(t, v1alpha1.InstallPlanPhaseComplete, ipGotNew.Status.Phase) +} + func newNamespace(t *testing.T, client operatorclient.ClientInterface, name string) (ns *corev1.Namespace, cleanup cleanupFunc) { request := &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{