Skip to content

Commit

Permalink
Bug 1740332: OLM should resume operator install
Browse files Browse the repository at this point in the history
OLM should automatically resume operator install when a user grants
proper permission(s).

Currently, a user has to manually delete the subscription and recreate
it in order to trigger a reinstall of the operator.

Do the following to trigger reinstall:
- Add a new field 'AttenuatedServiceAccountRef' to status of
  InstallPlan. We use this to refer to the ServiceAccount that will be
  used to do attenuated scoped install of the operator.
- Watch on Role(Binding), ServiceAccount resources. When these RBAC
  resources are added/updated find the target InstallPlan object.
- Update the status.phase of the InstallPlan object to Installing.
  This will trigger a sync of the InstallPlan.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1740332
Jira: https://jira.coreos.com/browse/OLM-1244
  • Loading branch information
tkashem committed Aug 26, 2019
1 parent 772aaa0 commit ff19541
Show file tree
Hide file tree
Showing 9 changed files with 228 additions and 28 deletions.
9 changes: 5 additions & 4 deletions pkg/api/apis/operators/installplan_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,11 @@ var ErrInvalidInstallPlan = errors.New("the InstallPlan contains invalid data")
//
// Status may trail the actual state of a system.
type InstallPlanStatus struct {
Phase InstallPlanPhase
Conditions []InstallPlanCondition
CatalogSources []string
Plan []*Step
Phase InstallPlanPhase
Conditions []InstallPlanCondition
CatalogSources []string
Plan []*Step
AttenuatedServiceAccountRef *corev1.ObjectReference
}

// InstallPlanCondition represents the overall status of the execution of
Expand Down
21 changes: 12 additions & 9 deletions pkg/api/apis/operators/v1alpha1/installplan_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ type InstallPlanStatus struct {
Conditions []InstallPlanCondition `json:"conditions,omitempty"`
CatalogSources []string `json:"catalogSources"`
Plan []*Step `json:"plan,omitempty"`

// AttenuatedServiceAccountRef references the service account that is used
// to do scoped operator install.
AttenuatedServiceAccountRef *corev1.ObjectReference `json:"attenuatedServiceAccountRef,omitempty"`
}

// InstallPlanCondition represents the overall status of the execution of
Expand Down Expand Up @@ -131,23 +135,22 @@ func (s *InstallPlanStatus) SetCondition(cond InstallPlanCondition) InstallPlanC
return cond
}


func ConditionFailed(cond InstallPlanConditionType, reason InstallPlanConditionReason, message string, now *metav1.Time) InstallPlanCondition {
return InstallPlanCondition{
Type: cond,
Status: corev1.ConditionFalse,
Reason: reason,
Message: message,
LastUpdateTime: *now,
Type: cond,
Status: corev1.ConditionFalse,
Reason: reason,
Message: message,
LastUpdateTime: *now,
LastTransitionTime: *now,
}
}

func ConditionMet(cond InstallPlanConditionType, now *metav1.Time) InstallPlanCondition {
return InstallPlanCondition{
Type: cond,
Status: corev1.ConditionTrue,
LastUpdateTime: *now,
Type: cond,
Status: corev1.ConditionTrue,
LastUpdateTime: *now,
LastTransitionTime: *now,
}
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/apis/operators/v1alpha1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/api/apis/operators/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/api/apis/operators/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

100 changes: 100 additions & 0 deletions pkg/controller/operators/catalog/installplan_sync.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package catalog

import (
"errors"

"github.com/sirupsen/logrus"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
utilerrors "k8s.io/apimachinery/pkg/util/errors"

"github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/ownerutil"
)

// When a user adds permission to a ServiceAccount by creating or updating
// Role/RoleBinding then we expect the InstallPlan that refers to the
// ServiceAccount to be retried if it has failed to install before due to
// permission issue(s).
func (o *Operator) triggerInstallPlanRetry(obj interface{}) (syncError error) {
metaObj, ok := obj.(metav1.Object)
if !ok {
syncError = errors.New("casting to metav1 object failed")
o.logger.Warn(syncError.Error())
return
}

related, _ := isObjectRBACRelated(obj)
if !related {
return
}

ips, err := o.lister.OperatorsV1alpha1().InstallPlanLister().InstallPlans(metaObj.GetNamespace()).List(labels.Everything())
if err != nil {
syncError = err
return
}

isTarget := func(ip *v1alpha1.InstallPlan) bool {
// Only an InstallPlan that has failed to install before and only if it
// has a reference to a ServiceAccount then
return ip.Status.Phase == v1alpha1.InstallPlanPhaseFailed && ip.Status.AttenuatedServiceAccountRef != nil
}

update := func(ip *v1alpha1.InstallPlan) error {
out := ip.DeepCopy()
out.Status.Phase = v1alpha1.InstallPlanPhaseInstalling
_, err := o.client.OperatorsV1alpha1().InstallPlans(ip.GetNamespace()).UpdateStatus(out)

return err
}

var errs []error
for _, ip := range ips {
if !isTarget(ip) {
continue
}

logger := o.logger.WithFields(logrus.Fields{
"ip": ip.GetName(),
"namespace": ip.GetNamespace(),
"phase": ip.Status.Phase,
})

if updateErr := update(ip); updateErr != nil {
errs = append(errs, updateErr)
logger.WithError(updateErr).Warn("failed to kick off InstallPlan retry")
continue
}

logger.Info("InstallPlan status set to 'Installing' for retry")
}

syncError = utilerrors.NewAggregate(errs)
return
}

func isObjectRBACRelated(obj interface{}) (related bool, object runtime.Object) {
object, ok := obj.(runtime.Object)
if !ok {
return
}

if err := ownerutil.InferGroupVersionKind(object); err != nil {
return
}

kind := object.GetObjectKind().GroupVersionKind().Kind
switch kind {
case roleKind:
fallthrough
case roleBindingKind:
fallthrough
case serviceAccountKind:
related = true
}

return
}
27 changes: 24 additions & 3 deletions pkg/controller/operators/catalog/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ func (o *Operator) syncObject(obj interface{}) (syncError error) {

o.requeueOwners(metaObj)

return
return o.triggerInstallPlanRetry(obj)
}

func (o *Operator) handleDeletion(obj interface{}) {
Expand Down Expand Up @@ -1027,6 +1027,28 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
return
}

querier := o.serviceAccountQuerier.NamespaceQuerier(plan.GetNamespace())
reference, err := querier()
if err != nil {
syncError = fmt.Errorf("attenuated service account query failed - %v", err)
return
}

if reference != nil {
out := plan.DeepCopy()
out.Status.AttenuatedServiceAccountRef = reference

if !reflect.DeepEqual(plan, out) {
if _, updateErr := o.client.OperatorsV1alpha1().InstallPlans(out.GetNamespace()).UpdateStatus(out); err != nil {
syncError = fmt.Errorf("failed to attach attenuated ServiceAccount to status - %v", updateErr)
return
}

logger.WithField("attenuated-sa", reference.Name).Info("successfully attached attenuated ServiceAccount to status")
return
}
}

outInstallPlan, syncError := transitionInstallPlanState(logger.Logger, o, *plan, o.now())

if syncError != nil {
Expand Down Expand Up @@ -1190,8 +1212,7 @@ func (o *Operator) ExecutePlan(plan *v1alpha1.InstallPlan) error {
// Does the namespace have an operator group that specifies a user defined
// service account? If so, then we should use a scoped client for plan
// execution.
getter := o.serviceAccountQuerier.NamespaceQuerier(namespace)
kubeclient, crclient, err := o.clientAttenuator.AttenuateClient(getter)
kubeclient, crclient, err := o.clientAttenuator.AttenuateClientWithServiceAccount(plan.Status.AttenuatedServiceAccountRef)
if err != nil {
o.logger.Errorf("failed to get a client for plan execution- %v", err)
return err
Expand Down
33 changes: 21 additions & 12 deletions pkg/lib/scoped/attenuator.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,22 +50,12 @@ type ClientAttenuator struct {
logger *logrus.Logger
}

// AttenuateClient returns appropriately scoped client(s) to the caller.
// AttenuateClientWithServiceAccount returns appropriately scoped client(s) to the caller.
//
// client(s) that are bound to OLM cluster-admin role are returned if the querier
// returns no error and reference to a service account is nil.
// Otherwise an attempt is made to return attenuated client instance(s).
func (s *ClientAttenuator) AttenuateClient(querier ServiceAccountQuerierFunc) (kubeclient operatorclient.ClientInterface, crclient versioned.Interface, err error) {
if querier == nil {
err = errQuerierNotSpecified
return
}

reference, err := querier()
if err != nil {
return
}

func (s *ClientAttenuator) AttenuateClientWithServiceAccount(reference *corev1.ObjectReference) (kubeclient operatorclient.ClientInterface, crclient versioned.Interface, err error) {
if reference == nil {
// No service account/token has been provided. Return the default client(s).
kubeclient = s.kubeclient
Expand All @@ -92,6 +82,25 @@ func (s *ClientAttenuator) AttenuateClient(querier ServiceAccountQuerierFunc) (k
return
}

// AttenuateClient returns appropriately scoped client(s) to the caller.
//
// client(s) that are bound to OLM cluster-admin role are returned if the querier
// returns no error and reference to a service account is nil.
// Otherwise an attempt is made to return attenuated client instance(s).
func (s *ClientAttenuator) AttenuateClient(querier ServiceAccountQuerierFunc) (kubeclient operatorclient.ClientInterface, crclient versioned.Interface, err error) {
if querier == nil {
err = errQuerierNotSpecified
return
}

reference, err := querier()
if err != nil {
return
}

return s.AttenuateClientWithServiceAccount(reference)
}

// AttenuateOperatorClient returns a scoped operator client instance based on the
// service account returned by the querier specified.
func (s *ClientAttenuator) AttenuateOperatorClient(querier ServiceAccountQuerierFunc) (kubeclient operatorclient.ClientInterface, err error) {
Expand Down
54 changes: 54 additions & 0 deletions test/e2e/user_defined_sa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,60 @@ func TestUserDefinedServiceAccountWithPermission(t *testing.T) {
}
}

func TestUserDefinedServiceAccountWithRetry(t *testing.T) {
defer cleaner.NotifyTestComplete(t, false)

kubeclient := newKubeClient(t)
crclient := newCRClient(t)

namespace := genName("scoped-ns-")
_, cleanupNS := newNamespace(t, kubeclient, namespace)
defer cleanupNS()

// Create a service account, but add no permission to it.
saName := genName("scoped-sa-")
_, cleanupSA := newServiceAccount(t, kubeclient, namespace, saName)
defer cleanupSA()

// Add an OperatorGroup and specify the service account.
ogName := genName("scoped-og-")
_, cleanupOG := newOperatorGroupWithServiceAccount(t, crclient, namespace, ogName, saName)
defer cleanupOG()

permissions := deploymentPermissions(t)
catsrc, subSpec, catsrcCleanup := newCatalogSource(t, kubeclient, crclient, "scoped", namespace, permissions)
defer catsrcCleanup()

// Ensure that the catalog source is resolved before we create a subscription.
_, err := fetchCatalogSource(t, crclient, catsrc.GetName(), namespace, catalogSourceRegistryPodSynced)
require.NoError(t, err)

subscriptionName := genName("scoped-sub-")
cleanupSubscription := createSubscriptionForCatalog(t, crclient, namespace, subscriptionName, catsrc.GetName(), subSpec.Package, subSpec.Channel, subSpec.StartingCSV, subSpec.InstallPlanApproval)
defer cleanupSubscription()

// Wait until an install plan is created.
subscription, err := fetchSubscription(t, crclient, namespace, subscriptionName, subscriptionHasInstallPlanChecker)
require.NoError(t, err)
require.NotNil(t, subscription)

// We expect the InstallPlan to be in status: Failed.
ipNameOld := subscription.Status.Install.Name
ipPhaseCheckerFunc := buildInstallPlanPhaseCheckFunc(v1alpha1.InstallPlanPhaseFailed)
ipGotOld, err := fetchInstallPlanWithNamespace(t, crclient, ipNameOld, namespace, ipPhaseCheckerFunc)
require.NoError(t, err)
require.Equal(t, v1alpha1.InstallPlanPhaseFailed, ipGotOld.Status.Phase)

// Grant permission now and this should trigger an retry of InstallPlan.
cleanupPerm := grantPermission(t, kubeclient, namespace, saName)
defer cleanupPerm()

ipPhaseCheckerFunc = buildInstallPlanPhaseCheckFunc(v1alpha1.InstallPlanPhaseComplete)
ipGotNew, err := fetchInstallPlanWithNamespace(t, crclient, ipNameOld, namespace, ipPhaseCheckerFunc)
require.NoError(t, err)
require.Equal(t, v1alpha1.InstallPlanPhaseComplete, ipGotNew.Status.Phase)
}

func newNamespace(t *testing.T, client operatorclient.ClientInterface, name string) (ns *corev1.Namespace, cleanup cleanupFunc) {
request := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Expand Down

0 comments on commit ff19541

Please sign in to comment.