Skip to content

Commit

Permalink
OCPBUGS-31522: Warn and allow CRD upgrade if validation fails but new…
Browse files Browse the repository at this point in the history
… CRD specifies a conversion strategy (#3209)

* warn but allow CRD upgrade when validations fail but new CRD has conversion strategy

Signed-off-by: everettraven <everettraven@gmail.com>

* unexport validationError

Signed-off-by: everettraven <everettraven@gmail.com>

* updating warning message to be more descriptive, add warning event on installplan

Signed-off-by: everettraven <everettraven@gmail.com>

* update warning templ per review comments

Signed-off-by: everettraven <everettraven@gmail.com>

---------

Signed-off-by: everettraven <everettraven@gmail.com>
  • Loading branch information
everettraven authored May 2, 2024
1 parent 9b28021 commit c3e1774
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 23 deletions.
14 changes: 7 additions & 7 deletions pkg/controller/operators/catalog/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1585,9 +1585,7 @@ func (o *Operator) ensureSubscriptionCSVState(logger *logrus.Entry, sub *v1alpha
}

func (o *Operator) setIPReference(subs []*v1alpha1.Subscription, gen int, installPlanRef *corev1.ObjectReference) []*v1alpha1.Subscription {
var (
lastUpdated = o.now()
)
lastUpdated := o.now()
for _, sub := range subs {
sub.Status.LastUpdated = lastUpdated
if installPlanRef != nil {
Expand Down Expand Up @@ -2204,6 +2202,10 @@ func validateV1Beta1CRDCompatibility(dynamicClient dynamic.Interface, oldCRD *ap
return validateExistingCRs(dynamicClient, gr, validationsMap)
}

type validationError struct {
error
}

// validateExistingCRs lists all CRs for each version entry in validationsMap, then validates each using the paired validation.
func validateExistingCRs(dynamicClient dynamic.Interface, gr schema.GroupResource, validationsMap map[string]*apiextensions.CustomResourceValidation) error {
for version, schemaValidation := range validationsMap {
Expand All @@ -2212,7 +2214,6 @@ func validateExistingCRs(dynamicClient dynamic.Interface, gr schema.GroupResourc
if err != nil {
return fmt.Errorf("error creating validator for schema version %s: %s", version, err)
}

gvr := schema.GroupVersionResource{Group: gr.Group, Version: version, Resource: gr.Resource}
crList, err := dynamicClient.Resource(gvr).List(context.TODO(), metav1.ListOptions{})
if err != nil {
Expand All @@ -2229,7 +2230,7 @@ func validateExistingCRs(dynamicClient dynamic.Interface, gr schema.GroupResourc
} else {
namespacedName = fmt.Sprintf("%s/%s", cr.GetNamespace(), cr.GetName())
}
return fmt.Errorf("error validating %s %q: updated validation is too restrictive: %v", cr.GroupVersionKind(), namespacedName, err)
return validationError{fmt.Errorf("error validating %s %q: updated validation is too restrictive: %v", cr.GroupVersionKind(), namespacedName, err)}
}
}
}
Expand Down Expand Up @@ -2321,7 +2322,7 @@ func (o *Operator) ExecutePlan(plan *v1alpha1.InstallPlan) error {
o.logger.Errorf("failed to get a client for plan execution- %v", err)
return err
}
b := newBuilder(plan, o.lister.OperatorsV1alpha1().ClusterServiceVersionLister(), builderKubeClient, builderDynamicClient, r, o.logger)
b := newBuilder(plan, o.lister.OperatorsV1alpha1().ClusterServiceVersionLister(), builderKubeClient, builderDynamicClient, r, o.logger, o.recorder)

for i, step := range plan.Status.Plan {
if err := func(i int, step *v1alpha1.Step) error {
Expand Down Expand Up @@ -2782,7 +2783,6 @@ func (o *Operator) ExecutePlan(plan *v1alpha1.InstallPlan) error {
func (o *Operator) getExistingAPIOwners(namespace string) (map[string][]string, error) {
// Get a list of CSVs in the namespace
csvList, err := o.client.OperatorsV1alpha1().ClusterServiceVersions(namespace).List(context.TODO(), metav1.ListOptions{})

if err != nil {
return nil, err
}
Expand Down
28 changes: 17 additions & 11 deletions pkg/controller/operators/catalog/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,10 @@ func TestSyncInstallPlanUnhappy(t *testing.T) {
testName: "HasSteps/NoOperatorGroup",
err: fmt.Errorf("attenuated service account query failed - no operator group found that is managing this namespace"),
expectedPhase: v1alpha1.InstallPlanPhaseInstalling,
expectedCondition: &v1alpha1.InstallPlanCondition{Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "no operator group found that is managing this namespace"},
expectedCondition: &v1alpha1.InstallPlanCondition{
Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "no operator group found that is managing this namespace",
},
in: ipWithSteps,
},
{
Expand All @@ -264,8 +266,10 @@ func TestSyncInstallPlanUnhappy(t *testing.T) {
err: fmt.Errorf("attenuated service account query failed - more than one operator group(s) are managing this namespace count=2"),
expectedPhase: v1alpha1.InstallPlanPhaseInstalling,
in: ipWithSteps,
expectedCondition: &v1alpha1.InstallPlanCondition{Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "more than one operator group(s) are managing this namespace count=2"},
expectedCondition: &v1alpha1.InstallPlanCondition{
Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "more than one operator group(s) are managing this namespace count=2",
},
clientObjs: []runtime.Object{
operatorGroup("og1", "sa", namespace,
&corev1.ObjectReference{
Expand All @@ -286,8 +290,10 @@ func TestSyncInstallPlanUnhappy(t *testing.T) {
testName: "HasSteps/NonExistentServiceAccount",
err: fmt.Errorf("attenuated service account query failed - please make sure the service account exists. sa=sa1 operatorgroup=ns/og"),
expectedPhase: v1alpha1.InstallPlanPhaseInstalling,
expectedCondition: &v1alpha1.InstallPlanCondition{Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "please make sure the service account exists. sa=sa1 operatorgroup=ns/og"},
expectedCondition: &v1alpha1.InstallPlanCondition{
Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "please make sure the service account exists. sa=sa1 operatorgroup=ns/og",
},
in: ipWithSteps,
clientObjs: []runtime.Object{
operatorGroup("og", "sa1", namespace, nil),
Expand Down Expand Up @@ -1780,7 +1786,7 @@ func TestValidateV1Beta1CRDCompatibility(t *testing.T) {
},
oldCRD: unversionedCRDForV1beta1File("testdata/hivebug/crd.yaml"),
newCRD: unversionedCRDForV1beta1File("testdata/hivebug/crd.yaml"),
want: fmt.Errorf("error validating hive.openshift.io/v1, Kind=MachinePool \"test\": updated validation is too restrictive: [[].spec.clusterDeploymentRef: Invalid value: \"null\": spec.clusterDeploymentRef in body must be of type object: \"null\", [].spec.name: Required value, [].spec.platform: Required value]"),
want: validationError{fmt.Errorf("error validating hive.openshift.io/v1, Kind=MachinePool \"test\": updated validation is too restrictive: [[].spec.clusterDeploymentRef: Invalid value: \"null\": spec.clusterDeploymentRef in body must be of type object: \"null\", [].spec.name: Required value, [].spec.platform: Required value]")},
},
{
name: "backwards incompatible change",
Expand All @@ -1794,7 +1800,7 @@ func TestValidateV1Beta1CRDCompatibility(t *testing.T) {
},
oldCRD: unversionedCRDForV1beta1File("testdata/apiextensionsv1beta1/crd.old.yaml"),
newCRD: unversionedCRDForV1beta1File("testdata/apiextensionsv1beta1/crd.yaml"),
want: fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 2: spec.scalar in body should be greater than or equal to 3"),
want: validationError{fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 2: spec.scalar in body should be greater than or equal to 3")},
},
{
name: "unserved version",
Expand Down Expand Up @@ -1827,7 +1833,7 @@ func TestValidateV1Beta1CRDCompatibility(t *testing.T) {
},
oldCRD: unversionedCRDForV1beta1File("testdata/apiextensionsv1beta1/crd.no-versions-list.old.yaml"),
newCRD: unversionedCRDForV1beta1File("testdata/apiextensionsv1beta1/crd.no-versions-list.yaml"),
want: fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 2: spec.scalar in body should be greater than or equal to 3"),
want: validationError{fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 2: spec.scalar in body should be greater than or equal to 3")},
},
}
for _, tt := range tests {
Expand Down Expand Up @@ -1882,7 +1888,7 @@ func TestValidateV1CRDCompatibility(t *testing.T) {
},
oldCRD: unversionedCRDForV1File("testdata/apiextensionsv1/crontabs.crd.old.yaml"),
newCRD: unversionedCRDForV1File("testdata/apiextensionsv1/crontabs.crd.yaml"),
want: fmt.Errorf("error validating stable.example.com/v2, Kind=CronTab \"my-crontab\": updated validation is too restrictive: [].spec.replicas: Invalid value: 10: spec.replicas in body should be less than or equal to 9"),
want: validationError{fmt.Errorf("error validating stable.example.com/v2, Kind=CronTab \"my-crontab\": updated validation is too restrictive: [].spec.replicas: Invalid value: 10: spec.replicas in body should be less than or equal to 9")},
},
{
name: "cr not invalidated by unserved version",
Expand All @@ -1909,7 +1915,7 @@ func TestValidateV1CRDCompatibility(t *testing.T) {
},
oldCRD: unversionedCRDForV1File("testdata/apiextensionsv1/single-version-crd.old.yaml"),
newCRD: unversionedCRDForV1File("testdata/apiextensionsv1/single-version-crd.yaml"),
want: fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 100: spec.scalar in body should be less than or equal to 50"),
want: validationError{fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 100: spec.scalar in body should be less than or equal to 50")},
},
}
for _, tt := range tests {
Expand Down
31 changes: 26 additions & 5 deletions pkg/controller/operators/catalog/step.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ import (
"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apiextensionsv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
apiextensionsv1client "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1"
apiextensionsv1beta1client "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/retry"

"github.com/operator-framework/api/pkg/operators/v1alpha1"
Expand Down Expand Up @@ -44,18 +46,20 @@ type builder struct {
dynamicClient dynamic.Interface
manifestResolver ManifestResolver
logger logrus.FieldLogger
eventRecorder record.EventRecorder

annotator alongside.Annotator
}

func newBuilder(plan *v1alpha1.InstallPlan, csvLister listersv1alpha1.ClusterServiceVersionLister, opclient operatorclient.ClientInterface, dynamicClient dynamic.Interface, manifestResolver ManifestResolver, logger logrus.FieldLogger) *builder {
func newBuilder(plan *v1alpha1.InstallPlan, csvLister listersv1alpha1.ClusterServiceVersionLister, opclient operatorclient.ClientInterface, dynamicClient dynamic.Interface, manifestResolver ManifestResolver, logger logrus.FieldLogger, er record.EventRecorder) *builder {
return &builder{
plan: plan,
csvLister: csvLister,
opclient: opclient,
dynamicClient: dynamicClient,
manifestResolver: manifestResolver,
logger: logger,
eventRecorder: er,
}
}

Expand Down Expand Up @@ -140,7 +144,26 @@ func (b *builder) NewCRDV1Step(client apiextensionsv1client.ApiextensionsV1Inter
currentCRD, _ := client.CustomResourceDefinitions().Get(context.TODO(), crd.GetName(), metav1.GetOptions{})
crd.SetResourceVersion(currentCRD.GetResourceVersion())
if err = validateV1CRDCompatibility(b.dynamicClient, currentCRD, crd); err != nil {
return fmt.Errorf("error validating existing CRs against new CRD's schema for %q: %w", step.Resource.Name, err)
vErr := &validationError{}
// if the conversion strategy in the new CRD is not "Webhook" OR the error is not a ValidationError
// return an error. This will catch and return any errors that occur unrelated to actual validation.
// For example, the API server returning an error when performing a list operation
if crd.Spec.Conversion == nil || crd.Spec.Conversion.Strategy != apiextensionsv1.WebhookConverter || !errors.As(err, vErr) {
return fmt.Errorf("error validating existing CRs against new CRD's schema for %q: %w", step.Resource.Name, err)
}
// If the conversion strategy in the new CRD is "Webhook" and the error that occurred
// is an error related to validation, warn that validation failed but that we are trusting
// that the conversion strategy specified by the author will successfully convert to a format
// that passes validation and allow the upgrade to continue
warnTempl := `Validation of existing CRs against the new CRD's schema failed, but a webhook conversion strategy was specified in the new CRD.
The new webhook will only start after the bundle is upgraded, so we must assume that it will successfully convert existing CRs to a format that would have passed validation.
CRD: %q
Validation Error: %s
`
warnString := fmt.Sprintf(warnTempl, step.Resource.Name, err.Error())
b.logger.Warn(warnString)
b.eventRecorder.Event(b.plan, corev1.EventTypeWarning, "CRDValidation", warnString)
}

// check to see if stored versions changed and whether the upgrade could cause potential data loss
Expand Down Expand Up @@ -267,9 +290,7 @@ func (b *builder) NewCRDV1Beta1Step(client apiextensionsv1beta1client.Apiextensi
}

func setInstalledAlongsideAnnotation(a alongside.Annotator, dst metav1.Object, namespace string, name string, lister listersv1alpha1.ClusterServiceVersionLister, srcs ...metav1.Object) {
var (
nns []alongside.NamespacedName
)
var nns []alongside.NamespacedName

// Only keep references to existing and non-copied CSVs to
// avoid unbounded growth.
Expand Down

0 comments on commit c3e1774

Please sign in to comment.