Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve CatalogSource controller error handling #34

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions pkg/apis/core/v1beta1/catalogsource_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,11 @@ import (
const (
TypeReady = "Ready"

ReasonContentsAvailable = "ContentsAvailable"
ReasonUnpackError = "UnpackError"
ReasonContentsAvailable = "ContentsAvailable"
ReasonJobUnpackError = "JobUnpackError"
ReasonParseUnpackLogsError = "ParseUnpackLogsError"
ReasonBuildPackagesError = "BuildPackagesError"
ReasonBuildMetadataError = "BuildMetadataError"
rashmigottipati marked this conversation as resolved.
Show resolved Hide resolved
)

// +genclient
Expand Down Expand Up @@ -157,3 +160,22 @@ func IsUnpackPhaseError(err error) bool {
_, ok := err.(*UnpackPhaseError)
return ok
}

type UnpackPodNotPresentError struct {
message string
}

func NewUnpackPodNotPresentError(message string) *UnpackPodNotPresentError {
return &UnpackPodNotPresentError{
message: message,
}
}

func (u *UnpackPodNotPresentError) Error() string {
return u.message
}

func IsUnpackPodNotPresentError(err error) bool {
_, ok := err.(*UnpackPodNotPresentError)
return ok
}
rashmigottipati marked this conversation as resolved.
Show resolved Hide resolved
15 changes: 15 additions & 0 deletions pkg/apis/core/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

69 changes: 50 additions & 19 deletions pkg/controllers/core/catalogsource_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,9 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"

corev1beta1 "github.com/operator-framework/catalogd/pkg/apis/core/v1beta1"
)
Expand Down Expand Up @@ -78,7 +76,7 @@ func (r *CatalogSourceReconciler) Reconcile(ctx context.Context, req ctrl.Reques
if err != nil {
if errors.IsNotFound(err) {
if err = r.createUnpackJob(ctx, catalogSource); err != nil {
updateStatusError(&catalogSource, err)
updateUnpackJobError(&catalogSource, err)
if err := r.Client.Status().Update(ctx, &catalogSource); err != nil {
return ctrl.Result{}, fmt.Errorf("updating catalogsource status: %v", err)
}
Expand All @@ -87,7 +85,7 @@ func (r *CatalogSourceReconciler) Reconcile(ctx context.Context, req ctrl.Reques
// after creating the job requeue
return ctrl.Result{Requeue: true}, nil
}
updateStatusError(&catalogSource, err)
updateUnpackJobError(&catalogSource, err)
if err := r.Client.Status().Update(ctx, &catalogSource); err != nil {
return ctrl.Result{}, fmt.Errorf("updating catalogsource status: %v", err)
}
Expand All @@ -96,11 +94,16 @@ func (r *CatalogSourceReconciler) Reconcile(ctx context.Context, req ctrl.Reques

declCfg, err := r.parseUnpackLogs(ctx, job)
if err != nil {
// check if the error is due to pod not being present and requeue if it is
if corev1beta1.IsUnpackPodNotPresentError(err) {
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
rashmigottipati marked this conversation as resolved.
Show resolved Hide resolved
}
rashmigottipati marked this conversation as resolved.
Show resolved Hide resolved

// check if this is a pod phase error and requeue if it is
if corev1beta1.IsUnpackPhaseError(err) {
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}
updateStatusError(&catalogSource, err)
updateParseLogsError(&catalogSource, err)
if err := r.Client.Status().Update(ctx, &catalogSource); err != nil {
return ctrl.Result{}, fmt.Errorf("updating catalogsource status: %v", err)
}
Expand All @@ -109,15 +112,15 @@ func (r *CatalogSourceReconciler) Reconcile(ctx context.Context, req ctrl.Reques

// TODO: Can we create these resources in parallel using goroutines?
if err := r.buildPackages(ctx, declCfg, catalogSource); err != nil {
updateStatusError(&catalogSource, err)
updateBuildPackagesError(&catalogSource, err)
if err := r.Client.Status().Update(ctx, &catalogSource); err != nil {
return ctrl.Result{}, fmt.Errorf("updating catalogsource status: %v", err)
}
return ctrl.Result{}, err
}

if err := r.buildBundleMetadata(ctx, declCfg, catalogSource); err != nil {
updateStatusError(&catalogSource, err)
updateBuildMetadataError(&catalogSource, err)
if err := r.Client.Status().Update(ctx, &catalogSource); err != nil {
return ctrl.Result{}, fmt.Errorf("updating catalogsource status: %v", err)
}
Expand All @@ -142,26 +145,46 @@ func updateStatusReady(catalogSource *corev1beta1.CatalogSource) {
})
}

func updateStatusError(catalogSource *corev1beta1.CatalogSource, err error) {
func updateUnpackJobError(catalogSource *corev1beta1.CatalogSource, err error) {
meta.SetStatusCondition(&catalogSource.Status.Conditions, metav1.Condition{
Type: corev1beta1.TypeReady,
Status: metav1.ConditionFalse,
Reason: corev1beta1.ReasonJobUnpackError,
Message: "catalog contents have not been unpacked correctly and so are unavailable on the cluster",
})
}

func updateParseLogsError(catalogSource *corev1beta1.CatalogSource, err error) {
meta.SetStatusCondition(&catalogSource.Status.Conditions, metav1.Condition{
Type: corev1beta1.TypeReady,
Status: metav1.ConditionFalse,
Reason: corev1beta1.ReasonUnpackError,
Message: err.Error(),
Reason: corev1beta1.ReasonParseUnpackLogsError,
Message: "catalog contents could not be read and transformed into a File-Based Catalog format and so are unavailable on the cluster",
})
}

func updateBuildPackagesError(catalogSource *corev1beta1.CatalogSource, err error) {
meta.SetStatusCondition(&catalogSource.Status.Conditions, metav1.Condition{
Type: corev1beta1.TypeReady,
Status: metav1.ConditionFalse,
Reason: corev1beta1.ReasonBuildPackagesError,
Message: "unable to create Package CRs from catalog contents",
})
}

func updateBuildMetadataError(catalogSource *corev1beta1.CatalogSource, err error) {
meta.SetStatusCondition(&catalogSource.Status.Conditions, metav1.Condition{
Type: corev1beta1.TypeReady,
Status: metav1.ConditionFalse,
Reason: corev1beta1.ReasonBuildMetadataError,
Message: "unable to create BundleMetadata CRs from catalog contents",
rashmigottipati marked this conversation as resolved.
Show resolved Hide resolved
})
}

// SetupWithManager sets up the controller with the Manager.
func (r *CatalogSourceReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
// TODO: Due to us not having proper error handling,
// not having this results in the controller getting into
// an error state because once we update the status it requeues
// and then errors out when trying to create all the Packages again
// even though they already exist. This should be resolved by the fix
// for https://github.com/operator-framework/catalogd/issues/6. The fix for
// #6 should also remove the usage of `builder.WithPredicates(predicate.GenerationChangedPredicate{})`
For(&corev1beta1.CatalogSource{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
For(&corev1beta1.CatalogSource{}).
Complete(r)
}

Expand Down Expand Up @@ -202,6 +225,10 @@ func (r *CatalogSourceReconciler) buildBundleMetadata(ctx context.Context, declC
ctrlutil.SetOwnerReference(&catalogSource, &bundleMeta, r.Scheme)

if err := r.Client.Create(ctx, &bundleMeta); err != nil {
// If BundleMetadata CR already exists, continue
if errors.IsNotFound(err) {
return nil
}
rashmigottipati marked this conversation as resolved.
Show resolved Hide resolved
Comment on lines +228 to +231
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I am reading this right, I think this should be:

Suggested change
// If BundleMetadata CR already exists, continue
if errors.IsNotFound(err) {
return nil
}
// If BundleMetadata CR already exists, continue
if !errors.IsAlreadyExists(err) {
return err
}

return fmt.Errorf("creating bundlemetadata %q: %w", bundleMeta.Name, err)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something that was discussed in https://github.com/operator-framework/catalogd/pull/30/files#r1164309453 is instead of returning an error when first encountered, we should aggregate the errors and return all the errors that occurred. This would mean the first occurrence of an error doesn't completely stop the unpacking process.

That being said, I'm not sure how our plans to switch to a different mechanism would impact this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I don't think I'd worry too much about this kind of thing right now unless it impacts
operator-framework/operator-controller#161

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it would impact operator-framework/operator-controller#161 any more than the current process does. If information fails to get unpacked we set the ready condition to false regardless to signal that you shouldn't attempt to get information from that catalog.

}
}
Expand Down Expand Up @@ -249,6 +276,10 @@ func (r *CatalogSourceReconciler) buildPackages(ctx context.Context, declCfg *de
ctrlutil.SetOwnerReference(&catalogSource, &pack, r.Scheme)

if err := r.Client.Create(ctx, &pack); err != nil {
// If Create fails due to Package CR already being present, continue
if errors.IsNotFound(err) {
return nil
}
Comment on lines +279 to +282
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto to wrong error check, I think it should be:

Suggested change
// If Create fails due to Package CR already being present, continue
if errors.IsNotFound(err) {
return nil
}
// If Create fails due to Package CR already being present, continue
if !errors.IsAlreadyExists(err) {
return err
}

return fmt.Errorf("creating package %q: %w", pack.Name, err)
rashmigottipati marked this conversation as resolved.
Show resolved Hide resolved
}
}
Expand Down Expand Up @@ -280,7 +311,7 @@ func (r *CatalogSourceReconciler) parseUnpackLogs(ctx context.Context, job *batc
}

if len(podsForJob.Items) <= 0 {
return nil, fmt.Errorf("no pods for job")
return nil, corev1beta1.NewUnpackPodNotPresentError(fmt.Sprintf("no pods found for job %q", job.GetName()))
rashmigottipati marked this conversation as resolved.
Show resolved Hide resolved
}
pod := podsForJob.Items[0]

Expand Down