diff --git a/api/v1alpha1/nicclusterpolicy_types.go b/api/v1alpha1/nicclusterpolicy_types.go index 1cbd8d88..680ca792 100644 --- a/api/v1alpha1/nicclusterpolicy_types.go +++ b/api/v1alpha1/nicclusterpolicy_types.go @@ -291,6 +291,9 @@ type AppliedState struct { Name string `json:"name"` // +kubebuilder:validation:Enum={"ready", "notReady", "ignore", "error"} State State `json:"state"` + // Message is a human readable message indicating details about why + // the state is in this condition + Message string `json:"message,omitempty"` } // NicClusterPolicyStatus defines the observed state of NicClusterPolicy diff --git a/config/crd/bases/mellanox.com_hostdevicenetworks.yaml b/config/crd/bases/mellanox.com_hostdevicenetworks.yaml index a41863b2..e9cae8a5 100644 --- a/config/crd/bases/mellanox.com_hostdevicenetworks.yaml +++ b/config/crd/bases/mellanox.com_hostdevicenetworks.yaml @@ -65,6 +65,11 @@ spec: description: AppliedState defines a finer-grained view of the observed state of NicClusterPolicy properties: + message: + description: |- + Message is a human readable message indicating details about why + the state is in this condition + type: string name: type: string state: diff --git a/config/crd/bases/mellanox.com_nicclusterpolicies.yaml b/config/crd/bases/mellanox.com_nicclusterpolicies.yaml index 3e976db3..7f06387b 100644 --- a/config/crd/bases/mellanox.com_nicclusterpolicies.yaml +++ b/config/crd/bases/mellanox.com_nicclusterpolicies.yaml @@ -1238,6 +1238,11 @@ spec: description: AppliedState defines a finer-grained view of the observed state of NicClusterPolicy properties: + message: + description: |- + Message is a human readable message indicating details about why + the state is in this condition + type: string name: type: string state: diff --git a/controllers/nicclusterpolicy_controller.go b/controllers/nicclusterpolicy_controller.go index 1e5d380b..f11ceefa 100644 --- a/controllers/nicclusterpolicy_controller.go +++ b/controllers/nicclusterpolicy_controller.go @@ -298,6 +298,11 @@ NextResult: for i := range cr.Status.AppliedStates { if cr.Status.AppliedStates[i].Name == stateStatus.StateName { cr.Status.AppliedStates[i].State = mellanoxv1alpha1.State(stateStatus.Status) + if stateStatus.ErrInfo != nil { + cr.Status.AppliedStates[i].Message = stateStatus.ErrInfo.Error() + } else { + cr.Status.AppliedStates[i].Message = "" + } continue NextResult } } diff --git a/controllers/nicclusterpolicy_controller_test.go b/controllers/nicclusterpolicy_controller_test.go index 057be3fd..4f395c6b 100644 --- a/controllers/nicclusterpolicy_controller_test.go +++ b/controllers/nicclusterpolicy_controller_test.go @@ -138,6 +138,84 @@ var _ = Describe("NicClusterPolicyReconciler Controller", func() { Expect(err).NotTo(HaveOccurred()) }) }) + Context("When MOFED precompiled tag does not exists", func() { + It("should set error message in status", func() { + By("Create Node") + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node", + Labels: map[string]string{ + nodeinfo.NodeLabelMlnxNIC: "true", + nodeinfo.NodeLabelOSName: "ubuntu", + nodeinfo.NodeLabelCPUArch: "amd64", + nodeinfo.NodeLabelKernelVerFull: "generic-9.0.1", + nodeinfo.NodeLabelOSVer: "20.0.4"}, + Annotations: make(map[string]string), + }, + } + err := k8sClient.Create(context.TODO(), node) + Expect(err).NotTo(HaveOccurred()) + By("Create NicClusterPolicy with MOFED ForcePrecompiled") + cr := mellanoxv1alpha1.NicClusterPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nic-cluster-policy", + Namespace: "", + }, + Spec: mellanoxv1alpha1.NicClusterPolicySpec{ + OFEDDriver: &mellanoxv1alpha1.OFEDDriverSpec{ + ForcePrecompiled: true, + ImageSpec: mellanoxv1alpha1.ImageSpec{ + Image: "mofed", + Repository: "acme.buzz", + Version: "5.9-0.5.6.0", + ImagePullSecrets: []string{}, + }, + }, + }, + } + + err = k8sClient.Create(context.TODO(), &cr) + Expect(err).NotTo(HaveOccurred()) + + ncp := &mellanoxv1alpha1.NicClusterPolicy{} + err = k8sClient.Get(context.TODO(), types.NamespacedName{Namespace: cr.GetNamespace(), Name: cr.GetName()}, ncp) + Expect(err).NotTo(HaveOccurred()) + + By("Wait for NicClusterPolicy OFED state error message to be populated") + msg := "failed to create k8s objects from manifest: " + + "failed to render objects: ForcePrecompiled is enabled " + + "and precompiled tag was not found: " + + "5.9-0.5.6.0-generic-9.0.1-ubuntu20.0.4-amd64" + + Eventually(func() string { + found := &mellanoxv1alpha1.NicClusterPolicy{} + err = k8sClient.Get(context.TODO(), types.NamespacedName{Namespace: cr.GetNamespace(), Name: cr.GetName()}, found) + Expect(err).NotTo(HaveOccurred()) + return getAppliedStateMessage(found.Status.AppliedStates, "state-OFED") + }, timeout*10, interval).Should(BeEquivalentTo(msg)) + + By("Set MOFED ForcePrecompiled to false") + patch := []byte(`{"spec": {"ofedDriver":{"forcePrecompiled": false}}}`) + Expect(k8sClient.Patch(context.TODO(), &cr, client.RawPatch(types.MergePatchType, patch))).To(Succeed()) + + By("Wait for NicClusterPolicy OFED state error message to be cleared") + msg = "" + Eventually(func() string { + found := &mellanoxv1alpha1.NicClusterPolicy{} + err = k8sClient.Get(context.TODO(), types.NamespacedName{Namespace: cr.GetNamespace(), Name: cr.GetName()}, found) + Expect(err).NotTo(HaveOccurred()) + return getAppliedStateMessage(found.Status.AppliedStates, "state-OFED") + }, timeout*10, interval).Should(BeEquivalentTo(msg)) + + By("Delete NicClusterPolicy") + err = k8sClient.Delete(context.TODO(), &cr) + Expect(err).NotTo(HaveOccurred()) + + By("Delete Node") + err = k8sClient.Delete(context.TODO(), node) + Expect(err).NotTo(HaveOccurred()) + }) + }) Context("When NicClusterPolicy CR is deleted", func() { It("should set mofed.wait to false", func() { By("Create Node") @@ -318,3 +396,12 @@ var _ = Describe("NicClusterPolicyReconciler Controller", func() { }) }) }) + +func getAppliedStateMessage(states []mellanoxv1alpha1.AppliedState, stateName string) string { + for _, state := range states { + if state.Name == stateName { + return state.Message + } + } + return "" +} diff --git a/controllers/suite_test.go b/controllers/suite_test.go index d083ef1f..2e220343 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -39,7 +39,6 @@ import ( mellanoxcomv1alpha1 "github.com/Mellanox/network-operator/api/v1alpha1" "github.com/Mellanox/network-operator/pkg/clustertype" - "github.com/Mellanox/network-operator/pkg/docadriverimages" "github.com/Mellanox/network-operator/pkg/staticconfig" // +kubebuilder:scaffold:imports ) @@ -57,6 +56,15 @@ var k8sClient client.Client var testEnv *envtest.Environment var k8sManagerCancelFn context.CancelFunc +type mockImageProvider struct { +} + +func (d *mockImageProvider) TagExists(_ string) bool { + return false +} + +func (d *mockImageProvider) SetImageSpec(*mellanoxcomv1alpha1.ImageSpec) {} + func TestAPIs(t *testing.T) { RegisterFailHandler(Fail) @@ -134,7 +142,6 @@ var _ = BeforeSuite(func() { clusterTypeProvider, err := clustertype.NewProvider(context.Background(), k8sClient) Expect(err).NotTo(HaveOccurred()) staticConfigProvider := staticconfig.NewProvider(staticconfig.StaticConfig{CniBinDirectory: "/opt/cni/bin"}) - docaImagesProvider := docadriverimages.NewProvider(context.Background(), k8sClient) err = (&NicClusterPolicyReconciler{ Client: k8sManager.GetClient(), @@ -142,7 +149,7 @@ var _ = BeforeSuite(func() { ClusterTypeProvider: clusterTypeProvider, StaticConfigProvider: staticConfigProvider, MigrationCh: migrationCompletionChan, - DocaDriverImagesProvider: docaImagesProvider, + DocaDriverImagesProvider: &mockImageProvider{}, }).SetupWithManager(k8sManager, testSetupLog) Expect(err).ToNot(HaveOccurred()) diff --git a/deployment/network-operator/crds/mellanox.com_hostdevicenetworks.yaml b/deployment/network-operator/crds/mellanox.com_hostdevicenetworks.yaml index a41863b2..e9cae8a5 100644 --- a/deployment/network-operator/crds/mellanox.com_hostdevicenetworks.yaml +++ b/deployment/network-operator/crds/mellanox.com_hostdevicenetworks.yaml @@ -65,6 +65,11 @@ spec: description: AppliedState defines a finer-grained view of the observed state of NicClusterPolicy properties: + message: + description: |- + Message is a human readable message indicating details about why + the state is in this condition + type: string name: type: string state: diff --git a/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml b/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml index 3e976db3..7f06387b 100644 --- a/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml +++ b/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml @@ -1238,6 +1238,11 @@ spec: description: AppliedState defines a finer-grained view of the observed state of NicClusterPolicy properties: + message: + description: |- + Message is a human readable message indicating details about why + the state is in this condition + type: string name: type: string state: diff --git a/pkg/state/state_ofed.go b/pkg/state/state_ofed.go index 586eb213..7458e70f 100644 --- a/pkg/state/state_ofed.go +++ b/pkg/state/state_ofed.go @@ -298,7 +298,7 @@ func (s *stateOFED) Sync(ctx context.Context, customResource interface{}, infoCa objs, err := s.GetManifestObjects(ctx, cr, infoCatalog, log.FromContext(ctx)) if err != nil { - return SyncStateNotReady, errors.Wrap(err, "failed to create k8s objects from manifest") + return SyncStateError, errors.Wrap(err, "failed to create k8s objects from manifest") } if len(objs) == 0 { // GetManifestObjects returned no objects, this means that no objects need to be applied to the cluster @@ -455,7 +455,7 @@ func renderObjects(ctx context.Context, nodePool *nodeinfo.NodePool, useDtk bool precompiledExists := docaProvider.TagExists(precompiledTag) reqLogger.V(consts.LogLevelDebug).Info("Precompiled tag", "tag:", precompiledTag, "found:", precompiledExists) if !precompiledExists && cr.Spec.OFEDDriver.ForcePrecompiled { - return nil, fmt.Errorf("ForcePrecompiled is enabled and precompiled image was not found") + return nil, fmt.Errorf("ForcePrecompiled is enabled and precompiled tag was not found: %s", precompiledTag) } if precompiledExists {