Skip to content

Commit

Permalink
Merge branch 'master' into nv-ipam-crd
Browse files Browse the repository at this point in the history
  • Loading branch information
ykulazhenkov committed Sep 26, 2023
2 parents cd81508 + df0abeb commit 6216af9
Show file tree
Hide file tree
Showing 20 changed files with 557 additions and 52 deletions.
24 changes: 15 additions & 9 deletions api/v1alpha1/nicclusterpolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,20 +211,26 @@ type NVIPAMSpec struct {
ImageSpecWithConfig `json:""`
}

// NICFeatureDiscoverySpec describes configuration options for nic-feature-discovery
type NICFeatureDiscoverySpec struct {
ImageSpec `json:""`
}

// NicClusterPolicySpec defines the desired state of NicClusterPolicy
type NicClusterPolicySpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
// Important: Run "make" to regenerate code after modifying this file

NodeAffinity *v1.NodeAffinity `json:"nodeAffinity,omitempty"`
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
OFEDDriver *OFEDDriverSpec `json:"ofedDriver,omitempty"`
RdmaSharedDevicePlugin *DevicePluginSpec `json:"rdmaSharedDevicePlugin,omitempty"`
SriovDevicePlugin *DevicePluginSpec `json:"sriovDevicePlugin,omitempty"`
IBKubernetes *IBKubernetesSpec `json:"ibKubernetes,omitempty"`
SecondaryNetwork *SecondaryNetworkSpec `json:"secondaryNetwork,omitempty"`
NvIpam *NVIPAMSpec `json:"nvIpam,omitempty"`
PSP *PSPSpec `json:"psp,omitempty"`
NodeAffinity *v1.NodeAffinity `json:"nodeAffinity,omitempty"`
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
OFEDDriver *OFEDDriverSpec `json:"ofedDriver,omitempty"`
RdmaSharedDevicePlugin *DevicePluginSpec `json:"rdmaSharedDevicePlugin,omitempty"`
SriovDevicePlugin *DevicePluginSpec `json:"sriovDevicePlugin,omitempty"`
IBKubernetes *IBKubernetesSpec `json:"ibKubernetes,omitempty"`
SecondaryNetwork *SecondaryNetworkSpec `json:"secondaryNetwork,omitempty"`
NvIpam *NVIPAMSpec `json:"nvIpam,omitempty"`
PSP *PSPSpec `json:"psp,omitempty"`
NicFeatureDiscovery *NICFeatureDiscoverySpec `json:"nicFeatureDiscovery,omitempty"`
}

// AppliedState defines a finer-grained view of the observed state of NicClusterPolicy
Expand Down
21 changes: 21 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions config/crd/bases/mellanox.com_nicclusterpolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,28 @@ spec:
- repository
- version
type: object
nicFeatureDiscovery:
description: NicFeatureDiscoverySpec describes configuration options
for nic-feature-discovery
properties:
image:
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullSecrets:
items:
type: string
type: array
repository:
pattern: '[a-zA-Z0-9\.\-\/]+'
type: string
version:
pattern: '[a-zA-Z0-9\.-]+'
type: string
required:
- image
- repository
- version
type: object
nodeAffinity:
description: Node affinity is a group of node affinity scheduling
rules.
Expand Down
91 changes: 62 additions & 29 deletions controllers/nicclusterpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Req
// Request object not found, could have been deleted after reconcile request.
// Owned objects are automatically garbage collected. For additional cleanup logic use finalizers.
// Return and don't requeue
err := r.clearMofedWaitLabel(ctx)
err := r.handleMOFEDWaitLabelsNoConfig(ctx)
if err != nil {
reqLogger.V(consts.LogLevelError).Error(err, "Fail to clear Mofed label on CR deletion.")
return reconcile.Result{}, err
Expand All @@ -111,7 +111,7 @@ func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Req
// Create a new State service catalog
sc := state.NewInfoCatalog()
if instance.Spec.OFEDDriver != nil || instance.Spec.RdmaSharedDevicePlugin != nil ||
instance.Spec.SriovDevicePlugin != nil {
instance.Spec.SriovDevicePlugin != nil || instance.Spec.NicFeatureDiscovery != nil {
// Create node infoProvider and add to the service catalog
reqLogger.V(consts.LogLevelInfo).Info("Creating Node info provider")
nodeList := &corev1.NodeList{}
Expand All @@ -135,7 +135,7 @@ func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Req
managerStatus := r.stateManager.SyncState(ctx, instance, sc)
r.updateCrStatus(ctx, instance, managerStatus)

err = r.updateNodeLabels(ctx, instance)
err = r.handleMOFEDWaitLabels(ctx, instance)
if err != nil {
return reconcile.Result{}, err
}
Expand All @@ -149,9 +149,9 @@ func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Req
return ctrl.Result{}, nil
}

// updateNodeLabels updates nodes labels to mark device plugins should wait for OFED pod
// handleMOFEDWaitLabels updates node labels to mark whether device plugins should wait for the OFED pod.
// Set nvidia.com/ofed.wait=false if OFED is not deployed.
func (r *NicClusterPolicyReconciler) updateNodeLabels(
func (r *NicClusterPolicyReconciler) handleMOFEDWaitLabels(
ctx context.Context, cr *mellanoxv1alpha1.NicClusterPolicy) error {
if cr.Spec.OFEDDriver != nil {
pods := &corev1.PodList{}
Expand All @@ -165,47 +165,80 @@ func (r *NicClusterPolicyReconciler) updateNodeLabels(
if len(pod.Status.ContainerStatuses) != 0 && pod.Status.ContainerStatuses[0].Ready {
labelValue = "false"
}
patch := []byte(fmt.Sprintf(`{"metadata":{"labels":{%q:%q}}}`, nodeinfo.NodeLabelWaitOFED, labelValue))
err := r.Client.Patch(ctx, &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: pod.Spec.NodeName,
},
}, client.RawPatch(types.StrategicMergePatchType, patch))

if err != nil {
return errors.Wrapf(err, "unable to patch %s label for node %s", nodeinfo.NodeLabelWaitOFED,
pod.Spec.NodeName)
if err := setOFEDWaitLabel(ctx, r.Client, pod.Spec.NodeName, labelValue); err != nil {
return err
}
}
} else {
return r.clearMofedWaitLabel(ctx)
return r.handleMOFEDWaitLabelsNoConfig(ctx)
}

return nil
}

// clearMofedWaitLabel set "network.nvidia.com/operator.mofed.wait" to false
// on Nodes with Mellanox NICs
func (r *NicClusterPolicyReconciler) clearMofedWaitLabel(ctx context.Context) error {
// We deploy OFED and Device plugins only on a nodes with Mellanox NICs
// handleMOFEDWaitLabelsNoConfig handles the MOFED wait label for scenarios when OFED is
// not configured in NicClusterPolicy. It does the following:
// - sets "network.nvidia.com/operator.mofed.wait" to false on Nodes with NVIDIA NICs
// - removes "network.nvidia.com/operator.mofed.wait" from Nodes which have no NVIDIA NICs anymore
// - sets "network.nvidia.com/operator.mofed.wait" to true if it detects an OFED Pod
// on the Node (probably in the terminating state)
func (r *NicClusterPolicyReconciler) handleMOFEDWaitLabelsNoConfig(ctx context.Context) error {
nodesWithOFEDContainer := map[string]struct{}{}
pods := &corev1.PodList{}
if err := r.Client.List(ctx, pods, client.MatchingLabels{consts.OfedDriverLabel: ""}); err != nil {
return errors.Wrap(err, "failed to list pods")
}
for i := range pods.Items {
pod := pods.Items[i]
if pod.Spec.NodeName != "" {
nodesWithOFEDContainer[pod.Spec.NodeName] = struct{}{}
}
}
nodes := &corev1.NodeList{}

err := r.Client.List(ctx, nodes, client.MatchingLabels{nodeinfo.NodeLabelMlnxNIC: "true"})
if err != nil {
if err := r.Client.List(ctx, nodes); err != nil {
return errors.Wrap(err, "failed to list nodes")
}

for i := range nodes.Items {
patch := []byte(fmt.Sprintf(`{"metadata":{"labels":{%q:"false"}}}`, nodeinfo.NodeLabelWaitOFED))
err := r.Client.Patch(ctx, &nodes.Items[i], client.RawPatch(types.StrategicMergePatchType, patch))
if err != nil {
return errors.Wrapf(err, "unable to patch %s node label for node %s",
nodeinfo.NodeLabelWaitOFED, nodes.Items[i].Name)
node := nodes.Items[i]
labelValue := ""
if _, hasOFED := nodesWithOFEDContainer[node.Name]; hasOFED {
labelValue = "true"
} else if node.GetLabels()[nodeinfo.NodeLabelMlnxNIC] == "true" {
labelValue = "false"
}
if node.GetLabels()[nodeinfo.NodeLabelWaitOFED] == labelValue {
// already has the right value
continue
}
if err := setOFEDWaitLabel(ctx, r.Client, node.Name, labelValue); err != nil {
return err
}
}
return nil
}

// setOFEDWaitLabel patches the Node called node so that its OFED wait label
// (nodeinfo.NodeLabelWaitOFED) holds value. An empty value removes the label
// from the Node instead of setting it.
func setOFEDWaitLabel(ctx context.Context, c client.Client, node, value string) error {
	// Start from the removal form of the patch (label mapped to null) and
	// switch to the assignment form only when a concrete value was requested.
	payload := fmt.Sprintf(`{"metadata":{"labels":{%q: null}}}`, nodeinfo.NodeLabelWaitOFED)
	if value != "" {
		payload = fmt.Sprintf(`{"metadata":{"labels":{%q: %q}}}`, nodeinfo.NodeLabelWaitOFED, value)
	}

	// Only the name is needed to address the Node being patched.
	target := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: node}}
	rawPatch := client.RawPatch(types.StrategicMergePatchType, []byte(payload))

	if err := c.Patch(ctx, target, rawPatch); err != nil {
		return errors.Wrapf(err, "unable to patch %s label for node %s", nodeinfo.NodeLabelWaitOFED,
			node)
	}
	return nil
}

//nolint:dupl
func (r *NicClusterPolicyReconciler) updateCrStatus(
ctx context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, status state.Results) {
Expand Down
35 changes: 24 additions & 11 deletions deployment/network-operator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ parameters.

### General parameters

| Name | Type | Default | description |
| Name | Type | Default | Description |
|------------------------------------------------------|--------|------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `nfd.enabled` | bool | `True` | deploy Node Feature Discovery |
| `nfd.deployNodeFeatureRules` | bool | `True` | deploy Node Feature Rules to label the nodes |
Expand Down Expand Up @@ -373,7 +373,7 @@ imagePullSecrets:

#### Mellanox OFED driver

| Name | Type | Default | description |
| Name | Type | Default | Description |
| ---- | ---- | ------- |---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `ofedDriver.deploy` | bool | `false` | deploy Mellanox OFED driver container |
| `ofedDriver.repository` | string | `mellanox` | Mellanox OFED driver image repository |
Expand All @@ -393,7 +393,7 @@ imagePullSecrets:

#### RDMA Device Plugin

| Name | Type | Default | description |
| Name | Type | Default | Description |
| ---- | ---- | ------- | ----------- |
| `rdmaSharedDevicePlugin.deploy` | bool | `true` | Deploy RDMA Shared device plugin |
| `rdmaSharedDevicePlugin.repository` | string | `nvcr.io/nvidia/cloud-native` | RDMA Shared device plugin image repository |
Expand Down Expand Up @@ -425,7 +425,7 @@ resources:

#### SR-IOV Network Device plugin

| Name | Type | Default | description |
| Name | Type | Default | Description |
| ---- | ---- | ------- | ----------- |
| `sriovDevicePlugin.deploy` | bool | `false` | Deploy SR-IOV Network device plugin |
| `sriovDevicePlugin.repository` | string | `ghcr.io/k8snetworkplumbingwg` | SR-IOV Network device plugin image repository |
Expand Down Expand Up @@ -464,7 +464,7 @@ Create/Update/Delete), reading the Pod's network annotation and fetching its cor
PKey, to add the newly generated Guid or the predefined Guid in guid field of CRD cni-args to that PKey, for pods with
annotation mellanox.infiniband.app.

| Name | Type | Default | description |
| Name | Type | Default | Description |
|---------------------------------------|--------|---------------------------|---------------------------------------------------------------------------------------------|
| `ibKubernetes.deploy` | bool | `false` | Deploy IB Kubernetes |
| `ibKubernetes.repository` | string | `ghcr.io/mellanox` | IB Kubernetes image repository |
Expand Down Expand Up @@ -501,7 +501,7 @@ data:
#### Secondary Network

| Name | Type | Default | description |
| Name | Type | Default | Description |
| ---- | ---- | ------- | ----------- |
| `secondaryNetwork.deploy` | bool | `true` | Deploy Secondary Network |

Expand All @@ -515,7 +515,7 @@ optionally deployed components:

##### CNI Plugin Secondary Network

| Name | Type | Default | description |
| Name | Type | Default | Description |
| ---- | ---- | ------- | ----------- |
| `cniPlugins.deploy` | bool | `true` | Deploy CNI Plugins Secondary Network |
| `cniPlugins.image` | string | `plugins` | CNI Plugins image name |
Expand All @@ -525,7 +525,7 @@ optionally deployed components:

##### Multus CNI Secondary Network

| Name | Type | Default | description |
| Name | Type | Default | Description |
| ---- | ---- | ------- | ----------- |
| `multus.deploy` | bool | `true` | Deploy Multus Secondary Network |
| `multus.image` | string | `multus-cni` | Multus image name |
Expand All @@ -536,7 +536,7 @@ optionally deployed components:

##### IPoIB CNI

| Name | Type | Default | description |
| Name | Type | Default | Description |
| ---- | ---- | ------- | ----------- |
| `ipoib.deploy` | bool | `false` | Deploy IPoIB CNI |
| `ipoib.image` | string | `ipoib-cni` | IPoIB CNI image name |
Expand All @@ -546,7 +546,7 @@ optionally deployed components:

##### IPAM CNI Plugin Secondary Network

| Name | Type | Default | description |
| Name | Type | Default | Description |
| ----------------------------- | ------ |--------------------------------| ----------- |
| `ipamPlugin.deploy` | bool | `true` | Deploy IPAM CNI Plugin Secondary Network |
| `ipamPlugin.image` | string | `whereabouts` | IPAM CNI Plugin image name |
Expand All @@ -556,7 +556,7 @@ optionally deployed components:

#### NVIDIA IPAM Plugin

| Name | Type | Default | description |
| Name | Type | Default | Description |
| ------------------------- | ------ |--------------------| -----------------------------------------------------------------------------------------|
| `nvIpam.deploy` | bool | `false` | Deploy NVIDIA IPAM Plugin |
| `nvIpam.image` | string | `nvidia-k8s-ipam` | NVIDIA IPAM Plugin image name |
Expand All @@ -565,6 +565,19 @@ optionally deployed components:
| `nvIpam.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling the plugin image |
| `nvIpam.config` | string | Deprecated | This field is ignored. Configuration is done by using IPPool CRD |

#### NVIDIA NIC Feature Discovery

[NVIDIA NIC Feature Discovery](https://github.com/Mellanox/nic-feature-discovery)
leverages [Node Feature Discovery](https://kubernetes-sigs.github.io/node-feature-discovery/stable/get-started/index.html)
to advertise NIC-specific labels on K8s Node objects.

| Name | Type | Default | Description |
| -------------------------------- | ------ | ----------------------- | --------------------------------------------- |
| `nicFeatureDiscovery.deploy` | bool | `false` | Deploy NVIDIA NIC Feature Discovery |
| `nicFeatureDiscovery.image` | string | `nic-feature-discovery` | NVIDIA NIC Feature Discovery image name |
| `nicFeatureDiscovery.repository` | string | `ghcr.io/mellanox` | NVIDIA NIC Feature Discovery image repository |
| `nicFeatureDiscovery.version` | string | `v0.0.1` | NVIDIA NIC Feature Discovery image version |

## Deployment Examples

As there are several parameters that are required to be provided to create the custom resource during operator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,28 @@ spec:
- repository
- version
type: object
nicFeatureDiscovery:
description: NicFeatureDiscoverySpec describes configuration options
for nic-feature-discovery
properties:
image:
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullSecrets:
items:
type: string
type: array
repository:
pattern: '[a-zA-Z0-9\.\-\/]+'
type: string
version:
pattern: '[a-zA-Z0-9\.-]+'
type: string
required:
- image
- repository
- version
type: object
nodeAffinity:
description: Node affinity is a group of node affinity scheduling
rules.
Expand Down
Loading

0 comments on commit 6216af9

Please sign in to comment.