Skip to content

Commit

Permalink
feat: Support ForcePrecompiled flag
Browse files Browse the repository at this point in the history
MOFED images provide precompiled drivers for some OS/kernel
combinations.

If a precompiled image is available in the image registry,
it will be used for MOFED; otherwise the image with sources will be
used.

The user can specify that the OFED state should fail if the precompiled
image does not exist.
This can be done by setting ForcePrecompiled to "true" in the "ofedDriver"
spec of the NicClusterPolicy.

The default for ForcePrecompiled is false.

Signed-off-by: Fred Rolland <frolland@nvidia.com>
  • Loading branch information
rollandf committed Mar 3, 2024
1 parent b635fad commit f23b5a2
Show file tree
Hide file tree
Showing 19 changed files with 450 additions and 33 deletions.
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ COPY --from=builder /workspace/manager .
COPY --from=builder /workspace/kubectl-${ARCH} /usr/local/bin/kubectl
COPY --from=builder /workspace/crds /crds

# Default certificates are missing in micro-ubi. They are needed to fetch DOCA driver image tags
COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem
COPY /webhook-schemas /webhook-schemas
COPY manifests/ manifests/
USER 65532:65532
Expand Down
6 changes: 6 additions & 0 deletions api/v1alpha1/nicclusterpolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,12 @@ type OFEDDriverSpec struct {
// +kubebuilder:default:=300
// +kubebuilder:validation:Minimum:=0
TerminationGracePeriodSeconds int64 `json:"terminationGracePeriodSeconds,omitempty"`
// ForcePrecompiled specifies if only MOFED precompiled images are allowed
// If set to false and precompiled image does not exist, MOFED drivers will be compiled on Nodes
// If set to true and precompiled image does not exist, OFED state will be Error.
// +optional
// +kubebuilder:default:=false
ForcePrecompiled bool `json:"forcePrecompiled,omitempty"`
}

// DriverUpgradePolicySpec describes policy configuration for automatic upgrades
Expand Down
7 changes: 7 additions & 0 deletions config/crd/bases/mellanox.com_nicclusterpolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,13 @@ spec:
- name
type: object
type: array
forcePrecompiled:
default: false
description: |-
ForcePrecompiled specifies if only MOFED precompiled images are allowed
If set to false and precompiled image does not exist, MOFED drivers will be compiled on Nodes
If set to true and precompiled image does not exist, OFED state will be Error.
type: boolean
image:
pattern: '[a-zA-Z0-9\-]+'
type: string
Expand Down
15 changes: 11 additions & 4 deletions controllers/nicclusterpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"github.com/Mellanox/network-operator/pkg/clustertype"
"github.com/Mellanox/network-operator/pkg/config"
"github.com/Mellanox/network-operator/pkg/consts"
"github.com/Mellanox/network-operator/pkg/docadriverimages"
"github.com/Mellanox/network-operator/pkg/nodeinfo"
"github.com/Mellanox/network-operator/pkg/state"
"github.com/Mellanox/network-operator/pkg/staticconfig"
Expand All @@ -49,10 +50,11 @@ import (
// NicClusterPolicyReconciler reconciles a NicClusterPolicy object
type NicClusterPolicyReconciler struct {
client.Client
Scheme *runtime.Scheme
ClusterTypeProvider clustertype.Provider
StaticConfigProvider staticconfig.Provider
MigrationCh chan struct{}
Scheme *runtime.Scheme
ClusterTypeProvider clustertype.Provider
StaticConfigProvider staticconfig.Provider
MigrationCh chan struct{}
DocaDriverImagesProvider docadriverimages.Provider

stateManager state.Manager
}
Expand Down Expand Up @@ -128,6 +130,7 @@ func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Req
sc := state.NewInfoCatalog()
sc.Add(state.InfoTypeClusterType, r.ClusterTypeProvider)
sc.Add(state.InfoTypeStaticConfig, r.StaticConfigProvider)

if instance.Spec.OFEDDriver != nil {
// Create node infoProvider and add to the service catalog
reqLogger.V(consts.LogLevelInfo).Info("Creating Node info provider")
Expand All @@ -147,6 +150,10 @@ func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Req
reqLogger.V(consts.LogLevelDebug).Info("Node info provider with", "Nodes:", nodeNames)
infoProvider := nodeinfo.NewProvider(nodePtrList)
sc.Add(state.InfoTypeNodeInfo, infoProvider)
r.DocaDriverImagesProvider.SetImageSpec(&instance.Spec.OFEDDriver.ImageSpec)
sc.Add(state.InfoTypeDocaDriverImage, r.DocaDriverImagesProvider)
} else {
r.DocaDriverImagesProvider.SetImageSpec(nil)
}
// Sync state and update status
managerStatus := r.stateManager.SyncState(ctx, instance, sc)
Expand Down
13 changes: 8 additions & 5 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (

mellanoxcomv1alpha1 "github.com/Mellanox/network-operator/api/v1alpha1"
"github.com/Mellanox/network-operator/pkg/clustertype"
"github.com/Mellanox/network-operator/pkg/docadriverimages"
"github.com/Mellanox/network-operator/pkg/staticconfig"
// +kubebuilder:scaffold:imports
)
Expand Down Expand Up @@ -130,13 +131,15 @@ var _ = BeforeSuite(func() {

migrationCompletionChan := make(chan struct{})
close(migrationCompletionChan)
docaImagesProvider := docadriverimages.NewProvider(context.Background(), k8sClient)

err = (&NicClusterPolicyReconciler{
Client: k8sManager.GetClient(),
Scheme: k8sManager.GetScheme(),
ClusterTypeProvider: clusterTypeProvider,
StaticConfigProvider: staticConfigProvider,
MigrationCh: migrationCompletionChan,
Client: k8sManager.GetClient(),
Scheme: k8sManager.GetScheme(),
ClusterTypeProvider: clusterTypeProvider,
StaticConfigProvider: staticConfigProvider,
MigrationCh: migrationCompletionChan,
DocaDriverImagesProvider: docaImagesProvider,
}).SetupWithManager(k8sManager, testSetupLog)
Expect(err).ToNot(HaveOccurred())

Expand Down
3 changes: 2 additions & 1 deletion deployment/network-operator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ containerResources:
#### Mellanox OFED driver

| Name | Type | Default | Description |
|-------------------------------------------------------------|--------|-----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| ----------------------------------------------------------- | ------ | --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `ofedDriver.deploy` | bool | `false` | deploy Mellanox OFED driver container |
| `ofedDriver.repository` | string | `mellanox` | Mellanox OFED driver image repository |
| `ofedDriver.image` | string | `mofed` | Mellanox OFED driver image name |
Expand Down Expand Up @@ -448,6 +448,7 @@ containerResources:
| `ofedDriver.upgradePolicy.waitForCompletion.podSelector` | string | not set | specifies a label selector for the pods to wait for completion before starting the driver upgrade |
| `ofedDriver.upgradePolicy.waitForCompletion.timeoutSeconds` | int | not set | specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite |
| `ofedDriver.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `mofed-container` container |
| `ofedDriver.forcePrecompiled` | bool | `false` | Fail Mellanox OFED deployment if precompiled OFED driver container image does not exist |

#### RDMA Device Plugin

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,13 @@ spec:
- name
type: object
type: array
forcePrecompiled:
default: false
description: |-
ForcePrecompiled specifies if only MOFED precompiled images are allowed
If set to false and precompiled image does not exist, MOFED drivers will be compiled on Nodes
If set to true and precompiled image does not exist, OFED state will be Error.
type: boolean
image:
pattern: '[a-zA-Z0-9\-]+'
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ spec:
image: {{ .Values.ofedDriver.image }}
repository: {{ .Values.ofedDriver.repository }}
version: {{ .Values.ofedDriver.version }}
forcePrecompiled: {{ .Values.ofedDriver.forcePrecompiled }}
{{- if .Values.ofedDriver.env }}
env:
{{ toYaml .Values.ofedDriver.env | nindent 6 }}
Expand Down
1 change: 1 addition & 0 deletions deployment/network-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ ofedDriver:
# podSelector: "app=myapp"
# specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite
# timeoutSeconds: 300
forcePrecompiled: false

rdmaSharedDevicePlugin:
deploy: true
Expand Down
12 changes: 12 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ require (
github.com/caarlos0/env/v6 v6.10.1
github.com/containers/image/v5 v5.29.2
github.com/go-logr/logr v1.4.1
github.com/google/go-containerregistry v0.17.0
github.com/google/go-containerregistry/pkg/authn/kubernetes v0.0.0-20231129213221-4fdaa32ee934
github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.6.0
github.com/onsi/ginkgo/v2 v2.15.0
github.com/onsi/gomega v1.31.1
Expand All @@ -30,8 +32,13 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/chai2010/gettext-go v1.0.2 // indirect
github.com/containerd/stargz-snapshotter/estargz v0.15.1 // indirect
github.com/containers/storage v1.51.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/docker/cli v24.0.7+incompatible // indirect
github.com/docker/distribution v2.8.3+incompatible // indirect
github.com/docker/docker v24.0.7+incompatible // indirect
github.com/docker/docker-credential-helpers v0.8.0 // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/evanphx/json-patch v5.7.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.8.0 // indirect
Expand Down Expand Up @@ -59,9 +66,11 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.3 // indirect
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
github.com/moby/spdystream v0.2.0 // indirect
github.com/moby/term v0.5.0 // indirect
Expand All @@ -71,16 +80,19 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.0-rc5 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.18.0 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.45.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/spf13/cobra v1.8.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.1 // indirect
github.com/vbatts/tar-split v0.11.5 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/xlab/treeprint v1.2.0 // indirect
Expand Down
Loading

0 comments on commit f23b5a2

Please sign in to comment.