Skip to content

Commit

Permalink
Add support for CDI in k8s-rdma-shared-dev-plugin deployments
Browse files: browse the repository at this point in the history
Signed-off-by: amaslennikov <amaslennikov@nvidia.com>
  • Loading branch information
almaslennikov committed Sep 26, 2023
1 parent 578178f commit a76c373
Show file tree
Hide file tree
Showing 12 changed files with 53 additions and 18 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ spec:
periodSeconds: 30
rdmaSharedDevicePlugin:
image: k8s-rdma-shared-dev-plugin
repository: nvcr.io/nvidia/cloud-native
version: v1.3.2
repository: ghcr.io/mellanox
version: fe7f371c7e1b8315bf900f71cd25cfc1251dc775
# The config below directly propagates to k8s-rdma-shared-device-plugin configuration.
# Replace 'devices' with your (RDMA capable) netdevice name.
config: |
Expand Down Expand Up @@ -189,8 +189,8 @@ spec:
periodSeconds: 30
rdmaSharedDevicePlugin:
image: k8s-rdma-shared-dev-plugin
repository: nvcr.io/nvidia/cloud-native
version: v1.3.2
repository: ghcr.io/mellanox
version: fe7f371c7e1b8315bf900f71cd25cfc1251dc775
# The config below directly propagates to k8s-rdma-shared-device-plugin configuration.
# Replace 'devices' with your (RDMA capable) netdevice name.
config: |
Expand Down
1 change: 1 addition & 0 deletions api/v1alpha1/nicclusterpolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ type DrainSpec struct {
// 2. Device plugin configuration
type DevicePluginSpec struct {
ImageSpecWithConfig `json:""`
// UseCdi switches the device plugin to CDI mode. When true, the
// rdma-shared-dp DaemonSet template passes "--use-cdi" to the plugin and
// adds hostPath mounts for /etc/cdi, /var/run/cdi and /etc/pcidp.
// Serialized as "useCdi"; omitted from JSON when false (omitempty).
UseCdi bool `json:"useCdi,omitempty"`
}

// MultusSpec describes configuration options for Multus CNI
Expand Down
4 changes: 4 additions & 0 deletions config/crd/bases/mellanox.com_nicclusterpolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,8 @@ spec:
repository:
pattern: '[a-zA-Z0-9\.\-\/]+'
type: string
useCdi:
type: boolean
version:
pattern: '[a-zA-Z0-9\.-]+'
type: string
Expand Down Expand Up @@ -698,6 +700,8 @@ spec:
repository:
pattern: '[a-zA-Z0-9\.\-\/]+'
type: string
useCdi:
type: boolean
version:
pattern: '[a-zA-Z0-9\.-]+'
type: string
Expand Down
4 changes: 2 additions & 2 deletions config/samples/mellanox.com_v1alpha1_nicclusterpolicy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ spec:
maxParallelUpgrades: 1
rdmaSharedDevicePlugin:
image: k8s-rdma-shared-dev-plugin
repository: nvcr.io/nvidia/cloud-native
version: v1.3.2
repository: ghcr.io/mellanox
version: fe7f371c7e1b8315bf900f71cd25cfc1251dc775
# The config below directly propagates to k8s-rdma-shared-device-plugin configuration.
# Replace 'devices' with your (RDMA capable) netdevice name.
config: |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,8 @@ spec:
repository:
pattern: '[a-zA-Z0-9\.\-\/]+'
type: string
useCdi:
type: boolean
version:
pattern: '[a-zA-Z0-9\.-]+'
type: string
Expand Down Expand Up @@ -698,6 +700,8 @@ spec:
repository:
pattern: '[a-zA-Z0-9\.\-\/]+'
type: string
useCdi:
type: boolean
version:
pattern: '[a-zA-Z0-9\.-]+'
type: string
Expand Down
4 changes: 2 additions & 2 deletions deployment/network-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ ofedDriver:
rdmaSharedDevicePlugin:
deploy: true
image: k8s-rdma-shared-dev-plugin
repository: nvcr.io/nvidia/cloud-native
version: v1.3.2
repository: ghcr.io/mellanox
version: fe7f371c7e1b8315bf900f71cd25cfc1251dc775
# imagePullSecrets: []
# The following defines the RDMA resources in the cluster
# it must be provided by the user when deploying the chart
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ spec:
periodSeconds: 30
rdmaSharedDevicePlugin:
image: k8s-rdma-shared-dev-plugin
repository: nvcr.io/nvidia/cloud-native
version: v1.3.2
repository: ghcr.io/mellanox
version: fe7f371c7e1b8315bf900f71cd25cfc1251dc775
# The config below directly propagates to k8s-rdma-shared-device-plugin configuration.
# Replace 'devices' with your (RDMA capable) netdevice name.
config: |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ spec:
periodSeconds: 30
rdmaSharedDevicePlugin:
image: k8s-rdma-shared-dev-plugin
repository: nvcr.io/nvidia/cloud-native
version: v1.3.2
repository: ghcr.io/mellanox
version: fe7f371c7e1b8315bf900f71cd25cfc1251dc775
# The config below directly propagates to k8s-rdma-shared-device-plugin configuration.
# Replace 'devices' with your (RDMA capable) netdevice name.
config: |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ spec:
maxParallelUpgrades: 1
rdmaSharedDevicePlugin:
image: k8s-rdma-shared-dev-plugin
repository: nvcr.io/nvidia/cloud-native
version: v1.3.2
repository: ghcr.io/mellanox
version: fe7f371c7e1b8315bf900f71cd25cfc1251dc775
# The config below directly propagates to k8s-rdma-shared-device-plugin configuration.
# Replace 'devices' with your (RDMA capable) netdevice name.
config: |
Expand Down
4 changes: 2 additions & 2 deletions example/crs/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ spec:
periodSeconds: 30
rdmaSharedDevicePlugin:
image: k8s-rdma-shared-dev-plugin
repository: nvcr.io/nvidia/cloud-native
version: v1.3.2
repository: ghcr.io/mellanox
version: fe7f371c7e1b8315bf900f71cd25cfc1251dc775
# The config below directly propagates to k8s-rdma-shared-device-plugin configuration.
# Replace 'devices' with your (RDMA capable) netdevice name.
config: |
Expand Down
4 changes: 2 additions & 2 deletions hack/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ Mofed:
version: 23.07-0.4.5.0
RdmaSharedDevicePlugin:
image: k8s-rdma-shared-dev-plugin
repository: nvcr.io/nvidia/cloud-native
version: v1.3.2
repository: ghcr.io/mellanox
version: fe7f371c7e1b8315bf900f71cd25cfc1251dc775
SriovDevicePlugin:
image: sriov-network-device-plugin
repository: ghcr.io/k8snetworkplumbingwg
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ spec:
containers:
- image: {{ .CrSpec.Repository }}/{{ .CrSpec.Image }}:{{ .CrSpec.Version }}
name: rdma-shared-dp
command: [ "/bin/k8s-rdma-shared-dp" ]
{{- if .CrSpec.UseCdi }}
args: [ "--use-cdi" ]
{{- end }}
imagePullPolicy: IfNotPresent
securityContext:
privileged: true
Expand All @@ -64,6 +68,14 @@ spec:
mountPath: /k8s-rdma-shared-dev-plugin
- name: devs
mountPath: /dev/
{{- if .CrSpec.UseCdi }}
- name: default-cdi
mountPath: /etc/cdi/
- name: dynamic-cdi
mountPath: /var/run/cdi
- name: host-config-volume
mountPath: /host/etc/pcidp/
{{- end }}
volumes:
- name: device-plugin
hostPath:
Expand All @@ -77,6 +89,20 @@ spec:
- name: devs
hostPath:
path: /dev/
{{- if .CrSpec.UseCdi }}
- name: default-cdi
hostPath:
path: /etc/cdi
type: DirectoryOrCreate
- name: dynamic-cdi
hostPath:
path: /var/run/cdi
type: DirectoryOrCreate
- name: host-config-volume
hostPath:
path: /etc/pcidp
type: DirectoryOrCreate
{{- end }}
nodeSelector:
feature.node.kubernetes.io/pci-15b3.present: "true"
network.nvidia.com/operator.mofed.wait: "false"
Expand Down

0 comments on commit a76c373

Please sign in to comment.